using System; 
using System.Net; 
using System.Text; 
using System.Text.RegularExpressions; 
/// <summary>
/// Console sample that downloads a web page and decodes its bytes using the
/// charset declared inside the page, falling back to UTF-8.
/// </summary>
class Program
{
    // Matches a charset declaration in the decoded markup. Accepts the legacy
    // form (content="text/html; charset=gb2312") as well as the quoted HTML5
    // form (<meta charset="utf-8"> or <meta charset='utf-8'>).
    const string CharsetPattern = @"(?i)\bcharset\s*=\s*[""']?(?<charset>[-a-zA-Z_0-9]+)";

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and decodes it with the
    /// encoding detected from the page's own charset declaration.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The decoded HTML text.</returns>
    static string GetHtml(string url)
    {
        return GetHtml(url, null);
    }

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and decodes it with the
    /// given encoding, or auto-detects the encoding when none is given.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <param name="encoding">
    /// Encoding used to decode the downloaded bytes; pass <c>null</c> to detect
    /// it from the charset declaration found in the page.
    /// </param>
    /// <returns>The decoded HTML text.</returns>
    static string GetHtml(string url, Encoding encoding)
    {
        byte[] buf;
        // WebClient is IDisposable; release it as soon as the bytes are in memory.
        using (WebClient client = new WebClient())
        {
            buf = client.DownloadData(url);
        }

        if (encoding != null)
        {
            return encoding.GetString(buf);
        }

        // First pass: decode as UTF-8 only to be able to search the markup for a
        // charset declaration (the declaration itself is ASCII-compatible text).
        string html = Encoding.UTF8.GetString(buf);
        Encoding detected = GetEncoding(html);

        // Compare by code page instead of by reference so the result does not
        // depend on GetEncoding returning the very same Encoding.UTF8 instance.
        if (detected == null || detected.CodePage == Encoding.UTF8.CodePage)
        {
            return html;
        }

        // Second pass: decode the raw bytes again with the declared encoding.
        return detected.GetString(buf);
    }

    /// <summary>
    /// Extracts the encoding named by the first charset declaration in the
    /// given HTML text.
    /// </summary>
    /// <param name="html">HTML text to search.</param>
    /// <returns>
    /// The matching <see cref="Encoding"/>, or <c>null</c> when the text is
    /// null/empty, contains no charset declaration, or names an encoding that
    /// <see cref="Encoding.GetEncoding(string)"/> does not recognise.
    /// </returns>
    static Encoding GetEncoding(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return null;
        }

        Match match = Regex.Match(html, CharsetPattern);
        if (!match.Success)
        {
            // No declaration found: avoid calling GetEncoding("") just to catch
            // the exception it would throw.
            return null;
        }

        try
        {
            return Encoding.GetEncoding(match.Groups["charset"].Value);
        }
        catch (ArgumentException)
        {
            // Unknown or unsupported charset name; the caller falls back to UTF-8.
            return null;
        }
    }

    /// <summary>
    /// Program entry point: prints the HTML of a sample page and then waits
    /// for console input before exiting.
    /// </summary>
    static void Main()
    {
        // The URL must be a string literal; the original passed it unquoted,
        // which does not compile.
        Console.WriteLine(GetHtml("https://www.jb51.net"));
        Console.Read();
    }
}
// asp.net 网页编码自动识别代码
// 内容版权声明:除非注明,否则皆为本站原创文章。
