ASP.NET C# 抓取页面信息方法介绍(2)


using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using HtmlAgilityPack;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using System.IO.Compression;

namespace ConsoleApplication2
{
/// <summary>
/// Console demo that downloads a web page, decodes it with the charset reported
/// by the server, and extracts the page title and the keywords meta tag using
/// HtmlAgilityPack.
/// </summary>
public class Program
{
    // Charset used when the response reports "ISO-8859-1" or reports nothing.
    // NOTE(review): presumably "ISO-8859-1" is what HttpWebResponse reports when
    // the server declares no charset, and the target sites are Chinese pages
    // encoded as gb2312 - confirm before reusing this on other sites.
    private const string FallbackCharset = "gb2312";

    // XPath 1.0 has no case-insensitive comparison, so the name attribute is
    // lower-cased with translate() to match "Keywords", "keywords", "KEYWORDS", ...
    private const string KeywordsXPath =
        "//meta[translate(@name,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz')='keywords']";

    /// <summary>
    /// Entry point: fetches a fixed URL and prints its title and keywords.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var currentUrl = "http://www.sohu.com/";

        var html = DownloadHtml(currentUrl);

        HtmlDocument document = new HtmlDocument();
        document.LoadHtml(html);

        // Extract the title; a page without a <title> element yields an empty string.
        var titleNode = document.DocumentNode.SelectSingleNode("//title");
        var title = titleNode != null ? titleNode.InnerText : string.Empty;

        // Extract the keywords; a missing meta tag or content attribute yields an empty string.
        var keywordsNode = document.DocumentNode.SelectSingleNode(KeywordsXPath);
        var keywords = keywordsNode != null
            ? keywordsNode.GetAttributeValue("content", string.Empty)
            : string.Empty;

        Console.WriteLine("title: " + title);
        Console.WriteLine("keywords: " + keywords);
    }

    /// <summary>
    /// Downloads <paramref name="url"/> and returns the response body as text,
    /// transparently decompressing gzip-encoded responses.
    /// </summary>
    /// <param name="url">Absolute http/https URL of the page to fetch.</param>
    /// <returns>The decoded response body.</returns>
    /// <exception cref="NotSupportedException">
    /// The URL does not produce an <see cref="HttpWebRequest"/>.
    /// </exception>
    private static string DownloadHtml(string url)
    {
        var request = WebRequest.Create(url) as HttpWebRequest;
        if (request == null)
        {
            throw new NotSupportedException("Only http/https URLs are supported: " + url);
        }

        // Disposing the response releases the underlying connection even when
        // reading or decoding fails part-way through.
        using (var response = (HttpWebResponse)request.GetResponse())
        {
            var encoding = ResolveEncoding(response.CharacterSet);

            Stream stream = response.GetResponseStream();
            if (string.Equals(response.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase))
            {
                stream = new GZipStream(stream, CompressionMode.Decompress);
            }

            // Disposing the reader also disposes the (possibly wrapped) stream.
            using (var reader = new StreamReader(stream, encoding))
            {
                return reader.ReadToEnd();
            }
        }
    }

    /// <summary>
    /// Maps the charset name reported by the server to an <see cref="Encoding"/>.
    /// </summary>
    /// <param name="charset">Charset name from the response; may be null or empty.</param>
    /// <returns>
    /// The matching encoding; <see cref="FallbackCharset"/> when the name is
    /// missing or "ISO-8859-1"; UTF-8 when the name cannot be resolved.
    /// </returns>
    private static Encoding ResolveEncoding(string charset)
    {
        // Strip surrounding whitespace and quotes in case the header value is quoted.
        var name = charset == null ? string.Empty : charset.Trim(' ', '"', '\'');

        if (name.Length == 0 || name == "ISO-8859-1")
        {
            name = FallbackCharset;
        }

        try
        {
            return Encoding.GetEncoding(name);
        }
        catch (ArgumentException)
        {
            // Unknown or unsupported charset name: decode as UTF-8 rather than
            // aborting the whole download.
            return Encoding.UTF8;
        }
    }
}
}


ASP.NET C# 抓取页面信息方法介绍


好了,打完收工,睡觉……

您可能感兴趣的文章:

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:https://www.heiqu.com/wjdxpx.html