基本信息
源码名称:HtmlAgilityPack + ScrapySharp 采集爱站关键词工具源码
源码大小:1.98M
文件格式:.zip
开发语言:C#
更新时间:2016-01-26
友情提示:(无需注册或充值,赞助后即可获取资源下载链接)
嘿,亲!知识可是无价之宝呢,但咱这精心整理的资料也耗费了不少心血呀。小小地破费一下,绝对物超所值哦!如有下载和支付问题,请联系我们QQ(微信同号):813200300
本次赞助数额为:2 元
微信扫码支付:2 元
×
请留下您的邮箱,我们将在2小时内将文件发到您的邮箱
源码介绍
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using ScrapySharp.Extensions;
using ScrapySharp.Network;
using HtmlAgilityPack;
using System.Web;

namespace BaiduTools
{
    /// <summary>
    /// Console sample that downloads the ci.aizhan.com page for a single search
    /// word, reads the table cells found inside each <c>div.box_05</c> element
    /// and prints the keyword rows built from them.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Number of consecutive cells that are grouped into one keyword row.
        /// Only the first six cells of each group are mapped onto
        /// <c>BaiduKeyWord</c>; the seventh is read but not used.
        /// </summary>
        private const int ColumnCount = 7;

        /// <summary>
        /// Entry point: fetches the page, parses it and writes one line per
        /// keyword row to the console, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string word = "张善友";
            string url = string.Format("http://ci.aizhan.com/{0}/", HttpUtility.UrlEncode(word));

            var browser = new ScrapingBrowser();
            string pageSource = browser.DownloadString(new Uri(url));

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(pageSource);

            foreach (HtmlNode box in htmlDocument.DocumentNode.CssSelect("div.box_05"))
            {
                List<string> cellTexts = CollectCellTexts(box);

                foreach (BaiduKeyWord item in BuildKeyWords(cellTexts))
                {
                    Console.WriteLine("{0}: {1}:{2}:{3}", item.Id, item.KeyWord, item.SearchCount, item.IncludeCount);
                }
            }

            // Keep the console window open until the user presses a key.
            Console.Read();
        }

        /// <summary>
        /// Returns the inner text of every <c>th</c>/<c>td</c> cell of every
        /// table row below <paramref name="container"/>, in document order.
        /// </summary>
        /// <param name="container">Node whose descendant tables are read.</param>
        /// <returns>Flat list of cell texts; empty when no table, row or cell is found.</returns>
        private static List<string> CollectCellTexts(HtmlNode container)
        {
            // ".//table" is relative to the container. The previous "//table"
            // is anchored at the document root, so it returned every table of
            // the page once per matched container.
            var texts =
                from table in SelectOrEmpty(container, ".//table")
                from row in SelectOrEmpty(table, "tr")
                from cell in SelectOrEmpty(row, "th|td")
                select cell.InnerText;

            return texts.ToList();
        }

        /// <summary>
        /// Runs an XPath query and substitutes an empty sequence for the
        /// <c>null</c> that <c>SelectNodes</c> returns when nothing matches.
        /// </summary>
        /// <param name="node">Context node of the query.</param>
        /// <param name="xpath">XPath expression to evaluate.</param>
        /// <returns>The matching nodes, or an empty sequence.</returns>
        private static IEnumerable<HtmlNode> SelectOrEmpty(HtmlNode node, string xpath)
        {
            HtmlNodeCollection found = node.SelectNodes(xpath);
            if (found == null)
            {
                return Enumerable.Empty<HtmlNode>();
            }

            return found;
        }

        /// <summary>
        /// Splits the flat cell list into groups of <see cref="ColumnCount"/>
        /// cells and maps each group, except the first, onto a
        /// <c>BaiduKeyWord</c>.
        /// </summary>
        /// <param name="cellTexts">Flat list of cell texts in document order.</param>
        /// <returns>One <c>BaiduKeyWord</c> per complete group after the first.</returns>
        /// <exception cref="FormatException">A numeric cell does not contain a valid integer.</exception>
        /// <exception cref="OverflowException">A numeric cell is outside the <see cref="int"/> range.</exception>
        private static List<BaiduKeyWord> BuildKeyWords(List<string> cellTexts)
        {
            var keyWords = new List<BaiduKeyWord>();

            // Integer division drops a trailing, incomplete group.
            int groupCount = cellTexts.Count / ColumnCount;

            // Group 0 is skipped - presumably the table's header row; confirm against the live page.
            for (int group = 1; group < groupCount; group++)
            {
                int start = group * ColumnCount;

                var keyWord = new BaiduKeyWord();
                keyWord.Id = Convert.ToInt32(cellTexts[start]);
                keyWord.KeyWord = cellTexts[start + 1];
                keyWord.SearchCount = Convert.ToInt32(cellTexts[start + 2]);
                keyWord.IncludeCount = Convert.ToInt32(cellTexts[start + 3]);
                keyWord.FirstItem = cellTexts[start + 4];
                keyWord.SecondItem = cellTexts[start + 5];

                keyWords.Add(keyWord);
            }

            return keyWords;
        }
    }
}