基本信息
源码名称:HTMLAgilityPack + ScrapySharp 采集爱站关键词工具源码
源码大小:1.98M
文件格式:.zip
开发语言:C#
更新时间:2016-01-26
友情提示:(无需注册或充值,赞助后即可获取资源下载链接)
嘿,亲!知识可是无价之宝呢,但咱这精心整理的资料也耗费了不少心血呀。小小地破费一下,绝对物超所值哦!如有下载和支付问题,请联系我们QQ(微信同号):78630559
本次赞助数额为:2 元
微信扫码支付:2 元
×
请留下您的邮箱,我们将在2小时内将文件发到您的邮箱
源码介绍
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using ScrapySharp.Extensions;
using ScrapySharp.Network;
using HtmlAgilityPack;
using System.Web;
namespace BaiduTools
{
    /// <summary>
    /// Console entry point that downloads the ci.aizhan.com keyword page for a
    /// fixed search word, reads the table cells found under every
    /// <c>div.box_05</c> element and prints one line per parsed keyword row.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Number of cells that make up one logical row of the keyword table.
        /// Cells 0-5 are mapped onto <c>BaiduKeyWord</c>; the seventh cell is not used.
        /// </summary>
        private const int ColumnCount = 7;

        /// <summary>
        /// Downloads the keyword page, parses it and writes the result to the console.
        /// Blocks on <see cref="Console.Read"/> before returning so the output stays visible.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string word = "张善友";
            string url = string.Format("http://ci.aizhan.com/{0}/", HttpUtility.UrlEncode(word));

            var browser = new ScrapingBrowser();
            string pageSource = browser.DownloadString(new Uri(url));

            var document = new HtmlDocument();
            document.LoadHtml(pageSource);

            foreach (HtmlNode box in document.DocumentNode.CssSelect("div.box_05"))
            {
                foreach (BaiduKeyWord keyword in ParseKeywords(box))
                {
                    Console.WriteLine("{0}: {1}:{2}:{3}", keyword.Id, keyword.KeyWord, keyword.SearchCount, keyword.IncludeCount);
                }
            }

            Console.Read();
        }

        /// <summary>
        /// Flattens the text of every <c>th</c>/<c>td</c> cell found in the tables
        /// below <paramref name="container"/> and converts each group of
        /// <see cref="ColumnCount"/> cells into a <c>BaiduKeyWord</c>.
        /// </summary>
        /// <param name="container">Node whose descendant tables are read.</param>
        /// <returns>The parsed rows; empty when the container holds no usable table.</returns>
        private static List<BaiduKeyWord> ParseKeywords(HtmlNode container)
        {
            // ".//table" keeps the search inside this container. A bare "//table"
            // is evaluated from the document root and would return every table of
            // the page once per container.
            List<string> cells =
                (from table in SelectOrEmpty(container, ".//table")
                 from row in SelectOrEmpty(table, "tr")
                 from cell in SelectOrEmpty(row, "th|td")
                 select cell.InnerText).ToList();

            // Integer division: a trailing incomplete group of cells is ignored.
            int rowCount = cells.Count / ColumnCount;
            var keywords = new List<BaiduKeyWord>();

            // The first group of cells is skipped (presumably the header row -
            // confirm against the live page markup).
            for (int rowIndex = 1; rowIndex < rowCount; rowIndex++)
            {
                int start = rowIndex * ColumnCount;
                keywords.Add(new BaiduKeyWord
                {
                    Id = ParseCount(cells[start]),
                    KeyWord = cells[start + 1],
                    SearchCount = ParseCount(cells[start + 2]),
                    IncludeCount = ParseCount(cells[start + 3]),
                    FirstItem = cells[start + 4],
                    SecondItem = cells[start + 5]
                });
            }

            return keywords;
        }

        /// <summary>
        /// Runs an XPath query and returns an empty sequence instead of
        /// <c>null</c> when nothing matches, so callers can enumerate the result directly.
        /// </summary>
        /// <param name="node">Context node of the query.</param>
        /// <param name="xpath">XPath expression to evaluate.</param>
        /// <returns>The matching nodes, or an empty sequence.</returns>
        private static IEnumerable<HtmlNode> SelectOrEmpty(HtmlNode node, string xpath)
        {
            // HtmlNode.SelectNodes yields null (not an empty collection) on no match.
            HtmlNodeCollection matches = node.SelectNodes(xpath);
            return matches ?? Enumerable.Empty<HtmlNode>();
        }

        /// <summary>
        /// Converts the text of a numeric cell to an integer.
        /// </summary>
        /// <param name="text">Raw cell text; surrounding white space is accepted.</param>
        /// <returns>
        /// The parsed value, or 0 when the text is not a plain integer, so that a
        /// single unexpected cell does not abort the whole run.
        /// </returns>
        private static int ParseCount(string text)
        {
            int value;
            bool parsed = int.TryParse(
                text,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out value);
            return parsed ? value : 0;
        }
    }
}