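// Basic information
// Source name: Example of collecting web data and saving it to a database
// Source size: 1.03 MB
// File format: .rar
// Development language: C#
// Last updated: 2015-09-15
// Notice: no registration or account top-up is required; the download link is provided after sponsoring.
//
// Knowledge is priceless, but compiling this material took real effort, so a small contribution is requested. For download or payment problems, contact us on QQ (same ID on WeChat): 813200300
//
// Sponsorship amount for this download: 2 yuan
// Source code overview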


using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Data;
using System.Data.SqlClient;

namespace WebSpider
{
    /// <summary>
    /// Chain node that scans a page's HTML for <c>href</c> attributes, stores each
    /// link containing <c>http://</c> through the <c>AddWeb</c> stored procedure
    /// and pushes it onto <c>UrlStack</c>.
    /// </summary>
    class ChainNode : WebSpider.AbsChain
    {
        // NOTE(review): server address and credentials are embedded in source.
        // They should be moved to configuration (the previous revision read
        // AppSettings["DB"]) - confirm the deployment setup before changing.
        private const string ConnectionString = "Data Source=192.168.0.187;Initial Catalog=SpiderDB;User ID=netcc;Password=fax99";

        // Regex instances are immutable, so they are built once and shared
        // instead of being re-created for every page and every link.
        private static readonly Regex HrefPattern = new Regex(@"href=(?<web_url>[\s\S]*?)>|href=""(?<web_url>[\s\S]*?)""|href='(?<web_url>[\s\S]*?)'");

        // IgnoreCase replaces the former html.ToLower() call: the tag is still
        // matched in any casing, but the captured title keeps its original
        // casing and no lower-cased copy of the whole page is allocated.
        private static readonly Regex TitlePattern = new Regex(@"<title>(?<title>[\s\S]*?)</title>", RegexOptions.IgnoreCase);

        /// <summary>
        /// Extracts the links of one page, records them and queues them.
        /// </summary>
        /// <param name="html">Markup of the page being processed.</param>
        /// <remarks>
        /// Failures are reported on standard error and are not rethrown, so an
        /// exception raised while handling one link ends the handling of the
        /// current page only.
        /// </remarks>
        protected override void Process(string html)
        {
            try
            {
                if (string.IsNullOrEmpty(html))
                {
                    return;
                }

                // The title depends only on the page, so it is extracted once
                // rather than once per link.
                string title = TitlePattern.Match(html).Groups["title"].Value;
                bool hasTitle = !string.IsNullOrEmpty(title);

                foreach (Match m in HrefPattern.Matches(html))
                {
                    string url = CleanUrl(m.Groups["web_url"].Value);

                    // Only links that contain an http:// address are followed.
                    if (url.IndexOf("http://", StringComparison.Ordinal) == -1)
                    {
                        continue;
                    }

                    if (hasTitle)
                    {
                        // NOTE(review): the title saved with each link is the
                        // title of the page being processed, not of the link
                        // target - confirm this is what AddWeb expects.
                        AddUrl(url, title);
                        Console.WriteLine(url);
                    }

                    UrlStack.Instance.Push(url);
                }

                Console.Write(this.Url + "    " + title);
                Console.WriteLine();
            }
            catch (Exception ex)
            {
                // A modal message box would stall this console program until
                // somebody dismissed it; write the failure to stderr instead.
                Console.Error.WriteLine(this.Url + "    " + ex.Message);
            }
        }

        /// <summary>
        /// Strips the quoting and trailing attributes captured with an href value.
        /// </summary>
        /// <param name="url">Raw text captured by the <c>web_url</c> group.</param>
        /// <returns>The value without its quotes, cut at the first space.</returns>
        private static string CleanUrl(string url)
        {
            // Remove a leading ' or ", then the first remaining ' and the first
            // remaining " (normally the closing quote).
            if (url.Length > 0 && (url[0] == '\'' || url[0] == '"'))
            {
                url = url.Remove(0, 1);

                int quote = url.IndexOf('\'');
                if (quote != -1)
                {
                    url = url.Remove(quote, 1);
                }

                quote = url.IndexOf('"');
                if (quote != -1)
                {
                    url = url.Remove(quote, 1);
                }
            }

            // Anything after the first space belongs to other attributes.
            int space = url.IndexOf(' ');
            return space == -1 ? url : url.Remove(space);
        }

        /// <summary>
        /// Saves a link and a title by executing the <c>AddWeb</c> stored procedure.
        /// </summary>
        /// <param name="url">Value passed as the <c>@url</c> parameter.</param>
        /// <param name="title">Value passed as the <c>@title</c> parameter.</param>
        private void AddUrl(string url, string title)
        {
            // The using blocks release the connection and the command even when
            // Open or ExecuteNonQuery throws.
            using (SqlConnection cn = new SqlConnection(ConnectionString))
            using (SqlCommand cmd = cn.CreateCommand())
            {
                cmd.CommandType = CommandType.StoredProcedure;
                cmd.CommandText = "AddWeb";
                cmd.Parameters.AddWithValue("@url", url);
                cmd.Parameters.AddWithValue("@title", title);

                cn.Open();
                cmd.ExecuteNonQuery();
            }
        }
    }

    /// <summary>
    /// <c>AbsThreadManager</c> implementation that supplies a
    /// <see cref="ChainNode"/> as the chain header.
    /// </summary>
    class MyServer : WebSpider.AbsThreadManager
    {
        /// <summary>Creates the chain header.</summary>
        /// <returns>A newly constructed <see cref="ChainNode"/>.</returns>
        protected override AbsChain GetChainHeader()
        {
            AbsChain head = new ChainNode();
            return head;
        }
    }

    /// <summary>Console entry point of the spider.</summary>
    class Program
    {
        // Address handed to MyServer.Start.
        // Alternative used previously: http://trims.fibre2fashion.com/beads/
        private const string StartUrl = "http://www.baidu.com";

        /// <summary>
        /// Starts a <see cref="MyServer"/> on the start address, waits for a
        /// character on standard input and then stops the server.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            MyServer spider = new MyServer();
            spider.Start(StartUrl);

            // Console.Read blocks until input is available.
            Console.Read();

            spider.Stop();
        }
    }
}