基本信息
源码名称:vb 读取网页数据
源码大小:0.02M
文件格式:.zip
开发语言:ASP
更新时间:2019-06-10
   友情提示:(无需注册或充值,赞助后即可获取资源下载链接)

     嘿,亲!知识可是无价之宝呢,但咱这精心整理的资料也耗费了不少心血呀。小小地破费一下,绝对物超所值哦!如有下载和支付问题,请联系我们QQ(微信同号):813200300

本次赞助数额为: 2 元 
   源码介绍

'田草博客:www.tiancao.net
'tiancao1001@126.com
'QQ:327750885
'2008.1.16
'Option Explicit
Private Declare Function GetKeyState Lib "user32" (ByVal nVirtKey As Long) As Integer
Const VK_NUMLOCK = &H90


Private Declare Function SendMessage Lib "user32" _
                          Alias "SendMessageA" (ByVal hwnd As Long, _
                          ByVal wMsg As Long, ByVal wParam As Long, _
                          lParam As Any) As Long

Private Const LB_SETHORIZONTALEXTENT = &H194

Private Sub Combo2_Click()
   Me.Text2.Text = Me.Combo2.Text
End Sub

Private Sub Command1_Click()
    On Error Resume Next
    Dim HTML As String
    HTML = viewSource(Me.Text2.Text, Me.Combo1.Text)
    If HTML = "" Then Exit Sub
    Me.Text1.Text = HTML
    Dim URLS() As String
    Url_In_Html Me.Text2.Text, URLS, Me.Combo1.Text
    Dim i As Integer
    'Me.List1.Clear
    For i = 0 To UBound(URLS)
        Me.List1.AddItem URLS(i)
    Next
    Me.Timer1.Enabled = True
    Me.Label1.Caption = Me.List1.ListCount
End Sub
'URL为网页地址
'URLS为网页代码中的URL组
Function Url_In_Html(URL As String, ByRef URLS() As String, CodeType As String)
    Dim i As Long, j As Integer

    URL = Replace(URL, "\", "/") '将网页地址中可能含有的“\“全部替换成成”/”,这样地址中的分割符合就一致。

    i = inStr_n(URL, "/") '比如给的路径是http://www.tiancao.net
    If i = 2 Then URL = URL & "/"
    
    i = InStrRev(URL, "/")
    Dim URL1 As String
    URL1 = Left(URL, i) '查找地址的绝对地址路径

    Dim HTML As String
    HTML = viewSource(URL, CodeType)
    If HTML = "" Then Exit Function
    HTML = UCase(HTML) '将网页源码全部转换成大写
    Dim N As Integer
    Dim index() As Long
    N = inStr_n(HTML, "HREF", index)
    'MsgBox "总共有" & N & "个href标签"
    Dim T As String
    Dim T1 As String
    Dim Temp As String
    Dim Temp1 As String
    Dim Temp2 As Integer
    Dim Temp3 As Integer
    Dim Temp5 As Integer
    Dim M As Integer
    For i = 0 To N - 1
        Temp = Mid(HTML, index(i)   5, 300)
        '这里取url的长度为300,如果超过则检测不到,这300个字符中可能包含下一个或几个HREF标签,但这不用担心,程序会分析每个标签的。
        '为什么取那么多,是因为很多网页的URL编码可能很长,比如百度推广的广告和陶宝网的网址都很长。
        For j = 2 To Len(Temp)
            T = Mid(Temp, j, 1)
            If T = """" Or T = ">" Or T = "'" Or T = " " Then
                Temp1 = Left(Temp, j - 1)
                Temp1 = Left(Temp, j - 1)
                Temp2 = InStr(Temp1, " ") 'URL中含有 号的(比如<a href="' location.href '">)
                Temp3 = InStr(Temp1, "#") 'URL中含有#号的(比如<a href="#top">)
                Temp5 = InStr(Temp1, "MAILTO") 'URL中含有空格的(比如<a href="mailto:tiancao1001@126.com">)
                '没有能检查所以的情况
                If Temp2 = 0 And Temp3 = 0 And Temp5 = 0 Then
                    ReDim Preserve URLS(M)
                    If Left(Temp1, 1) = """" Or Left(Temp1, 1) = "'" Then Temp1 = Right(Temp1, Len(Temp1) - 1) 'URL前面可能还有个引号或单引号
                    If Temp2 = InStr(Temp1, ":") <> 0 Then
                        '存在冒号,说明是绝对路径(HTTP://),没有用判断HTTP来判断,是因为windows可以用HTTP给文件夹命名,而不可以用冒号
                        If Left(Temp1, 1) = "/" Or Left(Temp1, 1) = "\" Then
                            Temp1 = URL1 & Right(Temp1, Len(Temp1) - 1)
                        Else
                            Temp1 = URL1 & Temp1
                        End If
                    End If
                    URLS(M) = Temp1
                    M = M   1
                    Exit For
                End If
            End If
        Next
    Next

End Function
'返回某一字符串在另一个字符串中出现的次数 index返回出现的位置数组
Public Function inStr_n(str As String, StrIn As String, Optional index As Variant) As Long
    Dim i As Long
    Dim Temp As Long: Temp = 1
    Dim N As Long
    N = 0
    For i = 1 To Len(str)
        Temp = InStr(Temp   1, str, StrIn)
        If Temp = 0 Then
            Exit For
        Else
            If IsMissing(index) = False Then
                ReDim Preserve index(N)
                index(N) = Temp
            End If
            N = N   1
        End If
    Next i
    inStr_n = N
End Function
'查看网页的源码
Function viewSource(URL As String, CodeType As String)
    On Error GoTo E:

    Dim XmlHttp
    Set XmlHttp = CreateObject("Microsoft.XMLHTTP")
    XmlHttp.Open "GET", URL, False
    XmlHttp.setRequestHeader "Content-Type", "text/XML"
    XmlHttp.Send
    Dim HTML
    HTML = Bytes_to_Unicode(XmlHttp.responseBody, CodeType)
    viewSource = HTML

Exit Function

E:
    viewSource = ""
End Function

'只能得到西文的字符串,中文只能显示GB2312编码。
Function bytes2BSTR(vIn)
    Dim strReturn As String
    Dim i As Long
    Dim ThisCharCode As Integer
    Dim NextCharCode As Integer
    Dim ThirdCharCode As Integer
    strReturn = ""
    For i = 1 To LenB(vIn)
        ThisCharCode = AscB(MidB(vIn, i, 1))
        If ThisCharCode < &H80 Then
            strReturn = strReturn & Chr(ThisCharCode)
        Else
            NextCharCode = AscB(MidB(vIn, i   1, 1))
            ThirdCharCode = AscB(MidB(vIn, i   2, 1))
            strReturn = strReturn & UTF8_to_Unicode(ThisCharCode, NextCharCode, ThirdCharCode)
            i = i   2
        End If
    Next
    bytes2BSTR = strReturn
End Function
'字节数值转汉字
Function Bytes_to_Unicode(Bytes, CodeType As String)
    Dim strReturn As String
    Dim i As Long
    Dim ThisCharCode As Integer
    Dim NextCharCode As Integer
    Dim ThirdCharCode As Integer
    strReturn = ""
    For i = 1 To LenB(Bytes)
        ThisCharCode = AscB(MidB(Bytes, i, 1))
        If ThisCharCode < &H80 Then
            strReturn = strReturn & Chr(ThisCharCode)
        Else
            If CodeType = "UTF-8" Or CodeType = "UTF8" Then
                NextCharCode = AscB(MidB(Bytes, i   1, 1))
                ThirdCharCode = AscB(MidB(Bytes, i   2, 1))
                strReturn = strReturn & UTF8_to_Unicode(ThisCharCode, NextCharCode, ThirdCharCode)
                i = i   2
            Else
                NextCharCode = AscB(MidB(Bytes, i   1, 1))
                strReturn = strReturn & Unicode(ThisCharCode, NextCharCode)
                i = i   1
            End If
        End If
    Next
    Bytes_to_Unicode = strReturn
End Function
'二字节汉字转换
Function Unicode(BY1, BY2) As String
    Unicode = Chr(Int(BY1) * 256   Int(BY2))
End Function
'三字节的UTF-8编码转二字节的Unicode编码
Function UTF8_to_Unicode(BY1, BY2, BY3) As String
    Dim BIN_UTF8 As String
    BIN_UTF8 = DEC_to_BIN(Int(BY1)) & DEC_to_BIN(Int(BY2)) & DEC_to_BIN(Int(BY3))
    Dim BIN_Unicode As String
    BIN_Unicode = Mid(BIN_UTF8, 5, 4) & Mid(BIN_UTF8, 11, 6) & Mid(BIN_UTF8, 19, 6)
    Dim DEC_Unicode As Long
    DEC_Unicode = BIN_to_DEC(BIN_Unicode)
    UTF8_to_Unicode = ChrW(DEC_Unicode)
End Function

Private Sub Command2_Click()
    Me.Hide
    Me.Timer2.Enabled = True
End Sub



Private Sub Command3_Click()
    Dim FSO As Object
    Dim FSO_File As Object
    Set FSO = CreateObject("Scripting.FileSystemObject")
    Set FSO_File = FSO.OpenTextFile(App.Path & "/url.txt", ForWriting, True) '读取文件而不创建
    Dim i As Long
    For i = 0 To Me.List1.ListCount - 1
       FSO_File.WriteLine Me.List1.List(i)
    Next
    FSO_File.Close
End Sub

Private Sub Command4_Click()
    Dim FSO  As Object
    Dim FSO_File As Object
    Set FSO = CreateObject("Scripting.FileSystemObject")
    Set FSO_File = FSO.OpenTextFile(App.Path & "/url.txt", ForReading, False) '读取文件而不创建
    Do While Not FSO_File.AtEndOfStream
        Me.List1.AddItem FSO_File.ReadLine
    Loop
    FSO_File.Close
    
    Me.Timer1.Enabled = True

End Sub

Private Sub Form_Load()
    Me.WebBrowser1.Navigate "http://www.tiancao.net/"
    Me.WebBrowser1.Silent = True
    Me.Timer1.Enabled = False
    addHorScrlBarListBox List1
    Me.Combo1.AddItem "UTF-8"
    Me.Combo1.AddItem "GB2312"
    Me.Combo1.AddItem "Unicode"
    Me.Combo1.Text = "GB2312"
    Me.Combo2.Text = "http://www.tiancao.net/"
    Me.Combo2.AddItem "http://www.tiancao.net/"
    Me.Combo2.AddItem "http://tiancao.net"
    Me.Combo2.AddItem "http://ntsjytfgs.w39.cndns.com/"
    Me.Combo2.AddItem "http://tiancao1001.w18.cndns.com/"
End Sub

Private Sub List1_DblClick()
    Me.WebBrowser1.Navigate Me.List1.List(Me.List1.ListIndex)
End Sub

Private Sub List1_MouseDown(Button As Integer, Shift As Integer, X As Single, Y As Single)
    On Error Resume Next
    If Button = 2 Then Me.List1.RemoveItem Me.List1.ListIndex
End Sub

'每一分钟随机打开list中的一个连接
Private Sub Timer1_Timer()
    On Error Resume Next
    Dim j As Integer
    j = Rnd() * Me.List1.ListCount
    Me.WebBrowser1.Navigate Me.List1.List(j)
End Sub

' list加横向滚动条
Public Sub addHorScrlBarListBox(ByVal refControlListBox As Object)

    Dim nRet As Long
    Dim nNewWidth As Integer

    nNewWidth = refControlListBox.Width * 4 ' 新宽度,以像素为单位。
    nRet = SendMessage(refControlListBox.hwnd, _
           LB_SETHORIZONTALEXTENT, nNewWidth, ByVal 0&)
End Sub

Private Sub Timer2_Timer()
    Dim i As Long
    i = GetKeyState(VK_NUMLOCK)
    If i = 0 Then
        Me.Show
        Me.Timer2.Enabled = False
    End If
End Sub