基本信息
源码名称:c++爬虫工具源码(HttpMonitor)
源码大小:0.12M
文件格式:.zip
开发语言:C/C++
更新时间:2016-01-20
友情提示:(无需注册或充值,赞助后即可获取资源下载链接)
嘿,亲!知识可是无价之宝呢,但咱这精心整理的资料也耗费了不少心血呀。小小地破费一下,绝对物超所值哦!如有下载和支付问题,请联系我们QQ(微信同号):813200300
本次赞助数额为: 1 元×
微信扫码支付:1 元
×
请留下您的邮箱,我们将在2小时内将文件发到您的邮箱
源码介绍
// HttpMon.cpp : Implementation of CHttpMon
#include "stdafx.h"
#include "HttpMon.h"
#include "ProtocolCF.h"
// CHttpMon
typedef PassthroughAPP::CMetaFactory<PassthroughAPP::CComClassFactoryProtocol, CTestAPP> MetaFactory;
LONG GetPtr(void * v);
LONG GetUniqueId(LONG l1, LONG l2, WCHAR* w);
LONG GetContainerId(CComPtr<IWinInetHttpInfo> spWinInetHttpInfo);
LONG GetHash(WCHAR* w);
int IndexOf(wchar_t* str, wchar_t* s);
CComBSTR GetQueryInfo(CComPtr<IWinInetHttpInfo> spWinInetHttpInfo, DWORD dwOption, DWORD flags);
wchar_t* Substring(wchar_t *str, int beginIndex, int endIndex);
STDMETHODIMP CHttpMon::get_IEWindow(LONG* pVal)
{
*pVal = m_iEWindow;
return S_OK;
}
STDMETHODIMP CHttpMon::put_IEWindow(LONG newVal)
{
if(isProtocolsRegistered == FALSE) {
CComPtr<IInternetSession> spSession;
CoInternetGetSession(0, &spSession, 0);
MetaFactory::CreateInstance(CLSID_HttpProtocol, &spCFHTTP);
spSession->RegisterNameSpace(spCFHTTP, CLSID_NULL, L"http", 0, 0, 0);
MetaFactory::CreateInstance(CLSID_HttpSProtocol, &spCFHTTPS);
spSession->RegisterNameSpace(spCFHTTPS, CLSID_NULL, L"https", 0, 0, 0);
isProtocolsRegistered = TRUE;
}
m_iEWindow = newVal;
return S_OK;
}
STDMETHODIMP MonitorSink::OnStart(LPCWSTR szUrl, IInternetProtocolSink *pOIProtSink,
IInternetBindInfo *pOIBindInfo, DWORD grfPI, DWORD dwReserved,
IInternetProtocol* pTargetProtocol)
{
myInstance = NULL;
HRESULT hr = BaseClass::OnStart(szUrl, pOIProtSink, pOIBindInfo, grfPI,
dwReserved, pTargetProtocol);
HRESULT hret = IUnknown_QueryService(pOIProtSink, IID_IHTMLWindow2, IID_IHTMLWindow2, (void**)&iWindow);
if(FAILED(hret) || !iWindow)
{
iWindow = NULL;
}
resultReported = false;
CComPtr<IWindowForBindingUI> objWindowForBindingUI;
//HRESULT hret = QueryServiceFromClient(&objWindowForBindingUI);
hret = IUnknown_QueryService(pOIProtSink, IID_IWindowForBindingUI, IID_IWindowForBindingUI, (void**)&objWindowForBindingUI);
if(SUCCEEDED(hret) && objWindowForBindingUI)
{
HWND hwndIEServer = NULL;
HRESULT hret = objWindowForBindingUI->GetWindow(IID_IWindowForBindingUI, &hwndIEServer);
if( (hwndIEServer) && (::IsWindow(hwndIEServer)) )
{
int isize = monitorInstances.GetSize();
for(int i = 0; i < isize; i )
{
CHttpMon *monitorInstance = reinterpret_cast<CHttpMon*>(monitorInstances[i]);
if( (monitorInstance) && (monitorInstance->m_iEWindow == (long)hwndIEServer) )
{
myInstance = monitorInstance;
break;
}
}
}
}
return hr;
}
STDMETHODIMP MonitorSink::Switch(
PROTOCOLDATA *pProtocolData)
{
/*if( (pProtocolData->grfFlags & PD_FORCE_SWITCH) == 0)
pProtocolData->grfFlags |= PD_FORCE_SWITCH;*/
return m_spInternetProtocolSink ?
m_spInternetProtocolSink->Switch(pProtocolData) :
E_UNEXPECTED;
}
STDMETHODIMP MonitorSink::BeginningTransaction(
LPCWSTR szURL,
LPCWSTR szHeaders,
DWORD dwReserved,
LPWSTR *pszAdditionalHeaders)
{
if (pszAdditionalHeaders)
{
*pszAdditionalHeaders = 0;
}
CComPtr<IHttpNegotiate> spHttpNegotiate;
QueryServiceFromClient(&spHttpNegotiate);
HRESULT hr = spHttpNegotiate ?
spHttpNegotiate->BeginningTransaction(szURL, szHeaders,
dwReserved, pszAdditionalHeaders) :
E_UNEXPECTED;
LONG tempContainerId = 0;
CComPtr<IWinInetHttpInfo> spWinInetHttpInfo;
HRESULT hrTemp = m_spTargetProtocol->QueryInterface(IID_IWinInetHttpInfo,
reinterpret_cast<void**>(&spWinInetHttpInfo));
if(SUCCEEDED(hrTemp))
{
tempContainerId = GetContainerId(spWinInetHttpInfo);
}
if(tempContainerId == 0 && pszAdditionalHeaders && *pszAdditionalHeaders)
{
int index = IndexOf(*pszAdditionalHeaders, _T("Referer"));
if(index != -1)
{
WCHAR *subString = Substring(*pszAdditionalHeaders, index, wcslen(*pszAdditionalHeaders));
index = IndexOf(subString, _T("\r\n"));
WCHAR * subString2 = Substring(subString, 0, index);
WCHAR *subString3 = Substring(subString2, 9, wcslen(subString2));
tempContainerId = GetHash(subString3);
delete []subString3;
delete []subString2;
delete []subString;
}
}
if(!myInstance)
{
int isize = monitorInstances.GetSize();
VARIANT_BOOL itsMine = VARIANT_FALSE;
for(int i = 0; i < isize; i )
{
CHttpMon *monitorInstance = reinterpret_cast<CHttpMon*>(monitorInstances[i]);
monitorInstance->Fire_ConfirmRequest(GetUniqueId(GetPtr(iWindow), GetPtr(m_spInternetBindInfo), (WCHAR*)szURL), tempContainerId, W2BSTR(szURL), isize, &itsMine);
if(itsMine)
{
myInstance = monitorInstance;
break;
}
}
}
if(myInstance)
{
myInstance->Fire_GetIServiceProviderOnStart(GetUniqueId(GetPtr(iWindow), GetPtr(m_spInternetBindInfo), (WCHAR*)szURL), tempContainerId, W2BSTR(szURL), GetPtr(m_spServiceProvider.p));
}
return hr;
}
STDMETHODIMP MonitorSink::OnResponse(
DWORD dwResponseCode,
LPCWSTR szResponseHeaders,
LPCWSTR szRequestHeaders,
LPWSTR *pszAdditionalRequestHeaders)
{
if (pszAdditionalRequestHeaders)
{
*pszAdditionalRequestHeaders = 0;
}
CComPtr<IHttpNegotiate> spHttpNegotiate;
QueryServiceFromClient(&spHttpNegotiate);
HRESULT hr = spHttpNegotiate ?
spHttpNegotiate->OnResponse(dwResponseCode, szResponseHeaders,
szRequestHeaders, pszAdditionalRequestHeaders) :
E_UNEXPECTED;
if(myInstance)
{
CComBSTR strResponseHeaders = L"";
WCHAR* pURL = 0;
ULONG cEl = 1;
if(szResponseHeaders)
{
strResponseHeaders = W2BSTR(szResponseHeaders);
}
if(pszAdditionalRequestHeaders && *pszAdditionalRequestHeaders)
{
strResponseHeaders = _T("\r\n");
strResponseHeaders = W2BSTR(*pszAdditionalRequestHeaders);
}
CComPtr<IWinInetHttpInfo> spWinInetHttpInfo;
HRESULT hrTemp = m_spTargetProtocol->QueryInterface(IID_IWinInetHttpInfo,
reinterpret_cast<void**>(&spWinInetHttpInfo));
m_spInternetBindInfo->GetBindString(BINDSTRING_URL, &pURL, cEl, &cEl);
myInstance->Fire_OnResponse(GetUniqueId(GetPtr(iWindow), GetPtr(m_spInternetBindInfo), pURL), GetContainerId(spWinInetHttpInfo), W2BSTR(pURL), dwResponseCode, strResponseHeaders);
CoTaskMemFree(pURL);
}
return hr;
}
STDMETHODIMP CTestAPP::Read(void *pv, ULONG cb, ULONG *pcbRead)
{
USES_CONVERSION;
HRESULT hr = m_spInternetProtocol->Read(pv, cb, pcbRead);
if(m_internetSink.myInstance)
{
BYTE *data = new BYTE[*pcbRead];
memcpy(data, pv, *pcbRead);
VARIANT v;
v.pbVal = data;
VARIANT_BOOL isComplete = VARIANT_FALSE;
if(hr == S_FALSE)
{
isComplete = VARIANT_TRUE;
}
WCHAR *pURL = 0;
ULONG cEl = 1;
CComPtr<IWinInetHttpInfo> spWinInetHttpInfo;
HRESULT hrTemp =this->m_internetSink.m_spTargetProtocol->QueryInterface(IID_IWinInetHttpInfo,
reinterpret_cast<void**>(&spWinInetHttpInfo));
this->m_internetSink.m_spInternetBindInfo->GetBindString(BINDSTRING_URL, &pURL, cEl, &cEl);
m_internetSink.myInstance->Fire_OnDataRecieved(GetUniqueId(GetPtr(this->m_internetSink.iWindow), GetPtr(this->m_internetSink.m_spInternetBindInfo), pURL), GetContainerId(spWinInetHttpInfo), W2BSTR(pURL), data, *pcbRead, isComplete);
CoTaskMemFree(pURL);
delete data;
}
return hr;
}
STDMETHODIMP MonitorSink::ReportProgress(
ULONG ulStatusCode,
LPCWSTR szStatusText)
{
HRESULT hr = m_spInternetProtocolSink ?
m_spInternetProtocolSink->ReportProgress(ulStatusCode, szStatusText) :
E_UNEXPECTED;
if(myInstance)
{
WCHAR* pURL = 0;
LONG tempContainerId = 0;
if (ulStatusCode == BINDSTATUS_REDIRECTING || ulStatusCode == BINDSTATUS_COOKIE_SENT || ulStatusCode == BINDSTATUS_MIMETYPEAVAILABLE ||
ulStatusCode == BINDSTATUS_COOKIE_STATE_LEASH || ulStatusCode == BINDSTATUS_COOKIE_STATE_ACCEPT || ulStatusCode == BINDSTATUS_CACHEFILENAMEAVAILABLE ||
ulStatusCode == BINDSTATUS_P3P_HEADER || ulStatusCode == BINDSTATUS_SENDINGREQUEST)
{
ULONG cEl = 1;
m_spInternetBindInfo->GetBindString(BINDSTRING_URL, &pURL, cEl, &cEl);
CComPtr<IWinInetHttpInfo> spWinInetHttpInfo;
HRESULT hrTemp = m_spTargetProtocol->QueryInterface(IID_IWinInetHttpInfo,
reinterpret_cast<void**>(&spWinInetHttpInfo));
tempContainerId = GetContainerId(spWinInetHttpInfo);
}
if (ulStatusCode == BINDSTATUS_REDIRECTING)
{
CComBSTR strRequestHeaders = L"";
CComBSTR strResposeHeaders = L"";
CComPtr<IWinInetHttpInfo> spWinInetHttpInfo;
HRESULT hrTemp = m_spTargetProtocol->QueryInterface(IID_IWinInetHttpInfo,
reinterpret_cast<void**>(&spWinInetHttpInfo));
if(SUCCEEDED(hrTemp))
{
strRequestHeaders = GetQueryInfo(spWinInetHttpInfo, HTTP_QUERY_RAW_HEADERS_CRLF | HTTP_QUERY_FLAG_REQUEST_HEADERS, 0);
strResposeHeaders = GetQueryInfo(spWinInetHttpInfo, HTTP_QUERY_RAW_HEADERS_CRLF, 0);
}
myInstance->Fire_OnRedirect(GetUniqueId(GetPtr(iWindow), GetPtr(this->m_spInternetBindInfo), pURL), tempContainerId, GetUniqueId(GetPtr(iWindow), GetPtr(this->m_spInternetBindInfo), (WCHAR*)szStatusText), W2BSTR(pURL), W2BSTR(szStatusText), W2BSTR(strResposeHeaders), W2BSTR(strRequestHeaders));
}
else if(ulStatusCode == BINDSTATUS_COOKIE_SENT)
{
CComPtr<IWinInetHttpInfo> spWinInetHttpInfo;
CComBSTR strCookies = L"";
HRESULT hrTemp = m_spTargetProtocol->QueryInterface(IID_IWinInetHttpInfo,
reinterpret_cast<void**>(&spWinInetHttpInfo));
if(SUCCEEDED(hrTemp))
{
DWORD size = 0;
DWORD flags = 0;
DWORD temp = 0;
do
{
strCookies.AppendBSTR(GetQueryInfo(spWinInetHttpInfo, HTTP_QUERY_COOKIE | HTTP_QUERY_FLAG_REQUEST_HEADERS, temp).m_str);
strCookies.Append(_T("\r\n"));
temp ;
} while(size > 0);
}
myInstance->Fire_OnCookieSent(GetUniqueId(GetPtr(iWindow), GetPtr(this->m_spInternetBindInfo), pURL), tempContainerId, W2BSTR(pURL), W2BSTR(strCookies));
}
else if(ulStatusCode == BINDSTATUS_SENDINGREQUEST)
{
CComPtr<IWinInetHttpInfo> spWinInetHttpInfo;
HRESULT hrTemp = m_spTargetProtocol->QueryInterface(IID_IWinInetHttpInfo, reinterpret_cast<void**>(&spWinInetHttpInfo));
if(SUCCEEDED(hrTemp))
{
CComBSTR strRequestHeaders = L"";
CComBSTR strMethod = L"";
if(SUCCEEDED(hrTemp))
{
strRequestHeaders = GetQueryInfo(spWinInetHttpInfo, HTTP_QUERY_RAW_HEADERS_CRLF | HTTP_QUERY_FLAG_REQUEST_HEADERS, 0);
strMethod = GetQueryInfo(spWinInetHttpInfo, HTTP_QUERY_REQUEST_METHOD, 0);
}
BYTE *data = new BYTE[];
long cb = 0;
DWORD grfBINDF;
BINDINFO bindinfo;
bindinfo.cbSize = sizeof(BINDINFO);
hrTemp = m_spInternetBindInfo->GetBindInfo(&grfBINDF, &bindinfo);
if(hrTemp == S_OK)
{
switch (bindinfo.dwBindVerb)
{
case BINDVERB_POST:
switch(bindinfo.stgmedData.tymed)
{
case TYMED_ISTORAGE:
{
ILockBytes *pLockBytes;
CreateILockBytesOnHGlobal(NULL, true, &pLockBytes);
IStorage *pstgOpen;
StgCreateDocfileOnILockBytes(pLockBytes, STGM_CREATE | STGM_SHARE_EXCLUSIVE | STGM_READWRITE, 0, &pstgOpen);
IID rgiidExclude;
OLECHAR *o = W2OLE(_T(""));
SNB snbExclude = &o;
bindinfo.stgmedData.pstg->CopyTo(0, &rgiidExclude, snbExclude, pstgOpen);
pLockBytes->Flush();
pstgOpen->Commit(0);
STATSTG lockBytesStat;
pLockBytes->Stat(&lockBytesStat, 1);
ULARGE_INTEGER lockBytesSize = lockBytesStat.cbSize;
delete data;
data = new BYTE[lockBytesSize.QuadPart];
DWORD cbRead;
ULARGE_INTEGER offset = {0,0};
pLockBytes->ReadAt(offset, data, lockBytesSize.QuadPart, &cbRead);
cb = cbRead;
pLockBytes->Release();
pstgOpen->Release();
}
break;
case TYMED_HGLOBAL:
void *pData;
UINT cPostData;
cPostData = bindinfo.cbstgmedData;
if (!cPostData)
{
break;
}
pData = GlobalLock(bindinfo.stgmedData.hGlobal);
if (pData)
{
delete data;
data = new BYTE[cPostData];
cb = cPostData;
memcpy(data, pData, cPostData);
GlobalUnlock(bindinfo.stgmedData.hGlobal);
}
break;
case TYMED_ISTREAM:
STATSTG iStreamStat;
bindinfo.stgmedData.pstm->Stat(&iStreamStat, 0);
ULARGE_INTEGER iStreamSize = iStreamStat.cbSize;
delete data;
data = new BYTE[iStreamSize.QuadPart];
DWORD cbRead;
bindinfo.stgmedData.pstm->Read(data, iStreamSize.QuadPart, &cbRead);
cb = cbRead;
break;
}
}
}
myInstance->Fire_OnRequest(GetUniqueId(GetPtr(iWindow), GetPtr(this->m_spInternetBindInfo), pURL), tempContainerId, W2BSTR(pURL), strRequestHeaders, strMethod, data, cb);
delete[] data;
}
}
else if(ulStatusCode == BINDSTATUS_MIMETYPEAVAILABLE)
{
myInstance->Fire_OnMimeTypeAvailable(GetUniqueId(GetPtr(iWindow), GetPtr(this->m_spInternetBindInfo), pURL), tempContainerId, W2BSTR(pURL), W2BSTR(szStatusText));
}
else if(ulStatusCode == BINDSTATUS_COOKIE_STATE_LEASH || ulStatusCode == BINDSTATUS_COOKIE_STATE_ACCEPT)
{
CComPtr<IWinInetHttpInfo> spWinInetHttpInfo;
CComBSTR strCookies = L"";
HRESULT hrTemp = m_spTargetProtocol->QueryInterface(IID_IWinInetHttpInfo,
reinterpret_cast<void**>(&spWinInetHttpInfo));
if(SUCCEEDED(hrTemp))
{
DWORD size = 0;
DWORD flags = 0;
DWORD temp = 0;
do
{
strCookies.AppendBSTR(GetQueryInfo(spWinInetHttpInfo, HTTP_QUERY_SET_COOKIE, temp).m_str);
strCookies.Append(_T("\r\n"));
temp ;
} while(size > 0);
}
myInstance->Fire_OnCookieRecieved(GetUniqueId(GetPtr(iWindow), GetPtr(this->m_spInternetBindInfo), pURL), tempContainerId, W2BSTR(pURL), W2BSTR(strCookies));
}
else if(ulStatusCode == BINDSTATUS_CACHEFILENAMEAVAILABLE)
{
myInstance->Fire_OnCacheLoaded(GetUniqueId(GetPtr(iWindow), GetPtr(this->m_spInternetBindInfo), pURL), tempContainerId, W2BSTR(pURL), W2BSTR(szStatusText));
}
else if(ulStatusCode == BINDSTATUS_P3P_HEADER)
{
myInstance->Fire_OnP3PHeaderRecieved(GetUniqueId(GetPtr(iWindow), GetPtr(this->m_spInternetBindInfo), pURL), tempContainerId, W2BSTR(pURL), W2BSTR(szStatusText));
}
CoTaskMemFree(pURL);
}
return hr;
}
STDMETHODIMP MonitorSink::ReportResult(
HRESULT hrResult,
DWORD dwError,
LPCWSTR szResult)
{
if(myInstance && FAILED(hrResult))
{
CComPtr<IWinInetHttpInfo> spWinInetHttpInfo;
HRESULT hrTemp = m_spTargetProtocol->QueryInterface(IID_IWinInetHttpInfo,
reinterpret_cast<void**>(&spWinInetHttpInfo));
if(SUCCEEDED(hrTemp) && spWinInetHttpInfo)
{
WCHAR *pURL;
ULONG cEl = 1;
m_spInternetBindInfo->GetBindString(BINDSTRING_URL, &pURL, cEl, &cEl);
myInstance->Fire_OnError(GetUniqueId(GetPtr(iWindow), GetPtr(m_spInternetBindInfo), pURL), GetContainerId(spWinInetHttpInfo), pURL, hrResult, dwError);
CoTaskMemFree(pURL);
}
}
resultReported = true;
HRESULT hr = m_spInternetProtocolSink ?
m_spInternetProtocolSink->ReportResult(hrResult, dwError, szResult):
E_UNEXPECTED;
return hr;
}
STDMETHODIMP MonitorSink:: ReportData(
DWORD grfBSCF,
ULONG ulProgress,
ULONG ulProgressMax)
{
if(myInstance && !resultReported)
{
CComPtr<IWinInetHttpInfo> spWinInetHttpInfo;
HRESULT hrTemp = m_spInternetProtocolSink->QueryInterface(IID_IWinInetHttpInfo,
reinterpret_cast<void**>(&spWinInetHttpInfo));
if(SUCCEEDED(hrTemp) && spWinInetHttpInfo)
{
WCHAR *pURL;
ULONG cEl = 1;
m_spInternetBindInfo->GetBindString(BINDSTRING_URL, &pURL, cEl, &cEl);
myInstance->Fire_OnProgress(GetUniqueId(GetPtr(iWindow), GetPtr(m_spInternetBindInfo), pURL), GetContainerId(spWinInetHttpInfo), pURL, grfBSCF, ulProgress, ulProgressMax);
CoTaskMemFree(pURL);
}
}
HRESULT hr = m_spInternetProtocolSink ?
m_spInternetProtocolSink->ReportData(grfBSCF, ulProgress, ulProgressMax):
E_UNEXPECTED;
return hr;
}
LONG GetPtr(void * v)
{
if(!v)
{
return 0;
}
char sPtr[50];
sprintf_s(sPtr, "%p", v);
long ptr;
sscanf_s(sPtr, "%x", &ptr);
return ptr;
}
LONG GetUniqueId(LONG l1, LONG l2, WCHAR* w)
{
LONG result = GetHash(w);
result = 31 * result l2;
if(l1 != 0)
{
result = 31 * result l1;
}
return result;
}
LONG GetHash(WCHAR* w)
{
long hash = 0;
for(int i = 0;i < wcslen(w); i ) {
hash = 31 * hash (long)w[i];
}
return hash;
}
LONG GetContainerId(CComPtr<IWinInetHttpInfo> spWinInetHttpInfo)
{
return GetHash(GetQueryInfo(spWinInetHttpInfo, HTTP_QUERY_REFERER | HTTP_QUERY_FLAG_REQUEST_HEADERS, 0));
}
CComBSTR GetQueryInfo(CComPtr<IWinInetHttpInfo> spWinInetHttpInfo, DWORD dwOption, DWORD flags)
{
CComBSTR result = L"";
DWORD flags2 = flags;
DWORD size = 0;
HRESULT hrTemp = spWinInetHttpInfo->QueryInfo(
dwOption,
0, &size, &flags2, 0);
if(SUCCEEDED(hrTemp))
{
flags2 = flags;
LPSTR pbuf = new char[size 1];
pbuf[size] = '\0';
hrTemp = spWinInetHttpInfo->QueryInfo(
dwOption,
pbuf, &size, &flags2, 0);
if(SUCCEEDED(hrTemp))
{
result.Append(pbuf);
}
delete []pbuf;
}
return result;
}
int IndexOf(wchar_t* str, wchar_t* s)
{
int fromIndex = 0;
int size = wcslen(str);
if(!s || size == 0)
{
return -1;
}
int len = wcslen(s);
if(len == 0)
{
return -1;
}
if(len fromIndex > size)
{
return -1;
}
int countMatched = 0;
int firstFoundIndex = 0;
for(int i = fromIndex; i < size; i )
{
firstFoundIndex = i;
countMatched = 0;
if(str[i] == s[countMatched]){//found first letter
do
{
countMatched;
if(countMatched == len)
{
return i; //if all chars in "s" are found in a row, then the search is a success, return the index
}
firstFoundIndex;
}while(firstFoundIndex < size && str[firstFoundIndex] == s[countMatched]); //ensures that the loop does not step over bounds
}
}
return -1;
}
wchar_t* Substring(wchar_t *str, int beginIndex, int endIndex)
{
int size = wcslen(str);
wchar_t* temp = new wchar_t[endIndex - beginIndex 1];
int index = 0;
for(int i = beginIndex; i < endIndex; i){
temp[index] = str[i];
index;
}
temp[index] = '\0';
return temp;
}