Click here to Skip to main content
16,005,222 members
Home / Discussions / C / C++ / MFC
   

C / C++ / MFC

 
GeneralRe: CDC TextOut statements Pin
Eytukan8-Mar-09 7:47
Eytukan8-Mar-09 7:47 
GeneralRe: CDC TextOut statements Pin
prithaa8-Mar-09 7:51
prithaa8-Mar-09 7:51 
QuestionHow to use HTTP in native C++ ? Pin
Yanshof7-Mar-09 21:12
Yanshof7-Mar-09 21:12 
AnswerRe: How to use HTTP in native C++ ? Pin
«_Superman_»7-Mar-09 21:44
professional«_Superman_»7-Mar-09 21:44 
AnswerRe: How to use HTTP in native C++ ? Pin
Hamid_RT8-Mar-09 4:46
Hamid_RT8-Mar-09 4:46 
Question"The application has requested the Runtime to terminate it in an unusual way" CRT error?? Pin
JackPuppy7-Mar-09 19:39
JackPuppy7-Mar-09 19:39 
AnswerRe: "The application has requested the Runtime to terminate it in an unusual way" CRT error?? Pin
Hamid_RT7-Mar-09 19:57
Hamid_RT7-Mar-09 19:57 
GeneralRe: "The application has requested the Runtime to terminate it in an unusual way" CRT error?? [modified] Pin
JackPuppy8-Mar-09 1:22
JackPuppy8-Mar-09 1:22 
#include "stdafx.h"

// winsock2.h goes before windows.h so that windows.h does not pull in the
// conflicting Winsock 1 header first.
#include <winsock2.h>
#include <ws2tcpip.h>
#include <windows.h>

#include <exception>
#include <iostream>
#include <string>
#include <vector>


/////////////////// the Boost library must be installed first
#include  <boost/regex.hpp>
#include  <boost/lambda/lambda.hpp>
// NOTE(review): both std and boost are opened at file scope. Unqualified names
// such as regex, smatch or regex_search may become ambiguous if <regex> is ever
// included (directly or transitively) - confirm, or qualify the Boost names.
using namespace std;
using namespace boost;

// Ask the linker to pull in the Winsock 2 import library.
// NOTE(review): the quoted library name ends with a space - confirm the linker accepts it.
#pragma   comment(lib,   "ws2_32.lib ")

// Winsock implementation details filled in by WSAStartup() in _tmain.
WSAData Wsadata;
// Winsock version word passed to WSAStartup(); assigned in _tmain.
WORD wVersion;

// Fields scraped for a single post on a listing page. Every member is kept as
// text; the notes say which capture group of the post pattern _tmain copies in.
struct DOC
{
	std::string pcClass;	// capture group 1
	std::string pcNm;	// capture group 5
	std::string pcNmWb;	// host name prefix followed by capture group 4
	std::string pcDocNm;	// capture group 3; first line of each record written to the contents file
	std::string pcDocWb;	// capture group 2 (the href value); second line of each record
	std::string pcDy;	// capture group 8; last line of each record
	std::string pcHr;	// capture group 9
	std::string pcRpDy;	// not assigned anywhere in the visible code
	std::string pcRpHr;	// not assigned anywhere in the visible code
	std::string pcRead;	// capture group 6; read count, parsed with atoi and compared against 10000
	std::string pcRpCn;	// capture group 7; reply count
};


// hCont is the handle of "Contents.doc", opened in _tmain and written once per
// accepted post. hSemaThr and hSema are not referenced in the visible code.
HANDLE hSemaThr,hCont,hSema;
// Not referenced in the visible code.
int CanThr;


// Forward declarations; both functions are defined further down in this file.
int GetHostName(const string& HttpHead,string& hostname);
int BuildRequest(string& HttpRequest ,const string& url);
///<summary>
///Resolve hostname to an IPv4 address and connect the caller's socket to it on TCP port 80.
///</summary>
///<param name="hostname">Host name (or dotted address) of the HTTP server.</param>
///<param name="client">An already created, not yet connected stream socket.</param>
///<returns>1 when the connection is established, 0 when resolution or connect fails.</returns>
///<remarks>
///Winsock is neither started nor shut down here; the caller that called
///WSAStartup remains responsible for WSACleanup, so a failed connect no longer
///disables the socket calls the caller makes afterwards.
///</remarks>
int ConnectClient(const string& hostname,SOCKET& client)
{
	struct addrinfo aiHints;
	memset(&aiHints, 0, sizeof(aiHints));
	aiHints.ai_family = AF_INET;
	aiHints.ai_socktype = SOCK_STREAM;
	aiHints.ai_protocol = IPPROTO_TCP;

	// hostname.c_str() is used directly; no temporary heap copy is needed.
	struct addrinfo *aiList = NULL;
	const int retVal = getaddrinfo(hostname.c_str(), "80", &aiHints, &aiList);
	if (retVal != 0) {
		// getaddrinfo reports its error through the return value.
		cout<<"getaddrinfo() failed.\n"<<retVal;
		return 0;
	}

	int connected = 1;
	// ai_addrlen is the real length of the resolved address structure.
	if(connect(client,aiList->ai_addr,(int)aiList->ai_addrlen)==SOCKET_ERROR){
		cout<<"connect error"<<WSAGetLastError()<<endl;
		connected = 0;
	}

	// The result list is owned by us and must be released on every path.
	freeaddrinfo(aiList);
	return connected;
}
///<summary>
///send buf and return a vector
///</summary>
int SendRecive(const string& request,SOCKET& client,string& answer)
{	


	answer.clear();
	string HostName;
	if(!GetHostName(request,HostName))
	{
		return 0;
	}
	if(!ConnectClient(HostName,client))
	{
		return 0;
	}

	char* crequest=new char[request.size()+1];
	memset(crequest,0,request.size()+1);
	strcpy(crequest,request.c_str());
	send(client,crequest,(int)strlen(crequest),NULL);
	delete[] crequest;
	crequest=0;

	char bufrecv[1000];

	int byRecv=0;

	while(byRecv!=SOCKET_ERROR){
		memset(bufrecv,0,1000);
		byRecv=recv(client,bufrecv,999,0);
		if ( byRecv == 0  ){
			cout<<"all received out"<<endl;

			return 1;

		}
		else if(byRecv == WSAECONNRESET){
			cout<<"wsaeconnreset"<<endl;
			return 0;

		}
		answer+=string(bufrecv);

	}

	return 0;
}

///<summary>
///Extract the value of the "Host: " header from raw HTTP request text.
///</summary>
///<param name="HttpHead">Request text whose header lines end with "\r\n".</param>
///<param name="hostname">Receives the header value; left untouched when no header is found.</param>
///<returns>1 when a terminated "Host: " line was found, 0 otherwise.</returns>
///<remarks>
///Only a "Host: " that starts a line is accepted, so a header whose name merely
///ends in "Host: " (for example "X-Forwarded-Host: ") is not mistaken for it.
///</remarks>
int GetHostName(const std::string& HttpHead,std::string& hostname)
{
	static const char kHostField[]="Host: ";
	const std::string::size_type kHostFieldLen=sizeof(kHostField)-1;

	std::string::size_type pos=HttpHead.find(kHostField);
	while(pos!=std::string::npos){
		const bool atLineStart=(pos==0)||(HttpHead[pos-1]=='\n');
		if(atLineStart){
			const std::string::size_type valueBegin=pos+kHostFieldLen;
			const std::string::size_type valueEnd=HttpHead.find("\r\n",valueBegin);
			if(valueEnd==std::string::npos){
				// No line terminator after this point, so no later candidate can match either.
				return 0;
			}
			hostname.assign(HttpHead,valueBegin,valueEnd-valueBegin);
			return 1;
		}
		pos=HttpHead.find(kHostField,pos+1);
	}
	return 0;
}

DWORD WINAPI Grab(void* doc);
int _tmain(int argc, _TCHAR* argv[])

{
	wVersion=MAKEWORD(1,1);

	int iResult=::WSAStartup(wVersion,&Wsadata);
	switch(iResult){
case WSASYSNOTREADY:
	cout<<"not ready";

	exit(1);

case WSAVERNOTSUPPORTED:
	cout<<"version not supported";

	exit(1);
case WSAEFAULT:
	cout<<"wsadata fault";

	exit(1);

	}
	const char cstrFilename[]="webpage.txt";
	HANDLE hdFile=CreateFile(cstrFilename,GENERIC_READ|GENERIC_WRITE,FILE_SHARE_READ|FILE_SHARE_WRITE,NULL,OPEN_ALWAYS,FILE_ATTRIBUTE_NORMAL,NULL);
	if(hdFile==INVALID_HANDLE_VALUE){
		cout<<"cant't create the file;"<<GetLastError()<<endl;
		return 0;
	}

	HANDLE hErrorFile=CreateFile("ERROR.doc",GENERIC_READ|GENERIC_WRITE,FILE_SHARE_READ|FILE_SHARE_WRITE,NULL,OPEN_ALWAYS,FILE_ATTRIBUTE_NORMAL,NULL);
	if(hdFile==INVALID_HANDLE_VALUE){
		cout<<"cant't create the file;"<<GetLastError()<<endl;
		return 0;
	}



	SetFilePointer(hdFile,0,0,FILE_BEGIN);

	char HttpRequestFile[1000];
	memset(HttpRequestFile,0,1000);
	DWORD nRead;
	BOOL bResult=ReadFile(hdFile,HttpRequestFile,500,&nRead,NULL);
	if(!bResult){
		cout<<"readfile error"<<GetLastError()<<endl;
		return 0;
	}
	string HttpRequest(HttpRequestFile);

	string HostName;
	SOCKET client=socket(AF_INET,SOCK_STREAM,0);
	if(client==INVALID_SOCKET){
		cout<<endl<<"socket error:"<<WSAGetLastError()<<endl;
		WSACleanup();
		return 0;
	}

	string HttpResponse;
	if(!SendRecive(HttpRequest, client,HttpResponse))
	{
		cout<<"Receive none"<<endl<<GetLastError()<<endl;
		return 0;
	}
	closesocket(client);



	hCont=CreateFile("Contents.doc",GENERIC_WRITE|GENERIC_READ,FILE_SHARE_WRITE|FILE_SHARE_READ,0,OPEN_ALWAYS,FILE_ATTRIBUTE_NORMAL,0);
	if(hdFile==INVALID_HANDLE_VALUE){
		cout<<"cant't create the file;"<<GetLastError()<<endl;
		return 0;
	}
	SetFilePointer(hCont,0,0,FILE_END);

	DWORD time=GetTickCount();

	boost::regex rPost("([A-Z])[\\s\\S]*?href='([\\s\\S]+?)'[\\s\\S]+?>([\\s\\S]+?)[\\s\\S]+?)(\\d+?)[\\s\\S]+?tdfont>(\\d+?)[\\s\\S]+?tdfont>([\\s\\S]+?)\\s([\\s\\S]+?)"),rNextPage("上一页\\s<a\\shref>下一页"),rNextPage2("<a\\shref>下一页");
	smatch sm;
	string::const_iterator HttpResponseBegin,HttpResponseEnd;
	string NextPageUrl;
	int i=1;
	dd;
	while(++i){

		if(i/10==0) Sleep(1000);
		_ASSERTE(HttpResponse.size()>0);
		HttpResponseBegin=HttpResponse.begin();
		HttpResponseEnd=HttpResponse.end();

		while(regex_search(HttpResponseBegin,HttpResponseEnd,sm,rPost))
		{
			////if((GetTickCount()-time)>9000){
			//	break;

			//}
			HttpResponseBegin=sm[0].second;
			DOC dc;
			dc.pcClass=sm[1];
			dc.pcDocWb=sm[2];
			dc.pcDocNm=sm[3];
			dc.pcNmWb=HostName;
			dc.pcNmWb+=sm[4];
			dc.pcNm=sm[5];
			dc.pcRead=sm[6];
			dc.pcRpCn=sm[7];
			dc.pcDy=sm[8];
			dc.pcHr=sm[9];
			int nRead=atoi(dc.pcRead.c_str());
			if (nRead<10000) continue;
			string Answer=dc.pcDocNm+"\n"+dc.pcDocWb+"\n"+"阅读次数:"+dc.pcRead+"\n"+"回复次数"+dc.pcRpCn+"\n"+dc.pcDy+"\n\n\n";

			DWORD WordWritten;

			if(!WriteFile(hCont,Answer.c_str(),Answer.size(),&WordWritten,0)){
				cout<<"Cant't Write"<<Answer<<endl<<GetLastError()<<endl;
			}
		}
		HttpResponseBegin=HttpResponse.begin();
		if(!regex_search(HttpResponseBegin,HttpResponseEnd,sm,rNextPage)){

			if(!regex_search(HttpResponseBegin,HttpResponseEnd,sm,rNextPage2) )
			{
				char* Buffer=new char[HttpResponse.size()+1];
				memset(Buffer,0,HttpResponse.size()+1);
				strcpy(Buffer,HttpResponse.c_str());
				DWORD dw;
				WriteFile(hErrorFile,Buffer,HttpResponse.size(),&dw,0);
				break;
			}
			NextPageUrl=sm[1];
		}
		else{

			NextPageUrl="http://www.tianya.cn/new/publicforum/articleslist.asp"+sm[1];
		}


		if(!BuildRequest(HttpRequest,NextPageUrl)){
			cout<<"can't build request"<<endl;
			return 0;
		}
		client=socket(AF_INET,SOCK_STREAM,0);
		if(client==INVALID_SOCKET){
			cout<<endl<<"socket error:"<<WSAGetLastError()<<endl;

			WSACleanup();
			return 0;
		}
		if(!SendRecive(HttpRequest,client,HttpResponse)){
			cout<<"Cant receive "<<endl<<GetLastError()<<endl;

		}
		closesocket(client);



	}

	CloseHandle(hCont);

	CloseHandle(hdFile);

	WSACleanup();
	system("pause");
	return 0;

}




///<summary>
///Build the HTTP/1.1 GET request used to fetch the page at url.
///</summary>
///<param name="HttpRequest">Receives the request text; left untouched when the url is rejected.</param>
///<param name="url">Absolute "http://host/path" url. A url without a path requests "/".</param>
///<returns>1 when the request was built, 0 when url has no "http://" prefix or no host.</returns>
int BuildRequest(std::string& HttpRequest ,const std::string& url)
{
	static const char kScheme[]="http://";

	const std::string::size_type schemePos=url.find(kScheme);
	if(schemePos==std::string::npos) return 0;

	// The host runs up to the first '/' after the scheme, or to the end of the url.
	const std::string::size_type hostBegin=schemePos+(sizeof(kScheme)-1);
	const std::string::size_type pathBegin=url.find('/',hostBegin);
	const std::string::size_type hostEnd=(pathBegin==std::string::npos)?url.size():pathBegin;
	if(hostEnd==hostBegin) return 0;

	const std::string HostName(url,hostBegin,hostEnd-hostBegin);
	// "http://host" and "http://host/" are valid urls for the root document.
	const std::string RequestPath=(pathBegin==std::string::npos)?std::string("/"):url.substr(pathBegin);

	// NOTE(review): the header block is kept exactly as it was. The fixed
	// If-Modified-Since date may make a server answer 304 without a body, and
	// Keep-Alive leaves the connection open while the receiver waits for it to
	// close - confirm both are intended.
	HttpRequest=std::string("GET ")+RequestPath+" HTTP/1.1\r\n"
		+"Accept: */*\r\nReferer: http://www.tianya.cn/publicforum/Content/house/1/99375.shtml\r\nAccept-Language: zh-cn\r\nUA-CPU: x86\r\nIf-Modified-Since: Thu, 22 Jan 2009 02:44:12 GMT; length=1088\r\nUser-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; CIBA)\r\n"
		+"Host: "+HostName
		+"\r\nConnection: Keep-Alive\r\n\r\n";

	return 1;

}


</vector></ws2tcpip.h></winsock2.h></iostream></windows.h>


The webpage.txt file contains a raw HTTP request:

GET /publicforum/articleslist/0/develop.shtml HTTP/1.1
Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*
Accept-Language: zh-cn
UA-CPU: x86
Accept-Encoding: gzip, deflate
User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; CIBA)
Host: www.tianya.cn
Connection: Keep-Alive


modified on Sunday, March 8, 2009 7:29 AM

GeneralRe: "The application has requested the Runtime to terminate it in an unusual way" CRT error?? Pin
JackPuppy8-Mar-09 1:32
JackPuppy8-Mar-09 1:32 
GeneralRe: "The application has requested the Runtime to terminate it in an unusual way" CRT error?? Pin
Hamid_RT8-Mar-09 3:51
Hamid_RT8-Mar-09 3:51 
GeneralRe: "The application has requested the Runtime to terminate it in an unusual way" CRT error?? Pin
JackPuppy8-Mar-09 5:17
JackPuppy8-Mar-09 5:17 
AnswerRe: "The application has requested the Runtime to terminate it in an unusual way" CRT error?? Pin
Cedric Moonen7-Mar-09 23:44
Cedric Moonen7-Mar-09 23:44 
QuestionA bug about MFC [modified] Pin
Archy_Yu7-Mar-09 16:57
Archy_Yu7-Mar-09 16:57 
AnswerRe: A bug about MFC Pin
Hamid_RT7-Mar-09 19:58
Hamid_RT7-Mar-09 19:58 
GeneralRe: A bug about MFC Pin
Archy_Yu7-Mar-09 21:52
Archy_Yu7-Mar-09 21:52 
GeneralRe: A bug about MFC Pin
Perisic, Aleksandar8-Mar-09 1:31
Perisic, Aleksandar8-Mar-09 1:31 
GeneralRe: A bug about MFC Pin
Hamid_RT8-Mar-09 3:51
Hamid_RT8-Mar-09 3:51 
QuestionHelp Please. Pin
FISH7867-Mar-09 14:08
FISH7867-Mar-09 14:08 
AnswerRe: Help Please. Pin
«_Superman_»7-Mar-09 14:31
professional«_Superman_»7-Mar-09 14:31 
GeneralRe: Help Please. Pin
FISH7867-Mar-09 14:37
FISH7867-Mar-09 14:37 
GeneralRe: Help Please. Pin
«_Superman_»7-Mar-09 14:40
professional«_Superman_»7-Mar-09 14:40 
GeneralRe: Help Please. Pin
FISH7867-Mar-09 15:04
FISH7867-Mar-09 15:04 
Questionabout MBCS and unicode in Visual C++ 2005 Pin
DSPCottage7-Mar-09 7:42
DSPCottage7-Mar-09 7:42 
AnswerRe: about MBCS and unicode in Visual C++ 2005 Pin
Code-o-mat7-Mar-09 9:32
Code-o-mat7-Mar-09 9:32 
AnswerRe: about MBCS and unicode in Visual C++ 2005 Pin
«_Superman_»7-Mar-09 13:45
professional«_Superman_»7-Mar-09 13:45 

General General    News News    Suggestion Suggestion    Question Question    Bug Bug    Answer Answer    Joke Joke    Praise Praise    Rant Rant    Admin Admin   

Use Ctrl+Left/Right to switch messages, Ctrl+Up/Down to switch threads, Ctrl+Shift+Left/Right to switch pages.