/// <summary>
/// Runs a Google (google.co.in) search for <paramref name="searchStr"/> and passes the HTML
/// of each result page to <c>parseSinglePageHtml</c>, following the "next page" link
/// (<c>a</c> element with id <c>pnnext</c> and class <c>pn</c>) from page to page.
/// </summary>
/// <remarks>
/// Paging is driven by the <c>needContinueSearch</c> member: no search request is made if it
/// is false on entry, the loop ends as soon as it is false after a page has been parsed, and
/// this method sets it to false itself when a result page offers no usable next link.
/// </remarks>
/// <param name="searchStr">Search text, sent as the <c>q</c> query parameter.</param>
private void searchAndParse(string searchStr)
{
    const string googleHost = "http://www.google.co.in";
    string googleUrl = googleHost + "/";

    // Request the home page before searching; the returned HTML is not used.
    // NOTE(review): presumably this lets crifanLib pick up Google's cookies first - confirm.
    crifanLib.getUrlRespHtml(googleUrl);

    Dictionary<string, string> queryPara = new Dictionary<string, string>
    {
        { "newwindow", "1" },
        { "safe", "strict" },
        { "source", "hp" },
        { "q", searchStr },
        { "btnK", "Google Search" },
        { "site", "" }
    };

    string curGoogleSearchUrl = googleHost + "/search?" + crifanLib.quoteParas(queryPara, false);
    // The first search request names the home page as its referer.
    string curReferer = googleUrl;

    while (needContinueSearch)
    {
        Dictionary<string, string> headerDict = new Dictionary<string, string>
        {
            { "referer", curReferer }
        };

        string curSearchRespHtml = crifanLib.getUrlRespHtml(curGoogleSearchUrl, headerDict: headerDict);
        parseSinglePageHtml(curSearchRespHtml);

        // needContinueSearch is re-checked here because it is a member that the parse call
        // above has access to; when it is false the pager lookup is skipped.
        if (!needContinueSearch)
        {
            break;
        }

        HtmlAgilityPack.HtmlDocument htmlDoc = crifanLib.htmlToHtmlDoc(curSearchRespHtml);
        HtmlNode nextHtmlNode = htmlDoc.DocumentNode.SelectSingleNode("//a[@id='pnnext' and @class='pn']");

        // A pager anchor without an href (or with an empty one) is treated like a missing
        // anchor instead of dereferencing a null attribute.
        string hrefStr = (nextHtmlNode == null)
            ? string.Empty
            : nextHtmlNode.GetAttributeValue("href", string.Empty);

        if (string.IsNullOrEmpty(hrefStr))
        {
            needContinueSearch = false;
            break;
        }

        // The page just fetched becomes the referer of the next request.
        curReferer = curGoogleSearchUrl;
        // The href is site-relative and HTML-encoded (e.g. "&amp;"), so prefix the host and decode.
        curGoogleSearchUrl = HttpUtility.HtmlDecode(googleHost + hrefStr);
    }
}
var
This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)