Introduction
First, I get an IHTMLDocument2 interface for the browser component and then get the selection property of the interface to get an IHTMLSelectionObject
interface. Now came the tough part, actually parsing the stuff. I'd assumed that if I was able to create a control range using the createRange
method for the selection, that I'd be able to get a list which would have the HTML tags and their attributes neatly separated.
Now, using IMarkupServices, I can enumerate all the elements in the
selected portion of the web browser. Here is the code:
#include <afxwin.h>
#include <afxdisp.h>
#include <atlbase.h>
extern CComModule _Module;
#include <atlcom.h>
#include <mshtml.h>
#include <MsHtmcid.h>
// Forward declarations for the helpers defined below.

// Retrieves an IMarkupServices interface for the given document object and
// stores it in *pMarkupServices. Returns S_OK on success, E_FAIL otherwise.
HRESULT GetMarkupServices(IDispatch *pDocument,
IMarkupServices ** pMarkupServices );
// Walks the current selection of the document and writes a textual
// description of the elements it visits into msg (msg is cleared first).
HRESULT EnumSelectionElements(IDispatch *pDocument, CString &msg);
// Appends a textual description of pElement (tag name, inner text, image
// source) and of the elements it contains to msg.
HRESULT PrintElement(IHTMLElement *pElement, CString &msg);
// Enumerates the elements covered by the document's current selection and
// writes a description of each one (produced by PrintElement) into msg.
//
// pDocument  Document object; must support IHTMLDocument2.
// msg        Cleared on entry, then receives one PrintElement description
//            per visited element. On failure it holds whatever was gathered
//            before the failing step.
//
// Returns S_OK when the whole selection was walked, E_FAIL otherwise. As in
// the original implementation, any result other than S_OK from an MSHTML
// call (including S_FALSE) is reported as E_FAIL.
HRESULT EnumSelectionElements(IDispatch * pDocument, CString &msg)
{
    msg.Empty();

    CComQIPtr<IHTMLDocument2> pDoc(pDocument);
    if (!pDoc)
        return E_FAIL;

    CComPtr<IHTMLSelectionObject> pSel;
    HRESULT hr = pDoc->get_selection(&pSel);
    if (hr != S_OK || !pSel)
        return E_FAIL;

    // createRange hands back a plain IDispatch. Query it for IHTMLTxtRange
    // instead of casting: the object is not guaranteed to be a text range,
    // and a cast would call through the wrong interface in that case.
    CComPtr<IDispatch> pRangeDisp;
    hr = pSel->createRange(&pRangeDisp);
    if (hr != S_OK || !pRangeDisp)
        return E_FAIL;
    CComQIPtr<IHTMLTxtRange> pRange(pRangeDisp);
    if (!pRange)
        return E_FAIL;

    CComPtr<IMarkupServices> pMarkupServices;
    hr = GetMarkupServices(pDocument, &pMarkupServices);
    if (hr != S_OK || !pMarkupServices)
        return E_FAIL;

    CComPtr<IMarkupPointer> pHtmlStart;
    hr = pMarkupServices->CreateMarkupPointer(&pHtmlStart);
    if (hr != S_OK || !pHtmlStart)
        return E_FAIL;

    CComPtr<IMarkupPointer> pHtmlEnd;
    hr = pMarkupServices->CreateMarkupPointer(&pHtmlEnd);
    if (hr != S_OK || !pHtmlEnd)
        return E_FAIL;

    // Position the two pointers at the boundaries of the selected range.
    hr = pMarkupServices->MovePointersToRange(pRange, pHtmlStart, pHtmlEnd);
    if (hr != S_OK)
        return E_FAIL;

    for (;;)
    {
        // Stop once the start pointer has moved past the end of the range.
        BOOL bRight = FALSE;
        hr = pHtmlStart->IsRightOf(pHtmlEnd, &bRight);
        if (hr != S_OK)
            return E_FAIL;
        if (bRight)
            break;

        // A fresh smart pointer per iteration: nothing is carried over (or
        // leaked) from the previous element.
        CComPtr<IHTMLElement> pElement;
        hr = pHtmlStart->CurrentScope(&pElement);
        if (hr != S_OK || !pElement)
            return E_FAIL;

        // PrintElement reads the tag name and inner text itself, so they are
        // not fetched a second time here; its failure aborts the walk.
        CString ele_msg;
        if (FAILED(PrintElement(pElement, ele_msg)))
            return E_FAIL;
        msg += ele_msg;

        hr = pHtmlStart->MoveUnit(MOVEUNIT_NEXTBLOCK);
        if (hr != S_OK)
            return E_FAIL;
    }

    return S_OK;
}
// Obtains the IMarkupServices interface for a document.
//
// The document is queried for IHTMLDocument2, its parent window is queried
// for IServiceProvider, and that provider is asked for IMarkupServices.
//
// pDocument        Document object; must support IHTMLDocument2.
// pMarkupServices  Receives the interface on success (the caller releases
//                  it). Set to NULL on entry so that it is well defined on
//                  every failure path.
//
// Returns S_OK on success, E_FAIL on any failure (including a NULL
// pMarkupServices argument).
HRESULT GetMarkupServices(IDispatch *pDocument,
                          IMarkupServices ** pMarkupServices)
{
    if (!pMarkupServices)
        return E_FAIL;
    *pMarkupServices = NULL;

    CComQIPtr<IHTMLDocument2> pDoc(pDocument);
    if (!pDoc)
        return E_FAIL;

    CComPtr<IHTMLWindow2> pWindow;
    HRESULT hr = pDoc->get_parentWindow(&pWindow);
    if (hr != S_OK || !pWindow)
        return E_FAIL;

    CComQIPtr<IServiceProvider> pService(pWindow);
    if (!pService)
        return E_FAIL;

    hr = pService->QueryService(CLSID_HTMLDocument,
                                IID_IMarkupServices,
                                reinterpret_cast<void **>(pMarkupServices));
    if (hr != S_OK)
        return E_FAIL;

    return S_OK;
}
// Appends a one-line description of pElement, followed by the descriptions
// of the elements nested inside it, to msg, and sends the same text to the
// debug trace output.
//
// The line has the form
//     tagName=<tag>[,innerText=<text>][,src=<url>]\n
// where the innerText part is present only when the text is non-empty and
// the src part only when the element supports IHTMLImgElement and has a
// non-empty source.
//
// pElement  Element to describe; must not be NULL.
// msg       Receives the appended text. It is left untouched when the
//           function fails.
//
// Returns S_OK on success, E_POINTER for a NULL element, E_FAIL if the
// child collection cannot be obtained, or the failing HRESULT of the MSHTML
// call that went wrong.
HRESULT PrintElement(IHTMLElement *pElement, CString &msg)
{
    if (!pElement)
        return E_POINTER;

    CComBSTR bstrTagName;
    HRESULT hr = pElement->get_tagName(&bstrTagName);
    if (FAILED(hr))
        return hr;

    CComBSTR bstrInnerText;
    hr = pElement->get_innerText(&bstrInnerText);
    if (FAILED(hr))
        return hr;

    // Only image elements expose a src attribute through IHTMLImgElement.
    CComBSTR bstrSrc;
    CComQIPtr<IHTMLImgElement> pImg(pElement);
    if (pImg)
    {
        hr = pImg->get_src(&bstrSrc);
        if (FAILED(hr))
            return hr;
    }

    // Build the line by concatenation rather than Format("%S", ...): this
    // avoids passing a CComBSTR object through varargs and gives the same
    // text in both ANSI and Unicode builds.
    CString ele_msg(_T("tagName="));
    ele_msg += CString(bstrTagName);
    if (bstrInnerText.Length())
    {
        ele_msg += _T(",innerText=");
        ele_msg += CString(bstrInnerText);
    }
    if (bstrSrc.Length())
    {
        ele_msg += _T(",src=");
        ele_msg += CString(bstrSrc);
    }
    ele_msg += _T("\n");

    // Recurse over direct children only. The "all" collection holds every
    // descendant, so recursing over it would describe nested elements more
    // than once.
    CComPtr<IDispatch> pChildrenDisp;
    hr = pElement->get_children(&pChildrenDisp);
    if (FAILED(hr))
        return hr;
    CComQIPtr<IHTMLElementCollection> pChildren(pChildrenDisp);
    if (!pChildren)
        return E_FAIL;

    long count = 0;
    hr = pChildren->get_length(&count);
    if (FAILED(hr))
        return hr;

    for (long i = 0; i < count; i++)
    {
        CComVariant index(i);
        CComPtr<IDispatch> pdisp;
        hr = pChildren->item(index, index, &pdisp);
        if (FAILED(hr))
            return hr;

        CComQIPtr<IHTMLElement> pitem(pdisp);
        if (!pitem)
            continue;

        // As before, a child that cannot be described is skipped: it
        // contributes no text and does not abort the parent.
        PrintElement(pitem, ele_msg);
    }

    msg += ele_msg;
    // Pass the text as an argument, never as the format string.
    TRACE(_T("%s"), static_cast<LPCTSTR>(ele_msg));
    return S_OK;
}