Click here to Skip to main content
65,938 articles
CodeProject is changing. Read more.
Articles / desktop / MFC

Finding a substring in a text

3.50/5 (2 votes)
15 Aug 2011CPOL 24.7K  
How to find a substring in a text, forward and backward, with Case Sensitive and Match Whole Word options.

Here's how to find a substring in a text, forward and backward, with Case Sensitive and Match Whole Word options (the main function is RichEditFind). If bMatchCase is false, both the text and the search string are converted to lower case so that a case-insensitive match can be found (if it is true, the text is not altered). If bWholeWord is true, a match is accepted only when the characters immediately before and after it are separators. A separator is any character that is not alphanumeric (a-z, A-Z, 0-9); the beginning and the end of the text also count as separators. If bWholeWord is false, the previous and next characters are ignored (they can be alphanumeric). nPosition specifies the start position for finding the substring in the text. When bReverse is false, CString's Find function is used to find the substring in the text; otherwise the custom ReverseFind function shown below is used (because CString's own ReverseFind member can only search for a single character, not for a substring).


C++
// Returns nonzero when ch is one of A-Z, a-z or 0-9.
static BOOL IsWordChar(TCHAR ch)
{
   return ((ch >= _T('A')) && (ch <= _T('Z'))) ||
          ((ch >= _T('a')) && (ch <= _T('z'))) ||
          ((ch >= _T('0')) && (ch <= _T('9')));
}

// Whole-word test for an occurrence of strFindText inside strRichEdit that
// starts at index nPosition.
//
// Returns TRUE when neither neighbour of the occurrence is alphanumeric
// (A-Z, a-z, 0-9). A neighbour that does not exist - the occurrence starts
// at index 0 or ends at the end of the text - is not examined.
// Returns FALSE as soon as one alphanumeric neighbour is seen.
BOOL IsSeparator(CString strRichEdit, CString strFindText, int nPosition)
{
   const int nTextLen = strRichEdit.GetLength();
   const int nAfter = nPosition + strFindText.GetLength(); // index just past the match

   // Character immediately before the match, if there is one.
   if (nPosition > 0 && IsWordChar(strRichEdit.GetAt(nPosition - 1)))
      return FALSE;

   // Character immediately after the match, if there is one.
   if (nAfter < nTextLen && IsWordChar(strRichEdit.GetAt(nAfter)))
      return FALSE;

   return TRUE;
}
 
int ReverseFind(LPCTSTR lpszData, LPCTSTR lpszSub, int startpos)
{
   // this function is used to find lpszSub
   // substring in reverse order into lpszData 
   int lenSub = lstrlen( lpszSub );
   int len = lstrlen( lpszData );
 
   if (0 < lenSub && 0 < len)
   {
      if (startpos == -1 || startpos >= len) startpos = len - 1;
      for (LPCTSTR lpszReverse = lpszData + startpos; 
         lpszReverse != lpszData; --lpszReverse)
         if (_tcsncmp(lpszSub, lpszReverse, lenSub ) == 0)
            return (lpszReverse - lpszData);
   }
   return -1;
}
 
int RichEditFind(CString strRichEdit, CString strFindText,
   int nPosition, BOOL bReverse, BOOL bMatchCase, BOOL bWholeWord)
{
   // this function does the actual search with
   // Case Sensitive and Match Whole Word options
   if (nPosition < 0)
      nPosition = strRichEdit.GetLength() - 1;
 
   if (bReverse) // searching a substring in reverse order?
   {
      if (bMatchCase) // is Case Sensitive option enabled?
      {
         if (bWholeWord) // is Match Whole Word option enabled?
         {
            int nRetVal = ReverseFind(strRichEdit, strFindText, nPosition);
            while (nRetVal != -1)
            {
               if (IsSeparator(strRichEdit, strFindText, nRetVal))
                  return nRetVal;
               nRetVal = ReverseFind(strRichEdit, strFindText, --nRetVal);
            }
         }
         else
         {
            return ReverseFind(strRichEdit, strFindText, nPosition);
         }
      }
      else
      {
         strRichEdit.MakeLower();
         strFindText.MakeLower();
 
         if (bWholeWord) // is Match Whole Word option enabled?
         {
            int nRetVal = ReverseFind(strRichEdit, strFindText, nPosition);
            while (nRetVal != -1)
            {
               if (IsSeparator(strRichEdit, strFindText, nRetVal))
                  return nRetVal;
               nRetVal = ReverseFind(strRichEdit, strFindText, --nRetVal);
            }
         }
         else
         {
            return ReverseFind(strRichEdit, strFindText, nPosition);
         }
      }
   }
   else // normal search
   {
      if (bMatchCase) // is Case Sensitive option enabled?
      {
         if (bWholeWord) // is Match Whole Word option enabled?
         {
            int nRetVal = strRichEdit.Find(strFindText, nPosition);
            while (nRetVal != -1)
            {
               if (IsSeparator(strRichEdit, strFindText, nRetVal))
                  return nRetVal;
               nRetVal = strRichEdit.Find(strFindText, ++nRetVal);
            }
         }
         else
         {
            return strRichEdit.Find(strFindText, nPosition);
         }
      }
      else
      {
         strRichEdit.MakeLower();
         strFindText.MakeLower();
 
         if (bWholeWord) // is Match Whole Word option enabled?
         {
            int nRetVal = strRichEdit.Find(strFindText, nPosition);
            while (nRetVal != -1)
            {
               if (IsSeparator(strRichEdit, strFindText, nRetVal))
                  return nRetVal;
               nRetVal = strRichEdit.Find(strFindText, ++nRetVal);
            }
         }
         else
         {
            return strRichEdit.Find(strFindText, nPosition);
         }
      }
   }
   return -1;
}

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)