Click here to Skip to main content
65,938 articles
CodeProject is changing. Read more.
Articles / web / HTML

General string parsing techniques

4.00/5 (1 vote)
1 Nov 2011 | CPOL | 5.2K
namespace ParseTests{ using System; using System.Collections.Generic; using System.Text.RegularExpressions; public static class ExtensionMethods { /// /// Extracts text from a source string that is between two specified tags. ...
C#
namespace ParseTests
{
    using System;
    using System.Collections.Generic;
    using System.Text.RegularExpressions;

    /// <summary>
    /// String extension methods for simple token-based text extraction.
    /// </summary>
    public static class ExtensionMethods
    {
        /// <summary>
        /// Extracts every piece of text from a source string that lies between
        /// two specified tokens. The tokens are treated as literal text (they are
        /// regex-escaped), matching is non-greedy (each start token is paired with
        /// the nearest following end token), matches never overlap, and the text
        /// between tokens may span multiple lines.
        /// Tokens can be included in the results by setting 'includeTokens' to true.
        /// </summary>
        /// <param name="source">String to be searched.</param>
        /// <param name="startToken">String that
        ///    identifies the beginning of a match.</param>
        /// <param name="endToken">String that identifies
        ///    the end of a match.</param>
        /// <param name="includeTokens">Value indicating
        ///     whether to include tokens.</param>
        /// <returns>A lazily evaluated sequence with one entry per match, in
        ///     order of appearance; empty when nothing matches.</returns>
        /// <exception cref="ArgumentNullException">
        ///     <paramref name="source"/>, <paramref name="startToken"/> or
        ///     <paramref name="endToken"/> is null.</exception>
        public static IEnumerable<string> GetBetween(
            this string source,
            string startToken,
            string endToken,
            bool includeTokens = false)
        {
            // Validate here, in a non-iterator method, so that bad arguments fail
            // at the call site. Inside an iterator body these checks would be
            // deferred until the caller starts enumerating the result.
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }

            if (startToken == null)
            {
                throw new ArgumentNullException("startToken");
            }

            if (endToken == null)
            {
                throw new ArgumentNullException("endToken");
            }

            return GetBetweenIterator(source, startToken, endToken, includeTokens);
        }

        /// <summary>
        /// Lazily yields the matches for <see cref="GetBetween"/>; arguments are
        /// assumed to have been validated by the caller.
        /// </summary>
        private static IEnumerable<string> GetBetweenIterator(
            string source,
            string startToken,
            string endToken,
            bool includeTokens)
        {
            // Escape the tokens so regex metacharacters in them match literally.
            // Group 1 is the lazily matched text between the two tokens.
            string pattern = string.Format(
                "(?:{0})(.*?)(?:{1})",
                Regex.Escape(startToken),
                Regex.Escape(endToken));

            // Singleline lets '.' match '\n', so a match may cross line breaks.
            Match match = Regex.Match(source, pattern, RegexOptions.Singleline);

            while (match.Success)
            {
                // match.Value is the whole match (tokens included);
                // Groups[1] is only the text between the tokens.
                yield return includeTokens ? match.Value : match.Groups[1].Value;
                match = match.NextMatch();
            }
        }
    }
}

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL).