Introduction
We've all seen blogs and other web sites that seem to be able to intersperse hyperlinks throughout their content, almost whimsically, wherever the given text is "linkable" — that is, formatted like a domain name or a link of some kind.
I recently had the need to do this (for my site, nonsequiturs.com), and couldn't find a great source for a starting point, well, in C# anyway. So, I went ahead and made one using regular expressions and simple string replaces. It works well, and is relatively fast and reliable. I thought I'd share the code in case others had the need for this functionality.
Features: Automatically makes a hyperlink out of anything that appears to be a domain name or URL. Will skip confusing text, like number sequences (e.g., 10.35), and will ignore existing hyperlinks. You can enclose text with a special tag if you want to specify an area that should not be processed (see the code comments for more). You can also add parameters to the hyperlink tags that are generated, like "target=..." and so forth.
For more code samples, or to contact me directly, visit my site.
/// <summary>
/// Token that temporarily replaces "." inside text that must not be auto-linked.
/// The link pattern requires a literal period, so text without periods is skipped.
/// </summary>
private const string AutoLinkPeriodToken = "[[[pk:period]]]";

// Options shared by the two "protected region" patterns. Singleline lets "."
// match newlines so a region may span several lines.
private const RegexOptions AutoLinkRegionOptions =
    RegexOptions.IgnoreCase | RegexOptions.Singleline |
    RegexOptions.CultureInvariant |
    RegexOptions.IgnorePatternWhitespace |
    RegexOptions.Compiled;

// The patterns are built once and reused; constructing a Compiled Regex on
// every call would repeat the compilation work each time.

// <nolink>...</nolink> regions the caller has asked to be left alone.
private static readonly Regex s_autoLinkNoLinkRegion =
    new Regex(@"<nolink>(.*?)</nolink>", AutoLinkRegionOptions);

// Existing anchors, which must not be linked a second time.
private static readonly Regex s_autoLinkAnchorRegion =
    new Regex(@"<a(.*?)</a>", AutoLinkRegionOptions);

// A period sitting between two digits (e.g. 10.35).
private static readonly Regex s_autoLinkDigitPeriod =
    new Regex(@"(?<=\d)\.(?=\d)", RegexOptions.Compiled);

// Text that looks like a domain name or URL.
private static readonly Regex s_autoLinkCandidate = new Regex(
    @"([a-zA-Z0-9\:/\-]*" +               // optional leading scheme/host characters
    @"[a-zA-Z0-9\-_]\." +                 // one label character followed by a period
    @"[a-zA-Z0-9\-_][a-zA-Z0-9\-_]" +     // at least two characters after the period
    @"[a-zA-Z0-9\?\=&#_\-/\.]*" +         // remainder: path, query string, fragment
    @"[^<>,;\.\s\)\(\]\[\""])",           // final char: not whitespace or listed punctuation
    RegexOptions.Compiled);

// Opening or closing nolink tag in any letter case, matching the IgnoreCase
// detection used by s_autoLinkNoLinkRegion.
private static readonly Regex s_autoLinkNoLinkTag = new Regex(
    @"</?nolink>",
    RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);

// Schemes that are kept as written instead of being prefixed with "http://".
private static readonly string[] s_autoLinkSchemes =
{
    "https://", "http://", "ftp://", "rtsp://", "mms://", "pcast://", "sftp://"
};

/// <summary>
/// Wraps every piece of text that looks like a domain name or URL in an
/// <c>&lt;a href="..."&gt;</c> element.
/// </summary>
/// <remarks>
/// Text inside <c>&lt;nolink&gt;...&lt;/nolink&gt;</c>, text inside existing
/// <c>&lt;a ...&gt;...&lt;/a&gt;</c> elements, and periods between two digits
/// are left untouched. The <c>nolink</c> tags themselves are removed from the
/// result. Matches that do not start with one of the known schemes
/// (http, https, ftp, sftp, rtsp, mms, pcast; compared case-insensitively)
/// get an <c>http://</c> prefix in the generated <c>href</c>.
/// </remarks>
/// <param name="strvar">The text to process. Null or empty input is returned unchanged.</param>
/// <param name="param">
/// Extra markup inserted verbatim immediately after the closing quote of the
/// <c>href</c> attribute; no separator is added, so it should start with a
/// space (for example <c>" target=\"_blank\""</c>). It becomes part of the
/// regex replacement pattern, so substitutions such as <c>$&amp;</c> are
/// expanded. May be null or empty.
/// </param>
/// <returns>The processed text with hyperlinks inserted.</returns>
public static string AutoHyperlinks(string strvar, string param)
{
    if (string.IsNullOrEmpty(strvar))
    {
        return strvar;
    }

    // Hide the periods in regions that must not be linked. A single evaluator
    // pass handles every region in place, so one replacement cannot disturb
    // the text of a later region.
    MatchEvaluator protectPeriods = new MatchEvaluator(AutoLinkProtectPeriods);
    string final = s_autoLinkNoLinkRegion.Replace(strvar, protectPeriods);
    final = s_autoLinkAnchorRegion.Replace(final, protectPeriods);
    final = s_autoLinkDigitPeriod.Replace(final, AutoLinkPeriodToken);

    // Two replacement patterns: one for matches that already carry a scheme
    // and one that supplies the default "http://".
    string schemedLink = "<a href=\"$&\"" + param + ">$&</a>";
    string bareLink = "<a href=\"http://$&\"" + param + ">$&</a>";
    final = s_autoLinkCandidate.Replace(final, delegate(Match candidate)
    {
        string pattern = AutoLinkHasKnownScheme(candidate.Value) ? schemedLink : bareLink;
        return candidate.Result(pattern);
    });

    // Restore the hidden periods and drop the nolink markers.
    final = final.Replace(AutoLinkPeriodToken, ".");
    return s_autoLinkNoLinkTag.Replace(final, string.Empty);
}

// Replaces every period in the matched region with the placeholder token.
private static string AutoLinkProtectPeriods(Match region)
{
    return region.Value.Replace(".", AutoLinkPeriodToken);
}

// Returns true when the text starts with one of the known schemes, ignoring case.
private static bool AutoLinkHasKnownScheme(string url)
{
    foreach (string scheme in s_autoLinkSchemes)
    {
        if (url.StartsWith(scheme, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}