Have you ever wondered how to display text on a web form with its HTML tags stripped out, while still accepting and saving HTML through that same form? Sounds confusing?
To make it clear, here is an example. Let's say you have a form with a rich text box (FTB or FCK) that lets users cut and paste content containing HTML tags, so that it can be displayed properly as, for instance, a blog article. In some places, however, you want that content stripped of its HTML tags, such as when displaying a summary in a grid. My solution is to strip the HTML before displaying the text wherever that is needed.
So when you copy this...
Hello, World!
...it won't show on your grid as this:
<html>
<head>
<title>
Hello World
</title>
</head>
<body>
<font size ="4" color="blue">
Hello, World!
</font>
</body>
</html>
but as this:
"Hello, World!"
Now, with a mix of Replace and regular expressions, I created a class to handle that, and here it is:
/// <summary>
/// Converts HTML markup into plain text by applying, in <c>iID</c> order, the
/// regular-expression rules returned by <c>GetTableDefinition</c>, then limiting
/// runs of carriage returns to two and runs of tabs to four.
/// </summary>
/// <param name="sInputString">The HTML text to clean. May be null or empty.</param>
/// <returns>
/// The cleaned text. If the input is null or empty, or if any step of the cleaning
/// fails, the original <paramref name="sInputString"/> is returned unchanged.
/// </returns>
public string StripHTML(string sInputString)
{
    // Nothing to clean: return the value as-is instead of relying on an
    // exception thrown by Replace on a null reference.
    if (string.IsNullOrEmpty(sInputString))
    {
        return sInputString;
    }

    try
    {
        // Flatten the source layout first: line breaks become spaces and tabs
        // are dropped, so the only \r and \t characters left afterwards are
        // the ones the rules below insert on purpose.
        string sOutputString = sInputString
            .Replace("\r", " ")
            .Replace("\n", " ")
            .Replace("\t", string.Empty);

        // Rule order matters (tags are normalised before they are removed).
        // Select returns the rows already sorted; setting DefaultView.Sort
        // would not change the order in which DataTable.Rows is enumerated.
        DataRow[] cleaningRules = GetTableDefinition().Select(string.Empty, "iID ASC");
        foreach (DataRow drCleaningItem in cleaningRules)
        {
            string sOriginalString = drCleaningItem["sOriginalString"].ToString();
            string sReplacementString = drCleaningItem["sReplacementString"].ToString();
            sOutputString = Regex.Replace(
                sOutputString, sOriginalString, sReplacementString, RegexOptions.IgnoreCase);
        }

        // Cap consecutive line breaks at two and consecutive tabs at four.
        // A single quantified pattern handles runs of any length in one pass.
        sOutputString = Regex.Replace(sOutputString, @"\r{3,}", "\r\r");
        sOutputString = Regex.Replace(sOutputString, @"\t{5,}", "\t\t\t\t");

        return sOutputString;
    }
    catch
    {
        // Best-effort contract: on any failure hand back the untouched input
        // rather than propagating the error to the caller.
        return sInputString;
    }
}
/// <summary>
/// Builds the ordered list of clean-up rules used to turn HTML into plain text.
/// </summary>
/// <returns>
/// A table with three columns: <c>iID</c> (the position of the rule in the
/// sequence), <c>sOriginalString</c> (a regular-expression pattern) and
/// <c>sReplacementString</c> (the text substituted for each match).
/// </returns>
private DataTable GetTableDefinition()
{
    DataTable dtCleaningCollection = new DataTable();
    dtCleaningCollection.Columns.Add("iID", typeof(int));
    dtCleaningCollection.Columns.Add("sOriginalString", typeof(string));
    dtCleaningCollection.Columns.Add("sReplacementString", typeof(string));

    // Collapse runs of spaces.
    dtCleaningCollection.Rows.Add(1, @"( )+", " ");

    // Normalise <head>, <script> and <style> tags (drop attributes and inner
    // spaces), then remove each element together with its content. The
    // content match is lazy so that text lying between two separate
    // <script> or <style> elements is not swallowed along with them.
    dtCleaningCollection.Rows.Add(2, @"<( )*head([^>])*>", "<head>");
    dtCleaningCollection.Rows.Add(3, @"(<( )*(/)( )*head( )*>)", "</head>");
    dtCleaningCollection.Rows.Add(4, "(<head>).*?(</head>)", string.Empty);
    dtCleaningCollection.Rows.Add(5, @"<( )*script([^>])*>", "<script>");
    dtCleaningCollection.Rows.Add(6, @"(<( )*(/)( )*script( )*>)", "</script>");
    dtCleaningCollection.Rows.Add(7, @"(<script>).*?(</script>)", string.Empty);
    dtCleaningCollection.Rows.Add(8, @"<( )*style([^>])*>", "<style>");
    dtCleaningCollection.Rows.Add(9, @"(<( )*(/)( )*style( )*>)", "</style>");
    dtCleaningCollection.Rows.Add(10, "(<style>).*?(</style>)", string.Empty);

    // Structural tags become tabs and line breaks. The <br> rule also
    // accepts the self-closing forms <br/> and <br />.
    dtCleaningCollection.Rows.Add(11, @"<( )*td([^>])*>", "\t");
    dtCleaningCollection.Rows.Add(12, @"<( )*br( )*(/)?( )*>", "\r");
    dtCleaningCollection.Rows.Add(13, @"<( )*li( )*>", "\r");
    dtCleaningCollection.Rows.Add(14, @"<( )*div([^>])*>", "\r\r");
    dtCleaningCollection.Rows.Add(15, @"<( )*tr([^>])*>", "\r\r");
    dtCleaningCollection.Rows.Add(16, @"<( )*p([^>])*>", "\r\r");

    // Every remaining tag is removed.
    dtCleaningCollection.Rows.Add(17, @"<[^>]*>", string.Empty);

    // Translate common character entities. Each pattern matches the named
    // entity and, where one exists, the already-decoded character, so both
    // forms end up as the same plain-text replacement.
    dtCleaningCollection.Rows.Add(18, @"&nbsp;|\u00A0", " ");
    dtCleaningCollection.Rows.Add(19, @"&bull;|\u2022", " * ");
    dtCleaningCollection.Rows.Add(20, @"&lsaquo;|\u2039", "<");
    dtCleaningCollection.Rows.Add(21, @"&rsaquo;|\u203A", ">");
    dtCleaningCollection.Rows.Add(22, @"&trade;|\u2122", "(tm)");
    dtCleaningCollection.Rows.Add(23, @"&frasl;|\u2044", "/");
    dtCleaningCollection.Rows.Add(24, @"&lt;", "<");
    dtCleaningCollection.Rows.Add(25, @"&gt;", ">");
    dtCleaningCollection.Rows.Add(26, @"&copy;|\u00A9", "(c)");
    dtCleaningCollection.Rows.Add(27, @"&reg;|\u00AE", "(r)");
    dtCleaningCollection.Rows.Add(28, @"&frac14;|\u00BC", "1/4");
    dtCleaningCollection.Rows.Add(29, @"&frac12;|\u00BD", "1/2");
    dtCleaningCollection.Rows.Add(30, @"&frac34;|\u00BE", "3/4");
    dtCleaningCollection.Rows.Add(31, @"&lsquo;|\u2018", "'");
    dtCleaningCollection.Rows.Add(32, @"&rsquo;|\u2019", "'");
    dtCleaningCollection.Rows.Add(33, @"&ldquo;|\u201C", "\"");
    dtCleaningCollection.Rows.Add(34, @"&rdquo;|\u201D", "\"");

    // Any entity not translated above is dropped.
    dtCleaningCollection.Rows.Add(35, @"&(.{2,6});", string.Empty);

    // Tidy up spaces and tabs caught between the inserted breaks and tabs.
    dtCleaningCollection.Rows.Add(36, "(\r)( )+(\r)", "\r\r");
    dtCleaningCollection.Rows.Add(37, "(\t)( )+(\t)", "\t\t");
    dtCleaningCollection.Rows.Add(38, "(\t)( )+(\r)", "\t\r");
    dtCleaningCollection.Rows.Add(39, "(\r)( )+(\t)", "\r\t");
    dtCleaningCollection.Rows.Add(40, "(\r)(\t)+(\r)", "\r\r");
    dtCleaningCollection.Rows.Add(41, "(\r)(\t)+", "\r\t");

    return dtCleaningCollection;
}