Introduction
When I was working on a chat application, I found that the .NET RichTextBox
control only lets you save and load its content as RTF or as plain text (oh my God!).
I also wanted a method for inserting images and ActiveX controls into the RichTextBox
control, please see my article: Inserting images into a RichTextBox control (the OLE way).
So I decided to implement a solution for saving and loading "HTML lite" text in the RichTextBox
control. I call it "HTML lite" because it does not handle every HTML tag, only a small subset of them, with some constraints. However, the control can be extended to suit your needs by adding other features and HTML tag handlers.
Background
I use Win32 APIs to get character and paragraph formatting structures. This should be more efficient than calling the native RichTextBox
methods because I believe every call to a RichTextBox
method makes a system SendMessage
call, and I can use the PARAFORMAT and CHARFORMAT structures to get more information about the RichTextBox
content with only one call at a time. There are many Internet sites and blogs that use this approach.
APIs
Please see the source code for more details.
[StructLayout( LayoutKind.Sequential )]
public struct PARAFORMAT
{
public int cbSize;
public uint dwMask;
...
}
[ StructLayout( LayoutKind.Sequential )]
public struct CHARFORMAT
{
public int cbSize;
public UInt32 dwMask;
public UInt32 dwEffects;
...
}
...
Adding HTML
To insert the HTML content into the control, I use the AddHTML
method. In this function, I look for the starting HTML tag mark '<' and process it according to one of these:
b = bold
i = italic
u = underline
s = strikeout
sup = superscript
sub = subscript
p = paragraph (attributes: align="alignment")
font = font (attributes: face="facename"
color="#rrggbb" size="NN")
li = list item
Here is the source code for the method. Note how the formatting is applied through the APIs and how unhandled tags are ignored. I also convert the HTML font size, which should be a number between 1 and 7, to the closest matching font height:
// Consume the next piece of strHTML: either the plain text that precedes the
// next tag, or the tag found at the very start of the string.
//   strHTML - remaining markup; shortened on every pass
//   strData - plain text that must be written to the control next
//   cf / pf - character and paragraph formats updated by the recognized tags
int nStart = strHTML.IndexOf('<');
if (nStart >= 0)
{
    if (nStart > 0)
    {
        // Text precedes the tag: hand it out as data and keep the tag for the next pass.
        strData = strHTML.Substring(0, nStart);
        strHTML = strHTML.Substring(nStart);
    }
    else
    {
        int nEnd = strHTML.IndexOf('>', nStart);
        if (nEnd > nStart)
        {
            string strTag = strHTML.Substring(nStart, nEnd - nStart + 1);
            // Invariant culture: with a Turkish current culture "<I>" would
            // otherwise lower-case to a dotless i and never match "<i>".
            strTag = strTag.ToLower(System.Globalization.CultureInfo.InvariantCulture);

            if (strTag == "<b>")
            {
                cf.dwMask |= CFM_WEIGHT | CFM_BOLD;
                cf.dwEffects |= CFE_BOLD;
                cf.wWeight = FW_BOLD;
            }
            else if (strTag == "<i>")
            {
                cf.dwMask |= CFM_ITALIC;
                cf.dwEffects |= CFE_ITALIC;
            }
            else if (strTag == "<u>")
            {
                cf.dwMask |= CFM_UNDERLINE | CFM_UNDERLINETYPE;
                cf.dwEffects |= CFE_UNDERLINE;
                cf.bUnderlineType = CFU_UNDERLINE;
            }
            else if (strTag == "<s>")
            {
                cf.dwMask |= CFM_STRIKEOUT;
                cf.dwEffects |= CFE_STRIKEOUT;
            }
            else if (strTag == "<sup>")
            {
                cf.dwMask |= CFM_SUPERSCRIPT;
                cf.dwEffects |= CFE_SUPERSCRIPT;
            }
            else if (strTag == "<sub>")
            {
                cf.dwMask |= CFM_SUBSCRIPT;
                cf.dwEffects |= CFE_SUBSCRIPT;
            }
            else if ((strTag.Length > 2) &&
                (strTag.Substring(0, 2) == "<p"))
            {
                // Only a double-quoted align attribute is recognized.
                if (strTag.IndexOf("align=\"left\"") > 0)
                {
                    pf.dwMask |= PFM_ALIGNMENT;
                    pf.wAlignment = (short)PFA_LEFT;
                }
                else if (strTag.IndexOf("align=\"right\"") > 0)
                {
                    pf.dwMask |= PFM_ALIGNMENT;
                    pf.wAlignment = (short)PFA_RIGHT;
                }
                else if (strTag.IndexOf("align=\"center\"") > 0)
                {
                    pf.dwMask |= PFM_ALIGNMENT;
                    pf.wAlignment = (short)PFA_CENTER;
                }
            }
            else if ((strTag.Length > 5) &&
                (strTag.Substring(0, 5) == "<font"))
            {
                // Start from the current values so that attributes missing
                // from the tag keep their present setting.
                string strFont = new string(cf.szFaceName);
                strFont = strFont.Trim(chtrim);
                int crFont = cf.crTextColor;
                int yHeight = cf.yHeight;

                // face="name": the value starts 6 characters after "face="
                // begins (5 for the attribute name, 1 for the opening quote).
                int nFace = strTag.IndexOf("face=");
                if (nFace > 0)
                {
                    int nFaceEnd = strTag.IndexOf("\"", nFace + 6);
                    if (nFaceEnd > nFace)
                        strFont =
                            strTag.Substring(nFace + 6, nFaceEnd - nFace - 6);
                }

                // size="N": each size step is stored as 20 * 5 height units.
                int nSize = strTag.IndexOf("size=");
                if (nSize > 0)
                {
                    int nSizeEnd = strTag.IndexOf("\"", nSize + 6);
                    if (nSizeEnd > nSize)
                    {
                        yHeight = int.Parse(strTag.Substring(nSize + 6,
                            nSizeEnd - nSize - 6));
                        yHeight *= (20 * 5);
                    }
                }

                // color="#rrggbb" (hexadecimal) or color="N" (numeric value used as-is).
                int nColor = strTag.IndexOf("color=");
                if (nColor > 0)
                {
                    int nColorEnd = strTag.IndexOf("\"", nColor + 7);
                    if (nColorEnd > nColor)
                    {
                        if (strTag.Substring(nColor + 7, 1) == "#")
                        {
                            string strCr = strTag.Substring(nColor + 8,
                                nColorEnd - nColor - 8);
                            int nCr = Convert.ToInt32(strCr, 16);
                            Color color = Color.FromArgb(nCr);
                            crFont = GetCOLORREF(color);
                        }
                        else
                        {
                            crFont = int.Parse(strTag.Substring(nColor + 7,
                                nColorEnd - nColor - 7));
                        }
                    }
                }

                // Copy at most LF_FACESIZE - 1 characters so the fixed-size
                // buffer always keeps a trailing '\0'.
                cf.szFaceName = new char[LF_FACESIZE];
                strFont.CopyTo(0, cf.szFaceName, 0,
                    Math.Min(LF_FACESIZE - 1, strFont.Length));
                cf.crTextColor = crFont;
                cf.yHeight = yHeight;
                cf.dwMask |= CFM_COLOR | CFM_SIZE | CFM_FACE;
                cf.dwEffects &= ~CFE_AUTOCOLOR;
            }
            else if (strTag == "<li>")
            {
                if (pf.wNumbering != PFN_BULLET)
                {
                    pf.dwMask |= PFM_NUMBERING;
                    pf.wNumbering = (short)PFN_BULLET;
                }
            }
            else if (strTag == "</b>")
            {
                cf.dwEffects &= ~CFE_BOLD;
                cf.wWeight = FW_NORMAL;
            }
            else if (strTag == "</i>")
            {
                cf.dwEffects &= ~CFE_ITALIC;
            }
            else if (strTag == "</u>")
            {
                cf.dwEffects &= ~CFE_UNDERLINE;
            }
            else if (strTag == "</s>")
            {
                // Clear the effect flag (CFE_*), not the mask flag (CFM_*).
                cf.dwEffects &= ~CFE_STRIKEOUT;
            }
            else if (strTag == "</sup>")
            {
                cf.dwEffects &= ~CFE_SUPERSCRIPT;
            }
            else if (strTag == "</sub>")
            {
                cf.dwEffects &= ~CFE_SUBSCRIPT;
            }
            else if (strTag == "</font>")
            {
                // Recognized, but nothing is restored when a font tag closes.
            }
            else if (strTag == "</p>")
            {
                // Recognized, but no action is taken here.
            }
            else if (strTag == "")
            {
                // NOTE(review): strTag always contains at least "<" and ">",
                // so this branch cannot be taken - confirm the intended tag.
            }
            // Any other tag falls through and is simply dropped below.

            //-------------------------------
            // now, remove tag from HTML
            int nStart2 = strHTML.IndexOf("<", nEnd + 1);
            if (nStart2 > 0)
            {
                // extract partial data
                strData = strHTML.Substring(nEnd + 1, nStart2 - nEnd - 1);
                strHTML = strHTML.Substring(nStart2);
            }
            else
            {
                // get remaining text and finish
                if ((nEnd + 1) < strHTML.Length)
                    strData = strHTML.Substring(nEnd + 1);
                else
                    strData = "";
                strHTML = "";
            }
            //-------------------------------

            //-------------------------------
            // do we have a contiguous tag?
            if (strData.Length > 0)
            {
                // yes: jump back to reinit
                if (strData[0] == '<')
                    goto reinit;
            }
            //-------------------------------
        }
        else
        {
            // '<' without a closing '>': no valid tag, stop parsing
            strHTML = "";
        }
    }
}
else
{
    // we have not found any tag
    strHTML = "";
}
To apply formatting through PARAFORMAT and CHARFORMAT, I use properties (a good tip taken from the Internet). Please, see the source code for more details:
// Paragraph format of the current selection, exchanged with the control
// through the EM_GETPARAFORMAT / EM_SETPARAFORMAT messages.
public PARAFORMAT ParaFormat
{
    get
    {
        // The structure must carry its own size before it is sent.
        PARAFORMAT current = new PARAFORMAT();
        current.cbSize = Marshal.SizeOf( current );

        HandleRef target = new HandleRef( this, Handle );
        SendMessage( target, EM_GETPARAFORMAT, SCF_SELECTION, ref current );
        return current;
    }
    set
    {
        // Work on a copy so the size field can be stamped before sending.
        PARAFORMAT requested = value;
        requested.cbSize = Marshal.SizeOf( requested );

        HandleRef target = new HandleRef( this, Handle );
        SendMessage( target, EM_SETPARAFORMAT, SCF_SELECTION, ref requested );
    }
}
public PARAFORMAT DefaultParaFormat
{
...
}
public CHARFORMAT CharFormat
{
...
}
public CHARFORMAT DefaultCharFormat
{
...
}
And here is how I write the text formatting information to the control using its new properties. The variable strData
contains the plain text before applying the format:
// Write the pending plain text (strData) to the control and format it with
// the current paragraph (pf) and character (cf) settings.
if (strData.Length > 0)
{
    // Decode the character entities. "&amp;" is decoded last so that an
    // escaped entity such as "&amp;lt;" yields the literal text "&lt;"
    // instead of being decoded twice into "<".
    strData = strData.Replace("&lt;", "<");
    strData = strData.Replace("&gt;", ">");
    strData = strData.Replace("&apos;", "'");
    strData = strData.Replace("&quot;", "\"");
    strData = strData.Replace("&amp;", "&");

    //-------------------------------
    // add the text at the caret, then select exactly what was inserted
    int nStartCache = this.SelectionStart;
    this.SelectedText = strData;
    this.SelectionStart = nStartCache;
    this.SelectionLength = strData.Length;

    //-------------------------------
    // apply format to the selected (new) text
    this.ParaFormat = pf;
    this.CharFormat = cf;

    //-------------------------------
    // reposition the caret to the end of the text
    this.SelectionStart = this.TextLength+1;
    this.SelectionLength = 0;

    //-------------------------------
    // a new paragraph requires the alignment and numbering to be reset;
    // a "\r\n" pair always contains '\n', so one ordinal search is enough
    if (strData.IndexOf('\n') >= 0)
    {
        pf.dwMask = PFM_ALIGNMENT|PFM_NUMBERING;
        pf.wAlignment = (short)PFA_LEFT;
        pf.wNumbering = 0;
    }
    //-------------------------------
}
Getting HTML content from the control
To get the HTML content from the control, I go through it character by character (if you know of an alternative method, please let me know).
I analyze the format of each character in the control in turn and extract its style information; whenever the character format or the paragraph format changes, I add an HTML tag to the raw text.
This is done by using an internal structure cMyREFormat
that stores the related information such as the position and the tag that should be there in that place:
// Kind of marker recorded while the control content is scanned for GetHTML.
private enum uMyREType
{
// An HTML tag inserted at a position (recorded with a length of 0).
U_MYRE_TYPE_TAG,
// NOTE(review): presumably an emoticon marker; it is not used in the code shown here - confirm.
U_MYRE_TYPE_EMO,
// A single character that is replaced by an HTML entity (recorded with a length of 1).
U_MYRE_TYPE_ENTITY,
}
// One marker produced by the scan: what to emit and where in the raw text.
private struct cMyREFormat
{
// Kind of marker (tag, emoticon or entity).
public uMyREType nType;
// Number of raw-text characters the marker replaces (0 for a pure insertion).
public int nLen;
// Character position in the raw text where the marker applies.
public int nPos;
// Text written to the HTML output for this marker.
public string strValue;
}
Step 1
Find the entities ( &, <, >, ", ' ) and store their positions:
// Characters that must be escaped and, at the same index, the entity that
// replaces each of them in the generated HTML.
char[] ch = {'&', '<', '>', '"', '\''};
string[] strreplace = {"&amp;", "&lt;", "&gt;",
    "&quot;", "&apos;"};
for (i = 0; i < ch.Length; i++)
{
    // Find() takes a character set, so search for one character at a time.
    char[] ch2 = {ch[i]};
    int n = this.Find(ch2, 0);
    while (n != -1)
    {
        // Record a marker that replaces this single character with its entity.
        mfr = new cMyREFormat();
        mfr.nPos = n;
        mfr.nLen = 1;
        mfr.nType = uMyREType.U_MYRE_TYPE_ENTITY;
        mfr.strValue = strreplace[i];
        colFormat.Add(mfr);
        // Continue the search after the occurrence just recorded.
        n = this.Find(ch2, n+1);
    }
}
Step 2
Look for font
changes:
// Read the formats of the current selection and emit a font tag whenever the
// face, color or height differs from the values remembered so far.
cf = this.CharFormat;
pf = this.ParaFormat;
string strfname = new string(cf.szFaceName);
strfname = strfname.Trim(chtrim);
if ((strFont != strfname) || (crFont != cf.crTextColor) ||
    (yHeight != cf.yHeight))
{
    // Close the previous font tag, if one has been opened before.
    if (strFont != "")
    {
        mfr = new cMyREFormat();
        mfr.nPos = i;
        mfr.nLen = 0;
        mfr.nType = uMyREType.U_MYRE_TYPE_TAG;
        mfr.strValue = "</font>";
        colFormat.Add(mfr);
    }

    // Remember the new font so the next change can be detected.
    strFont = strfname;
    crFont = cf.crTextColor;
    yHeight = cf.yHeight;

    // Inverse of the conversion used when parsing: height / (20 * 5) = HTML size.
    int fsize = yHeight / (20 * 5);
    color = GetColor(crFont);
    string strcolor = string.Concat("#",
        (color.ToArgb() & 0x00FFFFFF).ToString("X6"));

    // Open the new font tag at the current position.
    mfr = new cMyREFormat();
    mfr.nPos = i;
    mfr.nLen = 0;
    mfr.nType = uMyREType.U_MYRE_TYPE_TAG;
    mfr.strValue = "<font face=\"" + strFont + "\" color=\"" +
        strcolor + "\" size=\"" + fsize + "\">";
    colFormat.Add(mfr);
}
Step 3
Look for paragraph format changes and close the previous tags if we are in a new paragraph. This is done by using states:
- none: no format applied,
- new: apply new format style (<b>,<i>,<p>... etc.),
- continue: format is same as that of the previous (no changes),
- reset: close and start again (</b>,</i>,</p>... etc.).
// A line break ends the paragraph: forget the paragraph states and close
// every character style that is still open at this position.
if ((strChar == "\r") || (strChar == "\n"))
{
if (bParaFormat)
{
// Paragraph-level states start again from "none" on the next line.
bnumbering = ctformatStates.nctNone;
baleft = ctformatStates.nctNone;
baright = ctformatStates.nctNone;
bacenter = ctformatStates.nctNone;
}
if (bitalic != ctformatStates.nctNone)
{
// Italic is open: record its closing tag at the current position.
mfr = new cMyREFormat();
mfr.nPos = i;
mfr.nLen = 0;
mfr.nType = uMyREType.U_MYRE_TYPE_TAG;
mfr.strValue = "</i>";
colFormat.Add(mfr);
bitalic = ctformatStates.nctNone;
}
if (bold != ctformatStates.nctNone)
{
...
}
...
}
// Track the paragraph alignment and numbering with the same state machine
// used for the character styles (none / new / continue / reset).
if (bParaFormat)
{
    if (pf.wAlignment == PFA_CENTER)
    {
        // First centered character opens the format; later ones continue it.
        if (bacenter == ctformatStates.nctNone)
            bacenter = ctformatStates.nctNew;
        else
            bacenter = ctformatStates.nctContinue;
    }
    else
    {
        if (bacenter != ctformatStates.nctNone)
            bacenter = ctformatStates.nctReset;
    }
    if (bacenter == ctformatStates.nctNew)
    {
        mfr = new cMyREFormat();
        mfr.nPos = i;
        mfr.nLen = 0;
        mfr.nType = uMyREType.U_MYRE_TYPE_TAG;
        // Must be written exactly as align="center" (double quotes only):
        // that is the form the tag parser searches for when loading HTML.
        mfr.strValue = "<p align=\"center\">";
        colFormat.Add(mfr);
    }
    else if (bacenter == ctformatStates.nctReset)
        bacenter = ctformatStates.nctNone;
    if (pf.wAlignment == PFA_LEFT)
    {
        ...
    }
    if (pf.wAlignment == PFA_RIGHT)
    {
        ...
    }
    if (pf.wNumbering == PFN_BULLET)
    {
        ...
    }
}
Step 4
Look for changes in style: bold, italic, underline, strikeout (with the same method, using states):
// Bold state machine: none -> new -> continue while the effect is present,
// then reset -> none once it disappears.
bool bBoldNow = (cf.dwEffects & CFE_BOLD) == CFE_BOLD;
if (bBoldNow)
{
    bold = (bold == ctformatStates.nctNone)
        ? ctformatStates.nctNew
        : ctformatStates.nctContinue;
}
else if (bold != ctformatStates.nctNone)
{
    bold = ctformatStates.nctReset;
}

// Only a transition (new or reset) produces a tag at the current position.
bool bOpenBold = (bold == ctformatStates.nctNew);
bool bCloseBold = (bold == ctformatStates.nctReset);
if (bOpenBold || bCloseBold)
{
    mfr = new cMyREFormat();
    mfr.nPos = i;
    mfr.nLen = 0;
    mfr.nType = uMyREType.U_MYRE_TYPE_TAG;
    mfr.strValue = bOpenBold ? "<b>" : "</b>";
    colFormat.Add(mfr);

    // After the closing tag the style is no longer active.
    if (bCloseBold)
        bold = ctformatStates.nctNone;
}
if ((cf.dwEffects & CFE_ITALIC) == CFE_ITALIC)
{
...
}
...
Step 5
Sort the formatting array and apply styles by adding the characters and tags one by one until the HTML text is completed:
// Order the markers by position and, at the same position, by length, so
// that zero-length tags come before the entity replacing that character.
// An element is moved only when it is strictly smaller, so markers that
// compare equal keep the order in which they were recorded. Do not swap
// this for an unstable library sort: tags recorded at the same position
// rely on that order.
k = colFormat.Count;
for (i = 0; i < k - 1; i++)
{
    for (int j = i + 1; j < k; j++)
    {
        mfr = (cMyREFormat)colFormat[i];
        cMyREFormat mfr2 = (cMyREFormat)colFormat[j];
        bool bBefore = (mfr2.nPos < mfr.nPos) ||
            ((mfr2.nPos == mfr.nPos) && (mfr2.nLen < mfr.nLen));
        if (bBefore)
        {
            // Move the smaller marker in front of slot i; the elements in
            // between shift up by one, so index j must be examined again.
            colFormat.RemoveAt(j);
            colFormat.Insert(i, mfr2);
            j--;
        }
    }
}

// Interleave the raw text (strT) with the markers. A StringBuilder avoids
// re-copying the whole result on every append.
System.Text.StringBuilder sbHTML = new System.Text.StringBuilder(strHTML);
int nAcum = 0;
for (i = 0; i < k; i++)
{
    mfr = (cMyREFormat)colFormat[i];
    // Raw text up to the marker, then the marker itself.
    sbHTML.Append(strT, nAcum, mfr.nPos - nAcum);
    sbHTML.Append(mfr.strValue);
    // Skip the raw characters the marker replaces (0 for tags, 1 for entities).
    nAcum = mfr.nPos + mfr.nLen;
}
// Remaining raw text after the last marker.
if (nAcum < strT.Length)
    sbHTML.Append(strT, nAcum, strT.Length - nAcum);
strHTML = sbHTML.ToString();
Points of interest
To avoid constant screen updates when character and paragraph formats are applied, I use the Faster Updating approach given in the article Extending RichTextBox by Pete Vidler.
This is done by sending two messages to the control: EM_SETEVENTMASK
to prevent the control from raising any events and WM_SETREDRAW
to prevent the control from redrawing itself:
// Suspends event notifications and redrawing of the control. Calls may be
// nested: only the outermost call sends the messages and stores the
// previous event mask in oldEventMask.
public void BeginUpdate()
{
    updating++;
    if ( updating <= 1 )
    {
        HandleRef target = new HandleRef( this, Handle );

        // Event mask 0 stops the control from raising events; keep the old mask.
        oldEventMask = SendMessage( target, EM_SETEVENTMASK, 0, 0 );

        // wParam 0 turns redrawing off.
        SendMessage( target, WM_SETREDRAW, 0, 0 );
    }
}
// Resumes redrawing and event notifications suspended by BeginUpdate().
// Calls may be nested: only the call that balances the outermost
// BeginUpdate() restores the control.
public void EndUpdate()
{
    // Ignore an EndUpdate() without a matching BeginUpdate(): otherwise the
    // counter would go negative and a stale oldEventMask would be written
    // to the control.
    if ( updating <= 0 )
        return;
    --updating;
    if ( updating > 0 )
        return;
    // wParam 1 turns redrawing back on.
    SendMessage( new HandleRef( this, Handle ),
        WM_SETREDRAW, 1, 0 );
    // Restore the event mask saved by BeginUpdate().
    SendMessage( new HandleRef( this, Handle ),
        EM_SETEVENTMASK, 0, oldEventMask );
    // Re-enabling redraw does not repaint by itself; request a repaint so
    // the changes made while updating become visible.
    Invalidate();
}
// True while the update counter maintained by BeginUpdate()/EndUpdate()
// is not zero.
public bool InternalUpdating
{
    get
    {
        bool bIdle = ( updating == 0 );
        return !bIdle;
    }
}
Using the code
To use the code, simply add a reference to the HmlRichTextBox control and call the methods AddHTML
and GetHTML
.
I use a toolbar with formatting buttons. To update the button states, I handle the event OnSelectionChanged
. Keep in mind that you should check the property InternalUpdating
so that the toolbar is not refreshed while the control is converting from/to HTML text, which improves performance:
// Refreshes the toolbar when the selection changes, except while the
// control reports that it is in the middle of an internal update.
private void richTextBox1_SelectionChanged(object sender,
    System.EventArgs e)
{
    if (richTextBox1.InternalUpdating)
    {
        return;
    }

    UpdateToolbar();
}
// Synchronizes the pushed state of the formatting buttons with the font
// and alignment of the current selection.
public void UpdateToolbar()
{
    // SelectionFont can be null; fall back to the control font then.
    Font fnt = richTextBox1.SelectionFont;
    if (fnt == null)
    {
        fnt = richTextBox1.Font;
    }

    tbbBold.Pushed = fnt.Bold;
    tbbItalic.Pushed = fnt.Italic;
    tbbUnderline.Pushed = fnt.Underline;
    tbbStrikeout.Pushed = fnt.Strikeout;

    // Exactly one alignment button is pushed for a given alignment value.
    HorizontalAlignment align = richTextBox1.SelectionAlignment;
    tbbLeft.Pushed = (align == HorizontalAlignment.Left);
    tbbCenter.Pushed = (align == HorizontalAlignment.Center);
    tbbRight.Pushed = (align == HorizontalAlignment.Right);
}
References and credits
History
- 5th Nov, 2005: Version 1.0
- 5th Dec, 2005: Version 1.1
- Superscript and subscript styles added.
Note
Please send your comments, corrections, or requests for credit. Your feedback is most welcome.