Introduction
Sometimes we have to parse a string that contains both
Unicode and ASCII characters in a single string. In that situation the Encoding
functions in .NET are not helpful,
so I have created two useful functions for such situations...
Background
This article requires some headaches from working with UTF-8 characters. Just kidding...
You should know the string format in .NET and how one character is represented as Unicode in 16 bits (2 bytes).
Using the code
These two functions solve the problem of converting UTF characters to bytes and from bytes back to UTF characters.
/// <summary>
/// Builds a string from raw bytes by turning every byte into the character
/// that has the same numeric value (0-255).
/// </summary>
/// <remarks>
/// Despite the name, multi-byte UTF-8 sequences are not combined: each byte is
/// converted on its own, so bytes above 127 become the characters U+0080-U+00FF
/// and bytes up to 127 become the matching ASCII characters.
/// </remarks>
/// <param name="byteVal">Bytes to convert. May be null or empty.</param>
/// <returns>
/// A string with exactly one character per input byte, or an empty string when
/// <paramref name="byteVal"/> is null or empty.
/// </returns>
public static string GetUTF8StringFrombytes(byte[] byteVal)
{
    // Treat a missing buffer like an empty one instead of failing with a
    // NullReferenceException on byteVal.Length.
    if (byteVal == null || byteVal.Length == 0)
    {
        return string.Empty;
    }

    // One character is produced per byte, so the final length is known up front.
    StringBuilder sb = new StringBuilder(byteVal.Length);
    foreach (byte b in byteVal)
    {
        // Decoding a single byte <= 127 as UTF-8 yields the character with that
        // same value, which is also what Convert.ToChar gives for bytes > 127,
        // so a plain widening cast covers both ranges without a per-byte
        // Encoding call.
        sb.Append((char)b);
    }

    return sb.ToString();
}
/// <summary>
/// Converts a string to bytes, writing characters in the range U+0080-U+00FF
/// as a single raw byte and UTF-8 encoding every other character.
/// </summary>
/// <param name="strVal">Text to convert. May be null or empty.</param>
/// <returns>
/// The resulting bytes, or an empty array when <paramref name="strVal"/> is
/// null or empty.
/// </returns>
public static byte[] GetBytesFromUTF8Chars(string strVal)
{
    // The original guard used "||" and was therefore always true, which made
    // the empty-array fallback unreachable and let null crash on strVal.Length.
    if (string.IsNullOrEmpty(strVal))
    {
        return new byte[0];
    }

    // A single UTF-16 code unit needs at most three UTF-8 bytes (a surrogate
    // pair is two units and four bytes), so three bytes per char always fits.
    byte[] buffer = new byte[strVal.Length * 3];
    int byteCount = 0;
    int i = 0;

    while (i < strVal.Length)
    {
        char current = strVal[i];

        // Compare the full 16-bit value. Casting to byte first would truncate
        // characters above U+00FF and wrongly emit only their low byte.
        if (current > 127 && current < 256)
        {
            buffer[byteCount] = (byte)current;
            byteCount++;
            i++;
            continue;
        }

        // Encode a valid surrogate pair as one unit so it becomes a proper
        // four-byte sequence instead of two replacement characters.
        int unitCount = 1;
        if (char.IsHighSurrogate(current)
            && i + 1 < strVal.Length
            && char.IsLowSurrogate(strVal[i + 1]))
        {
            unitCount = 2;
        }

        // Encodes straight into the output buffer and reports how many bytes
        // were written.
        byteCount += Encoding.UTF8.GetBytes(strVal, i, unitCount, buffer, byteCount);
        i += unitCount;
    }

    // Trim the worst-case buffer down to the bytes actually produced.
    byte[] retVal = new byte[byteCount];
    Array.Copy(buffer, 0, retVal, 0, byteCount);
    return retVal;
}
Points of Interest
I am just providing a solution that I found while facing this problem, so that others will not have to face it.
History
If any improvements are suggested, they are welcome.