Introduction
Converting bytes to a hex string and back is a common task with a variety of implementations. The performance bottleneck in every implementation
I've seen is the same: the standard approach repeats the same if blocks and calculations for every single byte or character that is converted.
I'm using unsafe code in an effort to achieve the best possible performance. My concept is based on direct assignments.
Byte array to hex string
By definition, the result of a byte array to hex string conversion is a combination/repetition of static values (0-255 or "00"-"FF").
In other words we can avoid calculations as well as formatting (leading zero for bytes 0-9). Due to the fact that each byte will be transformed
to two characters (2 x 2 bytes), we may say that each byte is assigned to a 32-bit integer. Therefore for n bytes we'll define n integers
instead of n*2 characters.
Hex string to byte array
Different case, different facts. Valid values within a hex string
are the character ranges '0'-'9', 'A'-'F' and 'a'-'f'. The existence of both upper and lower case characters, together with the gaps between the valid ranges in ASCII,
rules out a direct assignment by "instinct". Needless to say, "instinct" is wrong in this case. The solution is two
byte arrays with 103 values each where only 22 of them are valid (per array). As a result a pair of characters will become a byte using only 4
if conditions and 1 summation, while most implementations need 12 if conditions, 3 subtractions, 1 left shift or multiplication and 1 summation.
The code
Source code includes comments line by line. These comments will also help you to easily adapt the code to a safe version that will be faster than any other prêt à porter code.
using System;
namespace DRDigit
{
/// <summary>
/// Table-driven conversions between byte arrays and hexadecimal strings.
/// Every byte maps to one precomputed pair of characters (encoding) and every
/// character maps to one precomputed nibble value (decoding), so the hot loops
/// contain no arithmetic beyond a single addition per decoded byte.
/// </summary>
public unsafe sealed partial class Fast
{
    #region from/to hex

    // Upper-case hex digits, indexed by nibble value (0-15).
    const string hexDigits = "0123456789ABCDEF";

    // The decode tables have one slot per character code from 0 up to 'f' (102),
    // i.e. 103 slots; any character above 'f' is rejected before the lookup.
    const int fromHexTableSize = 'f' + 1;

    // Marker stored in the decode tables for "not a hex digit". It cannot
    // collide with a valid entry (0-15 in one table, multiples of 16 in the other).
    const byte invalidDigit = 255;

    // toHexTable[b] holds the two UTF-16 characters of byte b packed into one int,
    // so that a single 32-bit store writes both characters of the pair.
    static readonly int[] toHexTable = BuildToHexTable();

    // fromHexTable[c] is the value of hex digit c as a low nibble (0-15).
    static readonly byte[] fromHexTable = BuildFromHexTable(0);

    // fromHexTable16[c] is the value of hex digit c as a high nibble (0, 16, ..., 240).
    static readonly byte[] fromHexTable16 = BuildFromHexTable(4);

    // Builds the 256-entry encode table: two hex characters packed per int.
    static int[] BuildToHexTable()
    {
        int[] table = new int[256];
        for (int value = 0; value < table.Length; value++)
        {
            int first = hexDigits[value >> 4];
            int second = hexDigits[value & 0xF];

            // The character that must come first in the string has to land at the
            // lower address, which is the low half of the int on little-endian CPUs.
            table[value] = BitConverter.IsLittleEndian
                ? first | (second << 16)
                : (first << 16) | second;
        }
        return table;
    }

    // Builds a 103-entry decode table whose valid entries are the digit value
    // shifted left by `shift` bits (0 for the low nibble, 4 for the high nibble).
    static byte[] BuildFromHexTable(int shift)
    {
        byte[] table = new byte[fromHexTableSize];
        for (int i = 0; i < table.Length; i++)
        {
            table[i] = invalidDigit;
        }
        for (int digit = 0; digit < 10; digit++)
        {
            table['0' + digit] = (byte)(digit << shift);
        }
        for (int digit = 10; digit < 16; digit++)
        {
            byte value = (byte)(digit << shift);
            table['A' + digit - 10] = value;
            table['a' + digit - 10] = value;
        }
        return table;
    }

    /// <summary>
    /// Converts a byte array to an upper-case hex string without a prefix.
    /// </summary>
    /// <param name="source">The bytes to convert.</param>
    /// <returns>Two hex characters per input byte; an empty string for an empty array.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public static string ToHexString(byte[] source)
    {
        return ToHexString(source, false);
    }

    /// <summary>
    /// Converts a byte array to an upper-case hex string, optionally prefixed with "0x".
    /// </summary>
    /// <param name="source">The bytes to convert.</param>
    /// <param name="hexIndicator">When true, the result starts with "0x".</param>
    /// <returns>
    /// Two hex characters per input byte, preceded by "0x" when
    /// <paramref name="hexIndicator"/> is true.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public static string ToHexString(byte[] source, bool hexIndicator)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }

        int resultLen = source.Length << 1;
        if (hexIndicator)
        {
            resultLen += 2;
        }

        // Never take a writable pointer to the shared empty string instance.
        if (resultLen == 0)
        {
            return string.Empty;
        }

        // Freshly allocated and not yet visible to any other code, so it can be
        // filled in place before being handed to the caller.
        string result = new string(' ', resultLen);

        fixed (int* hexRef = toHexTable)
        fixed (byte* sourceRef = source)
        fixed (char* resultRef = result)
        {
            char* output = resultRef;
            if (hexIndicator)
            {
                *output++ = '0';
                *output++ = 'x';
            }

            int* pair = (int*)output;
            byte* current = sourceRef;
            byte* end = sourceRef + source.Length;

            // Bounded by the input length; the loop never inspects memory past
            // the end of the string to decide when to stop.
            while (current < end)
            {
                *pair++ = hexRef[*current++];
            }
        }

        return result;
    }

    /// <summary>
    /// Converts a hex string (upper and/or lower case digits, with an optional
    /// lower-case "0x" prefix) to a byte array.
    /// </summary>
    /// <param name="source">The hex string to convert.</param>
    /// <returns>
    /// One byte per pair of hex characters; an empty array when
    /// <paramref name="source"/> is null or empty.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// The string has an odd length, consists only of the "0x" prefix, or contains
    /// a character that is not a hex digit (including an embedded NUL character).
    /// </exception>
    public static byte[] FromHexString(string source)
    {
        if (string.IsNullOrEmpty(source))
        {
            return new byte[0];
        }
        if ((source.Length & 1) == 1)
        {
            throw new ArgumentException("A hex string must have an even number of characters.", "source");
        }

        int index = 0;
        int len = source.Length >> 1;

        // The length is even and non-zero here, so source[1] always exists.
        if (source[0] == '0' && source[1] == 'x')
        {
            if (source.Length == 2)
            {
                throw new ArgumentException("The hex string contains only the \"0x\" prefix.", "source");
            }
            index = 2;
            len -= 1;
        }

        byte[] result = new byte[len];

        fixed (char* sourceRef = source)
        fixed (byte* hiRef = fromHexTable16)
        fixed (byte* lowRef = fromHexTable)
        fixed (byte* resultRef = result)
        {
            char* s = sourceRef + index;
            byte* r = resultRef;
            byte* end = resultRef + len;

            // Bounded by the number of output bytes rather than by the NUL
            // terminator, so an embedded '\0' is rejected like any other
            // non-hex character instead of silently ending the conversion.
            while (r < end)
            {
                char hiChar = *s++;
                char lowChar = *s++;

                // Range check first: the tables only cover codes 0..'f'.
                if (hiChar > 'f' || lowChar > 'f')
                {
                    throw new ArgumentException("The hex string contains a non-hexadecimal character.", "source");
                }

                byte hi = hiRef[hiChar];
                byte low = lowRef[lowChar];
                if (hi == invalidDigit || low == invalidDigit)
                {
                    throw new ArgumentException("The hex string contains a non-hexadecimal character.", "source");
                }

                // hi is a multiple of 16 and low is below 16, so the sum is the byte value.
                *r++ = (byte)(hi + low);
            }
        }

        return result;
    }

    #endregion
}
}
Performance
CLR provides a method for generating a hex string from a byte array that I’ve met in many sources:
string hex = BitConverter.ToString(myByteArray).Replace("-", "");
This is probably the worst choice performance wise. Anyway, my implementation is more than 10 times (10x or 1000%) faster and, according to the results below, consumes about 4 times less memory.
I was looking for a very recent published implementation of hex string to byte array and I found
this one at MSDN blogs. I know that this
is not the best implementation ever, but it’s one of the best samples for what I've written above. This time, my implementation is about 5 times (5x or 500%) faster.
The code (console app) I used for performance testing is the following:
// Benchmark driver: converts the same input ten times with each implementation
// and prints elapsed time (and, for toHex, the memory delta) side by side.
byte[] exp = File.ReadAllBytes(@"F:\words.txt");
System.Diagnostics.Stopwatch clock;
long memory = 0;
for (int n = 0; n < 10; n++)
{
    // toHex, this article's implementation.
    clock = Stopwatch.StartNew();
    memory = GC.GetTotalMemory(true);
    string s1 = DRDigit.Fast.ToHexString(exp, false);
    clock.Stop();
    memory = GC.GetTotalMemory(false) - memory;
    Console.Write("{0} [{1}] vs ", clock.Elapsed.TotalMilliseconds, memory);

    // toHex, BitConverter. The memory baseline must be stored in `memory`
    // so that the delta computed below is relative to this measurement.
    clock = Stopwatch.StartNew();
    memory = GC.GetTotalMemory(true);
    string s2 = BitConverter.ToString(exp).Replace("-", "");
    clock.Stop();
    memory = GC.GetTotalMemory(false) - memory;
    // The last placeholder confirms that both implementations agree.
    Console.WriteLine("{0} [{1}] -> {2} ",
        clock.Elapsed.TotalMilliseconds, memory, s1 == s2);

    // fromHex, this article's implementation.
    clock = Stopwatch.StartNew();
    byte[] b1 = DRDigit.Fast.FromHexString(s1);
    clock.Stop();
    Console.Write("fromHex: {0} vs ", clock.Elapsed.TotalMilliseconds);

    // fromHex, the implementation it is compared against.
    clock = Stopwatch.StartNew();
    byte[] b2 = Utils.ConvertToByteArray(s1);
    clock.Stop();
    Console.WriteLine(clock.Elapsed.TotalMilliseconds);
    Console.WriteLine("");
}
// Keep the console window open until Enter is pressed.
Console.ReadLine();
These are the results of running the above code in release mode:
toHex memory consumption | toHex execution time in ms | fromHex execution time in ms |
toHexString | BitConverter |
toHexString | BitConverter |
fromHexString | MSDN blog peeked implementation |
9,956,992 | 39,795,168 |
53.5169 | 266.2852 |
22.7573 | 92.5146 |
9,948,800 | 39,795,168 |
14.0968 | 254.9983 |
18.2375 | 86.3444 |
9,948,800 | 39,795,168 |
21.0198 | 274.6193 |
18.1744 | 110.9573 |
9,948,800 | 39,795,168 |
21.1050 | 275.9161 |
18.1831 | 122.8697 |
9,948,800 | 39,795,168 |
19.6050 | 244.4094 |
16.7560 | 110.7480 |
9,948,800 | 39,795,168 |
21.0470 | 275.6667 |
18.2452 | 90.8027 |
9,948,800 | 39,795,168 |
20.9721 | 275.5815 |
18.2375 | 110.6746 |
9,948,800 | 39,795,168 |
20.8356 | 276.2732 |
18.1651 | 122.4027 |
9,948,800 | 39,795,168 |
21.2687 | 273.4108 |
18.1887 | 90.1535 |
9,948,800 | 39,795,168 |
15.5049 | 254.4780 |
18.1718 | 86.2561 |
I'm not sure that I'm allowed to paste the code of BitConverter.ToString obtained by decompiling it with Reflector.
What I can do is describe the parts of it that affect performance.
Assume a byte array filled with 100 zeros. This is translated
to 100 divisions (0 / 16), 100 modulos (0 % 16) and 200 conversions of 0 to '0' requiring 200 if conditions (0 < 10) and 200
summations (0 + 48 to generate char '0').
Using the code
This is what you expected, isn't it?
// Bytes to hex string, with the "0x" hex indicator prefix:
string myHexString = DRDigit.Fast.ToHexString(myByteArray, true);
// Bytes to hex string, without the prefix (alternative to the line above):
string myHexString = DRDigit.Fast.ToHexString(myByteArray);
// Hex string back to bytes:
byte[] myByteArray = DRDigit.Fast.FromHexString(myHexString);