Introduction
Often, it is possible to increase performance by using not strings, but their numeric representations. It is useful to calculate hash value at compile-time, because it will influence the speed of work significantly.
IT IS IMPORTANT 1: This method is good for calculating the hash of literal string
s at compile-time, but not at runtime (because of recursive call).
IT IS IMPORTANT 2: murmur2 has a flaw: collisions on short string
s. If you can check collisions offline and fix them, using murmur2
is a good choice, because of its simplicity and speed.
Process
- Initialization:
static constexpr uint32_t __init__(uint32_t len)
{
return 0 ^ len;
}
template<typename __Type> static constexpr uint32_t murmur2(__Type& data, uint32_t len)
{
return __proc__(data, len, 0, __init__(len), 0x5bd1e995, 24);
}
- Basic operations:
template<typename __T> static constexpr uint32_t __load__(__T& data, uint32_t offset)
{
return
(data[offset + 0]) |
(data[offset + 1] << 8) |
(data[offset + 2] << 16) |
(data[offset + 3] << 24);
}
static constexpr uint32_t __mul__(uint32_t val1, uint32_t val2)
{
return val1 * val2;
}
static constexpr uint32_t __sl__(uint32_t value, uint32_t count)
{
return (value << count);
}
static constexpr uint32_t __sr__(uint32_t value, uint32_t count)
{
return (value >> count);
}
static constexpr uint32_t __xor__(uint32_t h, uint32_t k)
{
return h ^ k;
}
static constexpr uint32_t __xor_with_sr__(uint32_t k, uint32_t r)
{
return __xor__(k, __sr__(k, r));
}
- Loop for calculating hash value. At compile-time, we organize the loop as recursive call, increasing the input data buffer offset and decreasing the rest of the buffer length by the same value:
template<typename __Type>
static constexpr uint32_t __proc__(
__Type& data,
uint32_t len,
uint32_t offset,
uint32_t h, uint32_t m, uint32_t r)
{
return
len >= 4 ? __proc__(data, len - 4, offset + 4, __xor__(__mul__(h, m),
__mul__(__xor_with_sr__(__mul__(__load__(data, offset), m), r), m)), m, r) :
len == 3 ? __proc__(data, len - 1, offset, __xor__(h, __sl__(data[offset + 2], 16)), m, r) :
len == 2 ? __proc__(data, len - 1, offset, __xor__(h, __sl__(data[offset + 1], 8)), m, r) :
len == 1 ? __proc__(data, len - 1, offset, __xor__(h, data[offset]) * m, m, r) :
__xor__(__mul__(__xor_with_sr__(h, 13), m), __sr__(__mul__(__xor_with_sr__(h, 13), m), 15));
}
- For calculating hash value of literal
string
s, it is useful to define (-1
discard null
-terminator):
#ifndef LITER_STR_HASH
# define LITER_STR_HASH(x) hash::murmur2(x, (sizeof(x) / sizeof(x[0])) - 1)
#endif//LITER_STR_HASH
Conclusion
Testing the method:
int main()
{
enum
{
val1 = LITER_STR_HASH(STR_TEST1_A),
val2 = LITER_STR_HASH(STR_TEST2),
val3 = LITER_STR_HASH(STR_TEST3),
val4 = LITER_STR_HASH(STR_TEST1_W),
};
const auto v1 = MurmurHash2(STR_TEST1_A, sizeof(STR_TEST1_A) / sizeof(STR_TEST1_A[0]) - 1);
const auto v2 = MurmurHash2(STR_TEST2, sizeof(STR_TEST2) / sizeof(STR_TEST2[0]) - 1);
const auto v3 = MurmurHash2(STR_TEST3, sizeof(STR_TEST3) / sizeof(STR_TEST3[0]) - 1);
printf("0x%Xlu - 0x%Xlu - 0x%Xlu\n", val1, v1, val4);
printf("0x%Xlu - 0x%Xlu\n", val2, v2);
printf("0x%Xlu - 0x%Xlu\n", val3, v3);
return 0;
}
The code was tested in Visual Studio 2015.
Have a nice code!