Since copying the
magic-voodo-bit-twiddling code would be cheating, here's the obvious and slightly more understandable C# solution:
public static int CountSetBits(int value)
{
int result = 0;
for (int i = 0; i < 8; i++)
{
byte nibble = (byte)(value & 0xF);
value >>= 4;
switch (nibble)
{
case 1:
case 2:
case 4:
case 8:
{
result += 1;
break;
}
case 3:
case 5:
case 6:
case 9:
case 10:
case 12:
{
result += 2;
break;
}
case 7:
case 11:
case 13:
case 14:
{
result += 3;
break;
}
case 15:
{
result += 4;
break;
}
}
}
return result;
}
Benchmarking via
BenchmarkDotNet[
^]:
public static int CountSetBitsSlow(int value)
{
int result = 0;
for (int i = 0, bit = 1; i < 32; i++, bit <<= 1)
{
if ((value & bit) != 0)
{
result++;
}
}
return result;
}
...
public class CountBitsBenchmark
{
private readonly int _value;
public CountBitsBenchmark()
{
_value = new Random().Next();
}
[Benchmark]
public int CountSetBits()
{
return Int32BitCounter.CountSetBits(_value);
}
[Benchmark]
public int CountSetBitsSlow()
{
return Int32BitCounter.CountSetBitsSlow(_value);
}
}
Results:
Method | Mean | StdDev |
----------------- |----------- |---------- |
CountSetBits | 11.2227 ns | 0.0482 ns |
CountSetBitsSlow | 26.1762 ns | 0.2472 ns |
EDIT: A small improvement from unrolling the loop, although it does make the code significantly longer:
[StructLayout(LayoutKind.Explicit)]
private struct Int32Bytes
{
[FieldOffset(0)]
public int Int32;
[FieldOffset(0)]
public readonly byte Byte0;
[FieldOffset(1)]
public readonly byte Byte1;
[FieldOffset(2)]
public readonly byte Byte2;
[FieldOffset(3)]
public readonly byte Byte3;
}
public static int CountSetBits2(int value)
{
int result = 0;
var bytes = new Int32Bytes { Int32 = value };
switch (bytes.Byte0 & 0xF)
{
case 1:
case 2:
case 4:
case 8:
{
result += 1;
break;
}
case 3:
case 5:
case 6:
case 9:
case 10:
case 12:
{
result += 2;
break;
}
case 7:
case 11:
case 13:
case 14:
{
result += 3;
break;
}
case 15:
{
result += 4;
break;
}
}
switch (bytes.Byte0 >> 4)
{
case 1:
case 2:
case 4:
case 8:
{
result += 1;
break;
}
case 3:
case 5:
case 6:
case 9:
case 10:
case 12:
{
result += 2;
break;
}
case 7:
case 11:
case 13:
case 14:
{
result += 3;
break;
}
case 15:
{
result += 4;
break;
}
}
switch (bytes.Byte1 & 0xF)
{
case 1:
case 2:
case 4:
case 8:
{
result += 1;
break;
}
case 3:
case 5:
case 6:
case 9:
case 10:
case 12:
{
result += 2;
break;
}
case 7:
case 11:
case 13:
case 14:
{
result += 3;
break;
}
case 15:
{
result += 4;
break;
}
}
switch (bytes.Byte1 >> 4)
{
case 1:
case 2:
case 4:
case 8:
{
result += 1;
break;
}
case 3:
case 5:
case 6:
case 9:
case 10:
case 12:
{
result += 2;
break;
}
case 7:
case 11:
case 13:
case 14:
{
result += 3;
break;
}
case 15:
{
result += 4;
break;
}
}
switch (bytes.Byte2 & 0xF)
{
case 1:
case 2:
case 4:
case 8:
{
result += 1;
break;
}
case 3:
case 5:
case 6:
case 9:
case 10:
case 12:
{
result += 2;
break;
}
case 7:
case 11:
case 13:
case 14:
{
result += 3;
break;
}
case 15:
{
result += 4;
break;
}
}
switch (bytes.Byte2 >> 4)
{
case 1:
case 2:
case 4:
case 8:
{
result += 1;
break;
}
case 3:
case 5:
case 6:
case 9:
case 10:
case 12:
{
result += 2;
break;
}
case 7:
case 11:
case 13:
case 14:
{
result += 3;
break;
}
case 15:
{
result += 4;
break;
}
}
switch (bytes.Byte3 & 0xF)
{
case 1:
case 2:
case 4:
case 8:
{
result += 1;
break;
}
case 3:
case 5:
case 6:
case 9:
case 10:
case 12:
{
result += 2;
break;
}
case 7:
case 11:
case 13:
case 14:
{
result += 3;
break;
}
case 15:
{
result += 4;
break;
}
}
switch (bytes.Byte3 >> 4)
{
case 1:
case 2:
case 4:
case 8:
{
result += 1;
break;
}
case 3:
case 5:
case 6:
case 9:
case 10:
case 12:
{
result += 2;
break;
}
case 7:
case 11:
case 13:
case 14:
{
result += 3;
break;
}
case 15:
{
result += 4;
break;
}
}
return result;
}
Results:
Method | Mean | StdDev |
----------------- |----------- |---------- |
CountSetBits | 11.1346 ns | 0.1539 ns |
CountSetBits2 | 8.2397 ns | 0.1248 ns |
CountSetBitsSlow | 19.5581 ns | 0.1278 ns |
Edit 2:
With a
switch
statement for each byte, rather than each nibble, the speed increases again. But so does the length of the code.
public static int CountSetBits3(int value)
{
int result = 0;
var bytes = new Int32Bytes { Int32 = value };
switch (bytes.Byte0)
{
case 1:
case 2:
case 4:
case 8:
case 16:
case 32:
case 64:
case 128:
{
result += 1;
break;
}
case 3:
case 5:
case 6:
case 9:
case 10:
case 12:
case 17:
case 18:
case 20:
case 24:
case 33:
case 34:
case 36:
case 40:
case 48:
case 65:
case 66:
case 68:
case 72:
case 80:
case 96:
case 129:
case 130:
case 132:
case 136:
case 144:
case 160:
case 192:
{
result += 2;
break;
}
case 7:
case 11:
case 13:
case 14:
case 19:
case 21:
case 22:
case 25:
case 26:
case 28:
case 35:
case 37:
case 38:
case 41:
case 42:
case 44:
case 49:
case 50:
case 52:
case 56:
case 67:
case 69:
case 70:
case 73:
case 74:
case 76:
case 81:
case 82:
case 84:
case 88:
case 97:
case 98:
case 100:
case 104:
case 112:
case 131:
case 133:
case 134:
case 137:
case 138:
case 140:
case 145:
case 146:
case 148:
case 152:
case 161:
case 162:
case 164:
case 168:
case 176:
case 193:
case 194:
case 196:
case 200:
case 208:
case 224:
{
result += 3;
break;
}
case 15:
case 23:
case 27:
case 29:
case 30:
case 39:
case 43:
case 45:
case 46:
case 51:
case 53:
case 54:
case 57:
case 58:
case 60:
case 71:
case 75:
case 77:
case 78:
case 83:
case 85:
case 86:
case 89:
case 90:
case 92:
case 99:
case 101:
case 102:
case 105:
case 106:
case 108:
case 113:
case 114:
case 116:
case 120:
case 135:
case 139:
case 141:
case 142:
case 147:
case 149:
case 150:
case 153:
case 154:
case 156:
case 163:
case 165:
case 166:
case 169:
case 170:
case 172:
case 177:
case 178:
case 180:
case 184:
case 195:
case 197:
case 198:
case 201:
case 202:
case 204:
case 209:
case 210:
case 212:
case 216:
case 225:
case 226:
case 228:
case 232:
case 240:
{
result += 4;
break;
}
case 31:
case 47:
case 55:
case 59:
case 61:
case 62:
case 79:
case 87:
case 91:
case 93:
case 94:
case 103:
case 107:
case 109:
case 110:
case 115:
case 117:
case 118:
case 121:
case 122:
case 124:
case 143:
case 151:
case 155:
case 157:
case 158:
case 167:
case 171:
case 173:
case 174:
case 179:
case 181:
case 182:
case 185:
case 186:
case 188:
case 199:
case 203:
case 205:
case 206:
case 211:
case 213:
case 214:
case 217:
case 218:
case 220:
case 227:
case 229:
case 230:
case 233:
case 234:
case 236:
case 241:
case 242:
case 244:
case 248:
{
result += 5;
break;
}
case 63:
case 95:
case 111:
case 119:
case 123:
case 125:
case 126:
case 159:
case 175:
case 183:
case 187:
case 189:
case 190:
case 207:
case 215:
case 219:
case 221:
case 222:
case 231:
case 235:
case 237:
case 238:
case 243:
case 245:
case 246:
case 249:
case 250:
case 252:
{
result += 6;
break;
}
case 127:
case 191:
case 223:
case 239:
case 247:
case 251:
case 253:
case 254:
{
result += 7;
break;
}
case 255:
{
result += 8;
break;
}
}
return result;
}
Method | Mean | StdDev |
----------------- |----------- |---------- |
CountSetBits | 9.1868 ns | 0.0255 ns |
CountSetBits2 | 8.1354 ns | 0.0107 ns |
CountSetBits3 | 4.3636 ns | 0.0087 ns |
CountSetBitsSlow | 23.5334 ns | 0.1123 ns |
Edit 3:
Final attempt, using an array to cache the results for a byte:
private static readonly byte[] BitCache =
{
0, 1, 1, 2, 1, 2, 2, 3,
1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7,
5, 6, 6, 7, 6, 7, 7, 8,
};
public static int CountSetBits4(int value)
{
var bytes = new Int32Bytes { Int32 = value };
return BitCache[bytes.Byte0] + BitCache[bytes.Byte1] + BitCache[bytes.Byte2] + BitCache[bytes.Byte3];
}
This seems to be slightly faster than method 3
(Edit 2), with much smaller code:
Method | Mean | StdDev |
----------------- |----------- |---------- |
CountSetBits | 10.6005 ns | 0.0578 ns |
CountSetBits2 | 8.6753 ns | 0.1706 ns |
CountSetBits3 | 4.3420 ns | 0.0383 ns |
CountSetBits4 | 3.1724 ns | 0.0032 ns |
CountSetBitsSlow | 21.4448 ns | 0.3445 ns |