Functions to read and write binary data portably, IEEE 754 floating point format numbers and two's complement signed binary integers, regardless of the binary representation of numbers on the host machine.
Introduction
Binary files are often considered non-portable, as integer and floating point representations vary between computers, and so simple calls to read functions fail. But fundamental theory tells us that portable solutions are possible. This tip tells you how to achieve that, with two's complement signed integers and IEEE 754 floating point. Whilst written in C, the method is applicable to any language which allows low level binary IO.
Background
The world has more or less standardised on IEEE 754 for floating point arithmetic, but not quite. You might encounter a machine which uses a different system. And whilst it is tempting, if you know that this isn't a risk, to load a binary floating point with fread(), this is actually a bit dangerous, as it could be one of the not-a-number representations, and you might get some rather unexpected results.
IEEE 754 uses the following system for 64 bit floating point numbers:
| Sign | Exponent | Mantissa |
|------|----------|----------|
| 1 bit | 11 bits | 52 bits |
| Set for negative (sign-magnitude) | Biased by 1023; 0 = special | 1.xxxxx binary number |
And so to read and write portably, we need to pull these out, and reconstruct the value, and also handle the special cases, zero, denormalised numbers, non-finite numbers, and not-a-number representations. Two's complement integers are easier to handle portably, but not quite obvious. You can't use the shift operators if you don't know the host's binary representation, and in C you can't easily sign extend a number. Whilst it often works as expected it is in fact usually undefined behaviour when this is attempted.
Using the code
You need to call these routines as a matter of habit every time you load or save an IEEE floating point number as binary.
/*
 * Read one IEEE 754 64-bit floating point number from a stream and rebuild
 * its value arithmetically, so the result does not depend on how the host
 * stores doubles.
 *
 * fp        - stream to read 8 bytes from.
 * bigendian - non-zero if the stored bytes are most-significant first,
 *             zero if they are least-significant first.
 *
 * Returns the decoded value. A stored zero of either sign decodes to 0.0,
 * and every stored NaN decodes to the same NaN (the payload is dropped).
 * A failed fgetc() is not detected here; callers that care should check
 * feof()/ferror() on fp after the call.
 */
double freadieee754(FILE *fp, int bigendian)
{
    unsigned char buff[8];
    unsigned char temp;
    int i;
    double fnorm = 0.0;
    double bitval;
    int sign;
    int exponent;
    int maski, mask;
    int expbits = 11;
    int significandbits = 52;
    int shift;

    for (i = 0; i < 8; i++)
        buff[i] = (unsigned char)fgetc(fp);

    /*
     * Bring the bytes into most-significant-first order. Only the first
     * half of the buffer is walked: going one step further would swap the
     * middle pair a second time and undo it.
     */
    if (!bigendian) {
        for (i = 0; i < 4; i++) {
            temp = buff[i];
            buff[i] = buff[8 - i - 1];
            buff[8 - i - 1] = temp;
        }
    }

    sign = (buff[0] & 0x80) ? -1 : 1;
    /* 7 low bits of byte 0 plus the high nibble of byte 1 */
    exponent = ((buff[0] & 0x7F) << 4) | ((buff[1] & 0xF0) >> 4);

    /*
     * Accumulate the 52 fraction bits as a value in [0, 1). The first
     * fraction bit is bit 3 of byte 1 and is worth one half.
     */
    bitval = 0.5;
    maski = 1;
    mask = 0x08;
    for (i = 0; i < significandbits; i++) {
        if (buff[maski] & mask)
            fnorm += bitval;
        bitval /= 2.0;
        mask >>= 1;
        if (mask == 0) {
            mask = 0x80;
            maski++;
        }
    }

    /* zero */
    if (exponent == 0 && fnorm == 0)
        return 0.0;

    /* remove the exponent bias (1023) */
    shift = exponent - ((1 << (expbits - 1)) - 1);

    /* all-ones exponent with a non-zero fraction: not-a-number */
    if (shift == 1024 && fnorm != 0) {
#ifdef NAN
        return NAN;
#else
        return sqrt(-1.0);
#endif
    }

    /* all-ones exponent with a zero fraction: infinity */
    if (shift == 1024 && fnorm == 0) {
#ifdef INFINITY
        return sign == 1 ? INFINITY : -INFINITY;
#else
        return (sign * 1.0) / 0.0;
#endif
    }

    /* normalised number: restore the implied leading 1 */
    if (shift > -1023)
        return ldexp(fnorm + 1.0, shift) * sign;

    /* denormalised number: there is no implied leading 1 */
    if (fnorm == 0.0)
        return 0.0;
    shift = -1022;
    while (fnorm < 1.0) {
        fnorm *= 2;
        shift--;
    }
    return ldexp(fnorm, shift) * sign;
}
/*
 * Write a double to a stream as an IEEE 754 64-bit floating point number,
 * building the bit pattern arithmetically so the output does not depend on
 * how the host stores doubles.
 *
 * x         - value to write.
 * fp        - stream that receives exactly 8 bytes.
 * bigendian - non-zero to emit the most significant byte first, zero to
 *             emit the least significant byte first.
 *
 * Returns ferror(fp), i.e. zero when the stream has no error recorded.
 *
 * Zero is always written as positive zero. NaN is written with a fixed
 * payload. A finite value whose exponent does not fit in 11 bits is
 * written as an infinity of the same sign.
 */
int fwriteieee754(double x, FILE *fp, int bigendian)
{
    int shift;
    int i;
    unsigned long sign, biased, hibits, hilong, lowlong;
    double fnorm, significand;
    unsigned char out[8];
    int expbits = 11;
    /* all-ones exponent field positioned in the high word: infinity / NaN */
    unsigned long infhigh = ((1UL << expbits) - 1) << (31 - expbits);

    if (x == 0) {
        hilong = 0;
        lowlong = 0;
    } else if (x > DBL_MAX) {
        /* +infinity */
        hilong = infhigh;
        lowlong = 0;
    } else if (x < -DBL_MAX) {
        /* -infinity; strictly less, since -DBL_MAX itself is finite */
        hilong = infhigh | (1UL << 31);
        lowlong = 0;
    } else if (x != x) {
        /* NaN: all-ones exponent with any non-zero fraction */
        hilong = infhigh;
        lowlong = 1234;
    } else {
        if (x < 0) {
            sign = 1;
            fnorm = -x;
        } else {
            sign = 0;
            fnorm = x;
        }

        /* scale into [1, 2) and track the binary exponent */
        shift = 0;
        while (fnorm >= 2.0) {
            fnorm /= 2.0;
            shift++;
        }
        while (fnorm < 1.0) {
            fnorm *= 2.0;
            shift--;
        }

        if (shift > 1023) {
            /* too large for the format: store as infinity */
            hilong = infhigh | (sign << 31);
            lowlong = 0;
        } else {
            if (shift < -1022) {
                /* denormal: no implied leading 1, exponent field is 0 */
                while (shift < -1022) {
                    fnorm /= 2.0;
                    shift++;
                }
                shift = -1023;
            } else {
                /* normal: drop the implied leading 1 */
                fnorm -= 1.0;
            }

            /* fraction as a 52-bit integer, held in a double (scale is 2^52) */
            significand = fnorm * 4503599627370496.0;
            biased = (unsigned long)(shift + ((1 << (expbits - 1)) - 1));

            /* split into two 32-bit halves (divisor is 2^32) */
            hibits = (unsigned long)(significand / 4294967296.0);
            hilong = (sign << 31) | (biased << (31 - expbits)) | hibits;
            lowlong = (unsigned long)(significand - hibits * 4294967296.0);
        }
    }

    /* lay the eight bytes out most-significant first */
    for (i = 0; i < 4; i++) {
        out[i] = (unsigned char)((hilong >> (24 - 8 * i)) & 0xFF);
        out[4 + i] = (unsigned char)((lowlong >> (24 - 8 * i)) & 0xFF);
    }

    for (i = 0; i < 8; i++)
        fputc(out[bigendian ? i : 7 - i], fp);

    return ferror(fp);
}
And use these to read two's complement integers.
/*
 * Read a 16-bit two's complement signed integer stored most significant
 * byte first.
 *
 * The top byte is sign-extended arithmetically (flip bit 7, then subtract
 * 128), so no shift operators or host representation are relied on.
 * A failed fgetc() is not detected.
 */
int fget16be(FILE *fp)
{
    int high = fgetc(fp);
    int low = fgetc(fp);
    int signedhigh = (high ^ 0x80) - 0x80;   /* 0..255 -> -128..127 */

    return signedhigh * 256 + low;
}
/*
 * Read a 32-bit two's complement signed integer stored most significant
 * byte first.
 *
 * The top byte is sign-extended arithmetically (flip bit 7, then subtract
 * 128). All scaling is done in long: int is only guaranteed 16 bits, so
 * multiplying a byte by 65536 or more in int could overflow.
 * A failed fgetc() is not detected.
 */
long fget32be(FILE *fp)
{
    int b3 = fgetc(fp);   /* most significant byte */
    int b2 = fgetc(fp);
    int b1 = fgetc(fp);
    int b0 = fgetc(fp);   /* least significant byte */
    long top = (long)((b3 ^ 0x80) - 0x80);   /* 0..255 -> -128..127 */

    return top * 16777216L + b2 * 65536L + b1 * 256L + b0;
}
Points of Interest
It's just a little bit trickier than it looks, but not too bad. The problem is getting non-IEEE and non CHAR_BIT 8 hardware to test these on.