// Mirror of https://github.com/sockspls/badfish
// (synced 2025-04-30 08:43:09 +00:00; 133 lines, 3.5 KiB, C++)
#ifndef __HALF_FLOAT_H__
|
|
#define __HALF_FLOAT_H__
|
|
|
|
// Half Float Library by yaneurao
// (16-bit float)

// Floating-point arithmetic on a 16-bit storage type.
// Assumes that the compiler's float type uses the IEEE 754 32-bit format and relies on that bit layout.
|
|
|
|
#include <cstdint>
#include <cstring>
#include <iostream>

#include "../types.h"
|
|
|
|
namespace HalfFloat
|
|
{
|
|
// IEEE 754 float 32 format is :
//   sign(1bit) + exponent(8bits) + fraction(23bits) = 32bits
//
// Our float16 format is :
//   sign(1bit) + exponent(5bits) + fraction(10bits) = 16bits

// Overlays a float with a 32-bit integer so that the bit pattern of the float
// can be read or written as an integer.
//
// NOTE: writing one member and then reading the other is not sanctioned by
// ISO C++ (only the most recently written member is active). GCC documents
// this form of type punning as supported; for other toolchains prefer copying
// the bytes with std::memcpy.
union float32_converter
{
    int32_t n; // the 32 bits viewed as a signed integer
    float f;   // the same 32 bits viewed as a float
};

// Both views must cover exactly the same bytes for the overlay to be meaningful.
static_assert(sizeof(float) == sizeof(int32_t),
              "float32_converter requires float to be 32 bits wide");
|
|
|
|
|
|
// 16-bit float
//
// Storage type that uses the IEEE 754 binary16 bit layout:
//   sign(1bit) + exponent(5bits, bias 15) + fraction(10bits)
// Exponent field 0 encodes zero and subnormals, exponent field 31 encodes
// infinities and NaNs.
//
// Every arithmetic operator widens its operands to 32-bit float, computes
// there, and narrows the result again.
//
// Narrowing rules (see to_float16):
//   - surplus fraction bits are truncated (round toward zero);
//   - finite values too large for the format saturate to +/-65504;
//   - values too small for a subnormal become a zero of the same sign;
//   - infinities and NaNs stay infinities and NaNs.
struct float16
{
    // --- constructors

    // A default-constructed value is +0.
    float16() {}
    float16(int16_t n) { from_float(static_cast<float>(n)); }
    float16(int32_t n) { from_float(static_cast<float>(n)); }
    float16(float n) { from_float(n); }
    float16(double n) { from_float(static_cast<float>(n)); }

    // build from a float
    void from_float(float f) { *this = to_float16(f); }

    // --- implicit converters

    // Converts via float; the fractional part is discarded by the cast.
    operator int32_t() const { return static_cast<int32_t>(to_float(*this)); }
    operator float() const { return to_float(*this); }
    operator double() const { return static_cast<double>(to_float(*this)); }

    // --- operators

    // The compound assignments update *this and return a copy of the new value.
    float16 operator += (float16 rhs) { from_float(to_float(*this) + to_float(rhs)); return *this; }
    float16 operator -= (float16 rhs) { from_float(to_float(*this) - to_float(rhs)); return *this; }
    float16 operator *= (float16 rhs) { from_float(to_float(*this) * to_float(rhs)); return *this; }
    float16 operator /= (float16 rhs) { from_float(to_float(*this) / to_float(rhs)); return *this; }
    float16 operator + (float16 rhs) const { return float16(*this) += rhs; }
    float16 operator - (float16 rhs) const { return float16(*this) -= rhs; }
    float16 operator * (float16 rhs) const { return float16(*this) *= rhs; }
    float16 operator / (float16 rhs) const { return float16(*this) /= rhs; }
    float16 operator - () const { return float16(-to_float(*this)); }

    // Compares values rather than bit patterns, so +0 == -0 and a NaN is
    // not equal to anything. For every other value this is the same as
    // comparing the stored bits.
    bool operator == (float16 rhs) const { return to_float(*this) == to_float(rhs); }
    bool operator != (float16 rhs) const { return !(*this == rhs); }

    static void UnitTest() { unit_test(); }

private:

    // --- entity

    // Raw binary16 bit pattern; 0 is +0.
    uint16_t v_ = 0;

    // --- conversion between float and float16

    // Narrows a 32-bit float to 16 bits.
    static float16 to_float16(float f)
    {
        static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");

        // Copy the bytes instead of punning through a union or a pointer cast.
        uint32_t bits;
        std::memcpy(&bits, &f, sizeof bits);

        const uint32_t sign   = (bits >> 16) & 0x8000u; // sign moved down to bit 15
        const uint32_t exp32  = (bits >> 23) & 0xffu;   // exponent, biased by 127
        const uint32_t frac32 = bits & 0x007fffffu;     // 23 fraction bits

        uint32_t half;
        if (exp32 == 0xffu)
        {
            // Infinity or NaN. For a NaN one fraction bit is forced on so that
            // a payload living only in the discarded low bits cannot turn the
            // NaN into an infinity.
            half = 0x7c00u | (frac32 != 0 ? (0x0200u | (frac32 >> 13)) : 0u);
        }
        else
        {
            // Change the exponent bias from +127 to +15. The result is kept
            // signed and checked against the valid range instead of being
            // masked to 5 bits, which used to wrap out-of-range exponents
            // (0.0f came back as 2.0f).
            const int e = static_cast<int>(exp32) - 127 + 15;

            if (e >= 31)
            {
                // Too large: saturate to the largest finite value (65504).
                half = 0x7bffu;
            }
            else if (e >= 1)
            {
                // Normal number: keep the top 10 fraction bits.
                half = (static_cast<uint32_t>(e) << 10) | (frac32 >> 13);
            }
            else if (e >= -9)
            {
                // Subnormal: make the implicit leading 1 explicit and shift
                // it into the fraction field.
                half = (frac32 | 0x00800000u) >> (14 - e);
            }
            else
            {
                // Zero, or too small even for a subnormal.
                half = 0u;
            }
        }

        float16 result;
        result.v_ = static_cast<uint16_t>(sign | half);
        return result;
    }

    // Widens a 16-bit float to a 32-bit float; this direction is always exact.
    static float to_float(float16 v)
    {
        static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");

        // Widen before shifting so the sign bit never passes through a signed int.
        const uint32_t sign  = (static_cast<uint32_t>(v.v_) & 0x8000u) << 16;
        const uint32_t exp16 = (static_cast<uint32_t>(v.v_) >> 10) & 0x1fu;
        uint32_t frac        = static_cast<uint32_t>(v.v_) & 0x3ffu;

        uint32_t bits;
        if (exp16 == 0)
        {
            if (frac == 0)
            {
                // Signed zero.
                bits = sign;
            }
            else
            {
                // Subnormal: normalise by shifting the leading 1 up to the
                // implicit position, lowering the exponent for every shift.
                int e = 127 - 15 + 1;
                while ((frac & 0x400u) == 0)
                {
                    frac <<= 1;
                    --e;
                }
                frac &= 0x3ffu;
                bits = sign | (static_cast<uint32_t>(e) << 23) | (frac << 13);
            }
        }
        else if (exp16 == 0x1fu)
        {
            // Infinity or NaN.
            bits = sign | 0x7f800000u | (frac << 13);
        }
        else
        {
            // Normal number: change the exponent bias from +15 to +127.
            bits = sign | ((exp16 - 15 + 127) << 23) | (frac << 13);
        }

        float f;
        std::memcpy(&f, &bits, sizeof f);
        return f;
    }

    // Smoke test rather than a real unit test: prints the results of a few
    // conversions and operations to std::cout for manual inspection.
    static void unit_test()
    {
        float16 a, b, c, d;
        a = 1;
        std::cout << static_cast<float>(a) << std::endl;
        b = -118.625;
        std::cout << static_cast<float>(b) << std::endl;
        c = 2.5;
        std::cout << static_cast<float>(c) << std::endl;
        d = a + c;
        std::cout << static_cast<float>(d) << std::endl;

        c *= 1.5;
        std::cout << static_cast<float>(c) << std::endl;

        b /= 3;
        std::cout << static_cast<float>(b) << std::endl;

        float f1 = 1.5;
        a += f1;
        std::cout << static_cast<float>(a) << std::endl;

        a += f1 * static_cast<float>(a);
        std::cout << static_cast<float>(a) << std::endl;
    }

};
|
|
|
|
}
|
|
|
|
#endif // __HALF_FLOAT_H__
|