1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-04-30 08:43:09 +00:00
BadFish/src/learn/half_float.h
FireFather f5cc77bc7c EOL
add eol at eof
2020-06-29 08:27:41 +09:00

133 lines
3.5 KiB
C++

#ifndef __HALF_FLOAT_H__
#define __HALF_FLOAT_H__
// Half Float Library by yaneurao
// (16-bit float)
// Provides a 16-bit floating-point type with basic arithmetic.
// Assumes the compiler's `float` uses the IEEE 754 32-bit (binary32) representation.
#include <cstdint>
#include <cstring>
#include <iostream>

#include "../types.h"
namespace HalfFloat
{
// IEEE 754 float 32 format is :
// sign(1bit) + exponent(8bits) + fraction(23bits) = 32bits
//
// Our float16 format is :
// sign(1bit) + exponent(5bits) + fraction(10bits) = 16bits
// Overlays a 32-bit signed integer and a float in the same storage,
// so one member can be written and the other read back.
// NOTE(review): reading the member that was not last written is formally
// undefined in ISO C++ (compilers commonly support it as an extension).
union float32_converter
{
int32_t n; // integer view of the storage
float f; // float view of the storage
};
// 16-bit float
// 16-bit float.
//
// Storage follows the IEEE 754 binary16 layout:
//   sign(1bit) + exponent(5bits, bias 15) + fraction(10bits)
//   exponent field  0 : zero (fraction == 0) or subnormal
//   exponent field 31 : infinity (fraction == 0) or NaN
//
// Arithmetic is performed in 32-bit float and the result is narrowed back to
// 16 bits. Narrowing truncates the surplus fraction bits (rounds toward zero).
// Values too large for the format become infinity; values too small become
// a subnormal or (signed) zero.
struct float16
{
    // --- constructors

    // A default-constructed value is +0.0.
    float16() : v_(0) {}
    float16(int16_t n) { from_float(static_cast<float>(n)); }
    float16(int32_t n) { from_float(static_cast<float>(n)); }
    float16(float n) { from_float(n); }
    float16(double n) { from_float(static_cast<float>(n)); }

    // build from a float
    void from_float(float f) { v_ = to_float16(f).v_; }

    // --- implicit converters

    operator int32_t() const { return static_cast<int32_t>(to_float(*this)); }
    operator float() const { return to_float(*this); }
    operator double() const { return static_cast<double>(to_float(*this)); }

    // --- operators

    // Compound assignments return a reference to *this so that chained
    // expressions such as (a += b) += c modify `a` itself.
    float16& operator += (float16 rhs) { from_float(to_float(*this) + to_float(rhs)); return *this; }
    float16& operator -= (float16 rhs) { from_float(to_float(*this) - to_float(rhs)); return *this; }
    float16& operator *= (float16 rhs) { from_float(to_float(*this) * to_float(rhs)); return *this; }
    float16& operator /= (float16 rhs) { from_float(to_float(*this) / to_float(rhs)); return *this; }

    float16 operator + (float16 rhs) const { return float16(*this) += rhs; }
    float16 operator - (float16 rhs) const { return float16(*this) -= rhs; }
    float16 operator * (float16 rhs) const { return float16(*this) *= rhs; }
    float16 operator / (float16 rhs) const { return float16(*this) /= rhs; }
    float16 operator - () const { return float16(-to_float(*this)); }

    // Equality compares the stored 16-bit patterns, not the numeric values:
    // +0.0 and -0.0 compare unequal, and a NaN equals a NaN with the same bits.
    bool operator == (float16 rhs) const { return v_ == rhs.v_; }
    bool operator != (float16 rhs) const { return !(*this == rhs); }

    static void UnitTest() { unit_test(); }

private:
    // --- entity
    uint16_t v_;

    // --- conversion between float and float16

    // Narrows a 32-bit float to 16 bits.
    static float16 to_float16(float f)
    {
        static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");

        // memcpy is the well-defined way to read the bit pattern of a float.
        uint32_t bits;
        std::memcpy(&bits, &f, sizeof(bits));

        // The sign bit is MSB in common.
        const uint32_t sign = (bits >> 16) & 0x8000u;
        const uint32_t exp32 = (bits >> 23) & 0xffu;
        const uint32_t frac32 = bits & 0x7fffffu;

        uint32_t half = 0;
        if (exp32 == 0xffu)
        {
            // Infinity or NaN. For NaN, force a fraction bit on so that the
            // result cannot collapse into infinity when the payload is truncated.
            half = 0x7c00u;
            if (frac32 != 0)
                half |= 0x0200u | (frac32 >> 13);
        }
        else
        {
            // The exponent of IEEE 754's float 32 is biased +127, so we change this bias into +15.
            const int32_t exp16 = static_cast<int32_t>(exp32) - 127 + 15;

            if (exp16 >= 0x1f)
            {
                // Too large for 5 exponent bits: saturate to infinity.
                half = 0x7c00u;
            }
            else if (exp16 >= 1)
            {
                // Normal number. The fraction is limited to 10-bit.
                half = (static_cast<uint32_t>(exp16) << 10) | (frac32 >> 13);
            }
            else if (exp16 >= -9)
            {
                // Subnormal: make the implicit leading 1 explicit and shift
                // it into the 10-bit fraction (shift amount is 14..23).
                half = (frac32 | 0x800000u) >> (14 - exp16);
            }
            // else: magnitude below the smallest subnormal (this includes
            // 0.0f itself), which becomes a signed zero.
        }

        float16 result;
        result.v_ = static_cast<uint16_t>(sign | half);
        return result;
    }

    // Widens a 16-bit float to a 32-bit float (always exact).
    static float to_float(float16 v)
    {
        // Widen before shifting so the sign bit is never shifted into a signed int.
        const uint32_t sign = static_cast<uint32_t>(v.v_ & 0x8000u) << 16;
        const uint32_t exp16 = (v.v_ >> 10) & 0x1fu;
        uint32_t frac = v.v_ & 0x3ffu;

        uint32_t bits = 0;
        if (exp16 == 0x1fu)
        {
            // Infinity or NaN.
            bits = 0x7f800000u | (frac << 13);
        }
        else if (exp16 != 0)
        {
            // Normal number: re-bias the exponent from +15 to +127.
            bits = ((exp16 - 15 + 127) << 23) | (frac << (23 - 10));
        }
        else if (frac != 0)
        {
            // Subnormal: value is frac * 2^-24. Shift the fraction up until
            // its leading 1 reaches the implicit-bit position, lowering the
            // exponent by one for every shift.
            uint32_t exp32 = 127 - 15 + 1;
            while ((frac & 0x400u) == 0)
            {
                frac <<= 1;
                --exp32;
            }
            bits = (exp32 << 23) | ((frac & 0x3ffu) << (23 - 10));
        }
        // else: zero, only the sign bit remains.

        bits |= sign;

        float f;
        std::memcpy(&f, &bits, sizeof(f));
        return f;
    }

    // It is not a unit test: it only prints the results of a few operations
    // to std::cout so they can be checked by eye.
    static void unit_test()
    {
        float16 a, b, c, d;
        a = 1;
        std::cout << (float)a << std::endl;
        b = -118.625;
        std::cout << (float)b << std::endl;
        c = 2.5;
        std::cout << (float)c << std::endl;
        d = a + c;
        std::cout << (float)d << std::endl;
        c *= 1.5;
        std::cout << (float)c << std::endl;
        b /= 3;
        std::cout << (float)b << std::endl;
        float f1 = 1.5;
        a += f1;
        std::cout << (float)a << std::endl;
        a += f1 * (float)a;
        std::cout << (float)a << std::endl;
    }
};
}
#endif // __HALF_FLOAT_H__