// BadFish/src/learn/half_float.h

#ifndef __HALF_FLOAT_H__
#define __HALF_FLOAT_H__

// Half Float Library by yaneurao
// (16-bit float)

// Floating-point arithmetic on a 16-bit type.
// Assumes the compiler's float type uses the IEEE 754 binary32 layout and relies on that layout.

#include <cstdint>
#include <cstring>
#include <iostream>

#include "../types.h"

namespace HalfFloat
{
	// IEEE 754 float 32 format is :
	//   sign(1bit) + exponent(8bits) + fraction(23bits) = 32bits
	//
	// Our float16 format is :
	//   sign(1bit) + exponent(5bits) + fraction(10bits) = 16bits
	//
	// The 5-bit exponent is stored with a bias of +15. This is NOT the IEEE 754
	// binary16 encoding: there are no subnormals, infinities or NaNs, and the
	// exponent field 31 holds ordinary numbers. The only special pattern is
	// "exponent field 0 and fraction 0", which represents (signed) zero.

	// Overlay of a float and its raw 32-bit pattern.
	// float16 does not use it (it copies bits with std::memcpy instead, because
	// reading the inactive member of a union is undefined behaviour in C++);
	// the type is kept so that any code naming it keeps compiling.
	union float32_converter
	{
		int32_t n;
		float f;
	};


	// 16-bit float
	//
	// Storage-only type: every arithmetic operator converts its operands to
	// float, computes in float and converts the result back to 16 bits.
	struct float16
	{
		// --- constructors

		// A default-constructed value is zero.
		float16() : v_(0) {}
		float16(int16_t n) : v_(0) { from_float(static_cast<float>(n)); }
		float16(int32_t n) : v_(0) { from_float(static_cast<float>(n)); }
		float16(float n) : v_(0) { from_float(n); }
		float16(double n) : v_(0) { from_float(static_cast<float>(n)); }

		// build from a float
		void from_float(float f) { *this = to_float16(f); }

		// --- implicit converters

		operator int32_t() const { return static_cast<int32_t>(to_float(*this)); }
		operator float() const { return to_float(*this); }
		operator double() const { return static_cast<double>(to_float(*this)); }

		// --- operators

		float16 operator += (float16 rhs) { from_float(to_float(*this) + to_float(rhs)); return *this; }
		float16 operator -= (float16 rhs) { from_float(to_float(*this) - to_float(rhs)); return *this; }
		float16 operator *= (float16 rhs) { from_float(to_float(*this) * to_float(rhs)); return *this; }
		float16 operator /= (float16 rhs) { from_float(to_float(*this) / to_float(rhs)); return *this; }
		float16 operator + (float16 rhs) const { return float16(*this) += rhs; }
		float16 operator - (float16 rhs) const { return float16(*this) -= rhs; }
		float16 operator * (float16 rhs) const { return float16(*this) *= rhs; }
		float16 operator / (float16 rhs) const { return float16(*this) /= rhs; }
		float16 operator - () const { return float16(-to_float(*this)); }

		// Compares the stored bit patterns. +0 and -0 differ only in the sign
		// bit but are the same value, so they compare equal.
		bool operator == (float16 rhs) const
		{
			const bool both_zero = ((v_ | rhs.v_) & 0x7fff) == 0;
			return both_zero || v_ == rhs.v_;
		}
		bool operator != (float16 rhs) const { return !(*this == rhs); }

		static void UnitTest() { unit_test(); }

	private:

		// --- entity

		// Raw encoding: sign(1) | exponent(5, bias 15) | fraction(10).
		uint16_t v_;

		static_assert(sizeof(float) == sizeof(uint32_t), "float16 requires a 32-bit float");

		// --- conversion between float and float16

		// Encodes a float into the 16-bit format.
		//  - The fraction is truncated (not rounded) to 10 bits.
		//  - Zero and every magnitude too small for the 5-bit exponent become
		//    signed zero. Magnitudes in [2^-15, 2^-15 * (1 + 1/1024)) also end
		//    up as zero, because their encoding is the zero pattern.
		//  - Every magnitude too large for the 5-bit exponent, including
		//    infinities and NaNs, saturates to the largest representable
		//    magnitude (131008) with the sign preserved.
		static float16 to_float16(float f)
		{
			uint32_t bits;
			std::memcpy(&bits, &f, sizeof(bits));

			// The sign bit is MSB in common.
			const uint16_t sign_bit = static_cast<uint16_t>((bits >> 16) & 0x8000);

			// The exponent of IEEE 754's float 32 is biased +127, so we change this bias into +15.
			// Computed as a signed value so that out-of-range exponents can be detected
			// instead of silently wrapping around the 5-bit field.
			const int32_t exponent = static_cast<int32_t>((bits >> 23) & 0xff) - 127 + 15;

			float16 result;
			if (exponent < 0)
			{
				// Underflow (this also covers 0.0f and float32 subnormals).
				result.v_ = sign_bit;
			}
			else if (exponent > 0x1f)
			{
				// Overflow: clamp to the largest magnitude.
				result.v_ = static_cast<uint16_t>(sign_bit | 0x7fff);
			}
			else
			{
				// The fraction is limited to 10-bit.
				const uint32_t fraction = (bits >> (23 - 10)) & 0x3ff;
				result.v_ = static_cast<uint16_t>(sign_bit | (static_cast<uint32_t>(exponent) << 10) | fraction);
			}

			return result;
		}

		// Decodes the 16-bit format back into a float. Exact for every encoding.
		static float to_float(float16 v)
		{
			uint32_t bits = static_cast<uint32_t>(v.v_ & 0x8000) << 16;

			// Exponent field 0 with fraction 0 is zero; leave only the sign bit set.
			if ((v.v_ & 0x7fff) != 0)
			{
				const uint32_t exponent = (static_cast<uint32_t>((v.v_ >> 10) & 0x1f) - 15 + 127) << 23;
				const uint32_t fraction = static_cast<uint32_t>(v.v_ & 0x3ff) << (23 - 10);
				bits |= exponent | fraction;
			}

			float f;
			std::memcpy(&f, &bits, sizeof(f));
			return f;
		}

		// It is not a unit test, but I confirmed that it can be calculated. I'll fix the code later (maybe).
		// Prints the result of a few conversions and operations to std::cout; nothing is checked.
		static void unit_test()
		{
			float16 a, b, c, d;
			a = 1;
			std::cout << (float)a << std::endl;
			b = -118.625;
			std::cout << (float)b << std::endl;
			c = 2.5;
			std::cout << (float)c << std::endl;
			d = a + c;
			std::cout << (float)d << std::endl;

			c *= 1.5;
			std::cout << (float)c << std::endl;

			b /= 3;
			std::cout << (float)b << std::endl;

			float f1 = 1.5;
			a += f1;
			std::cout << (float)a << std::endl;

			a += f1 * (float)a;
			std::cout << (float)a << std::endl;
		}

	};

}

#endif // __HALF_FLOAT_H__