1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-04-30 16:53:09 +00:00

Start documenting decompress_pairs

Documenta and rename this difficult part
of the code. Still lot to do.
This commit is contained in:
Marco Costalba 2016-05-14 00:03:08 +02:00
parent 61a27cdadd
commit c0476e12db

View file

@ -52,20 +52,31 @@ inline WDLScore operator+(WDLScore d1, WDLScore d2) { return WDLScore(int(d1) +
inline Square operator^=(Square& s, int i) { return s = Square(int(s) ^ i); } inline Square operator^=(Square& s, int i) { return s = Square(int(s) ^ i); }
inline Square operator^(Square s, int i) { return Square(int(s) ^ i); } inline Square operator^(Square s, int i) { return Square(int(s) ^ i); }
// Each table has a set of flags: all of them refer to DTZ tables, the last one to WDL tables
enum TBFlag { STM = 1, Mapped = 2, WinPlies = 4, LossPlies = 8, SingleValue = 128 };
// Little endian numbers of index in blockLenghts[] and offet within the block
struct SparseEntry {
char block[4];
char offset[2];
};
static_assert(sizeof(SparseEntry) == 6, "SparseEntry must be 6 bytes");
struct PairsData { struct PairsData {
int flags; int flags;
int blocksize; size_t sizeofBlock; // Block size in bytes
int idxbits; int idxbits; // Every 1 << idxbits index values there is a blockEntryIndex[] entry
int num_indices;
int real_num_blocks; int real_num_blocks;
int num_blocks; int num_blocks;
int max_len; int max_len;
int min_len; int min_len;
uint16_t* offset; uint16_t* offset;
uint8_t* sympat; uint8_t* sympat;
uint8_t* indextable; uint16_t* blockLenghts; // Number of stored positions (minus one) for each block
uint16_t* sizetable; SparseEntry* sparseIndex; // Partial indices into blockLenghts[]
uint8_t* data; int num_indices; // Size of SparseIndex[] table
uint8_t* data; // Start of Huffman compressed data
std::vector<uint64_t> base; std::vector<uint64_t> base;
std::vector<uint8_t> symlen; std::vector<uint8_t> symlen;
Piece pieces[TBPIECES]; Piece pieces[TBPIECES];
@ -107,9 +118,6 @@ struct WDLEntry : public Atomic {
}; };
struct DTZEntry { struct DTZEntry {
enum Flag { STM = 1, Mapped = 2, WinPlies = 4, LossPlies = 8 };
DTZEntry(const WDLEntry& wdl); DTZEntry(const WDLEntry& wdl);
~DTZEntry(); ~DTZEntry();
@ -447,27 +455,68 @@ void HashTable::insert(const std::vector<PieceType>& pieces)
insert(keys[BLACK], &WDLTable.back()); insert(keys[BLACK], &WDLTable.back());
} }
// TB are compressed with canonical Huffman code. The the compressed data is divided into
// blocks of size d->blocksize, and each block stores a variable number of symbols.
// Each symbol represents either a WDL or (remapped) DTZ value, or a pair of other symbols
// (recursively). If you keep expanding the symbols in a block, you end up with up to 65536
// WDL or DTZ values. Each symbol represents up to 256 values and will correspond after
// Huffman coding to at least 1 bit. So a block of 32 bytes corresponds to at most
// 32 x 8 x 256 = 65536 values. This maximum is only reached for tables that consist mostly
// of draws or mostly of wins, but such tables are actually quite common. In principle, the
// blocks in WDL tables are 64 bytes long (and will be aligned on cache lines). But for
// mostly-draw or mostly-win tables this can leave many 64-byte blocks only half-filled, so
// in such cases blocks are 32 bytes long. The blocks of DTZ tables are up to 1024 bytes long.
// The generator picks the size that leads to the smallest table. The "book" of symbols and
// Huffman codes is the same for all blocks in the table (a non-symmetric pawnless TB file
// will have one table for wtm and one for btm, a TB file with pawns will have tables per
// file a,b,c,d).
int decompress_pairs(PairsData* d, uint64_t idx) int decompress_pairs(PairsData* d, uint64_t idx)
{ {
if (!d->idxbits) // Special case where all table positions store the same value
if (d->flags & TBFlag::SingleValue)
return d->min_len; return d->min_len;
// idx = blockidx | litidx where litidx is a signed number of lenght d->idxbits // First we need to locate the right block that stores the value at index "idx".
uint32_t blockidx = (uint32_t)(idx >> d->idxbits); // Because each block n stores blockLenghts[n] + 1 values, the index i of the block
int litidx = (idx & ((1ULL << d->idxbits) - 1)) - (1ULL << (d->idxbits - 1)); // that contains the value at position idx is:
//
// for (i = 0; idx < sum; i++)
// sum += blockLenghts[i] + 1;
//
// This can be slow, so we use SparseIndex[] populated with a set of SparseEntry that
// point to known indices into blockLenghts[]. Namely SparseIndex[k] is a SparseEntry
// that stores the blockLenghts[] index and the offset within that block of the value
// with index N(k), where:
//
// N(k) = k * (1 << d->idxbits) + (1 << (d->idxbits - 1)) (1)
// indextable points to an array of blocks of 6 bytes representing numbers in // First step is to get the 'k' of the N(k) nearest to our idx, using defintion (1)
// little endian. The low 4 bytes are the block, the high 2 bytes the idxOffset. uint32_t k = (uint32_t)(idx >> d->idxbits);
uint32_t block = number<uint32_t, LittleEndian>(d->indextable + 6 * blockidx);
litidx += number<uint16_t, LittleEndian>(d->indextable + 6 * blockidx + 4);
while (litidx < 0) // Then we read the corresponding SparseIndex[] entry
litidx += d->sizetable[--block] + 1; uint32_t block = number<uint32_t, LittleEndian>(&d->sparseIndex[k].block);
int idxOffset = number<uint16_t, LittleEndian>(&d->sparseIndex[k].offset);
while (litidx > d->sizetable[block]) // Now compute the difference idx - N(k). From defintion of k we know that
litidx -= d->sizetable[block++] + 1; //
// idx = k * (1 << d->idxbits) + (idx & ((1ULL << d->idxbits) - 1)) (2)
//
// So, from (2) and (1) we can compute idx - N(K):
int diff = (idx & ((1ULL << d->idxbits) - 1)) - (1 << (d->idxbits - 1));
uint32_t* ptr = (uint32_t*)(d->data + (block << d->blocksize)); // Sum to idxOffset to find the offset corresponding to our idx
idxOffset += diff;
// Move to previous or next block, until we reach the correct block that contains idx,
// that is when 0 <= idxOffset <= d->sizetable[block]
while (idxOffset < 0)
idxOffset += d->blockLenghts[--block] + 1;
while (idxOffset > d->blockLenghts[block])
idxOffset -= d->blockLenghts[block++] + 1;
// Finally, we find the start address of our block
uint32_t* ptr = (uint32_t*)(d->data + block * d->sizeofBlock);
uint64_t code = number<uint64_t, BigEndian>(ptr); uint64_t code = number<uint64_t, BigEndian>(ptr);
int m = d->min_len; int m = d->min_len;
@ -486,10 +535,10 @@ int decompress_pairs(PairsData* d, uint64_t idx)
sym = number<uint16_t, LittleEndian>(offset + l); sym = number<uint16_t, LittleEndian>(offset + l);
sym += (int)((code - d->base[l - d->min_len]) >> (64 - l)); sym += (int)((code - d->base[l - d->min_len]) >> (64 - l));
if (litidx < (int)d->symlen[sym] + 1) if (idxOffset < (int)d->symlen[sym] + 1)
break; break;
litidx -= (int)d->symlen[sym] + 1; idxOffset -= (int)d->symlen[sym] + 1;
code <<= l; code <<= l;
bitcnt += l; bitcnt += l;
@ -505,10 +554,10 @@ int decompress_pairs(PairsData* d, uint64_t idx)
uint8_t* w = sympat + (3 * sym); uint8_t* w = sympat + (3 * sym);
int s1 = ((w[1] & 0xf) << 8) | w[0]; int s1 = ((w[1] & 0xf) << 8) | w[0];
if (litidx < (int)d->symlen[s1] + 1) if (idxOffset < (int)d->symlen[s1] + 1)
sym = s1; sym = s1;
else { else {
litidx -= (int)d->symlen[s1] + 1; idxOffset -= (int)d->symlen[s1] + 1;
sym = (w[2] << 4) | (w[1] >> 4); sym = (w[2] << 4) | (w[1] >> 4);
} }
} }
@ -525,7 +574,7 @@ bool check_dtz_stm(DTZEntry* entry, File f, int stm) {
int flags = entry->hasPawns ? entry->pawn.file[f].precomp->flags int flags = entry->hasPawns ? entry->pawn.file[f].precomp->flags
: entry->piece.precomp->flags; : entry->piece.precomp->flags;
return (flags & DTZEntry::Flag::STM) == stm return (flags & TBFlag::STM) == stm
|| ((entry->key == entry->key2) && !entry->hasPawns); || ((entry->key == entry->key2) && !entry->hasPawns);
} }
@ -550,14 +599,14 @@ int map_score(DTZEntry* entry, File f, int value, WDLScore wdl) {
uint16_t* idx = entry->hasPawns ? entry->pawn.file[f].map_idx uint16_t* idx = entry->hasPawns ? entry->pawn.file[f].map_idx
: entry->piece.map_idx; : entry->piece.map_idx;
if (flags & DTZEntry::Flag::Mapped) if (flags & TBFlag::Mapped)
value = map[idx[WDLMap[wdl + 2]] + value]; value = map[idx[WDLMap[wdl + 2]] + value];
// DTZ tables store distance to zero in number of moves but // DTZ tables store distance to zero in number of moves but
// under some conditions we want to return plies, so we have // under some conditions we want to return plies, so we have
// to multiply score by 2. // to multiply score by 2.
if ( (wdl == WDLWin && !(flags & DTZEntry::Flag::WinPlies)) if ( (wdl == WDLWin && !(flags & TBFlag::WinPlies))
|| (wdl == WDLLoss && !(flags & DTZEntry::Flag::LossPlies)) || (wdl == WDLLoss && !(flags & TBFlag::LossPlies))
|| wdl == WDLCursedWin || wdl == WDLCursedWin
|| wdl == WDLCursedLoss) || wdl == WDLCursedLoss)
value *= 2; value *= 2;
@ -869,14 +918,14 @@ uint8_t* set_sizes(PairsData* d, uint8_t* data, uint64_t tb_size)
{ {
d->flags = *data++; d->flags = *data++;
if (d->flags & 0x80) { if (d->flags & TBFlag::SingleValue) {
d->idxbits = d->real_num_blocks = d->idxbits = d->real_num_blocks =
d->num_blocks = d->num_indices = 0; // Broken MSVC zero-init d->num_blocks = d->num_indices = 0; // Broken MSVC zero-init
d->min_len = *data++; d->min_len = *data++;
return data; return data;
} }
d->blocksize = *data++; d->sizeofBlock = 1 << *data++;
d->idxbits = *data++; d->idxbits = *data++;
d->num_indices = (tb_size + (1ULL << d->idxbits) - 1) >> d->idxbits; // Divide and round upward, like ceil() d->num_indices = (tb_size + (1ULL << d->idxbits) - 1) >> d->idxbits; // Divide and round upward, like ceil()
d->num_blocks = number<uint8_t, LittleEndian>(data++); d->num_blocks = number<uint8_t, LittleEndian>(data++);
@ -918,7 +967,7 @@ uint8_t* set_dtz_map(DTZEntry&, T& p, uint8_t* data, File maxFile) {
p.map = data; p.map = data;
for (File f = FILE_A; f <= maxFile; ++f) { for (File f = FILE_A; f <= maxFile; ++f) {
if (item(p, 0, f).precomp->flags & DTZEntry::Flag::Mapped) if (item(p, 0, f).precomp->flags & TBFlag::Mapped)
for (int i = 0; i < 4; ++i) { // Sequence like 3,x,x,x,1,x,0,2,x,x for (int i = 0; i < 4; ++i) { // Sequence like 3,x,x,x,1,x,0,2,x,x
item(p, 0, f).map_idx[i] = (uint16_t)(data - p.map + 1); item(p, 0, f).map_idx[i] = (uint16_t)(data - p.map + 1);
data += *data + 1; data += *data + 1;
@ -984,21 +1033,21 @@ void do_init(Entry& e, T& p, uint8_t* data)
for (File f = FILE_A; f <= maxFile; ++f) for (File f = FILE_A; f <= maxFile; ++f)
for (int k = 0; k <= split; k++) { for (int k = 0; k <= split; k++) {
(d = item(p, k, f).precomp)->indextable = data; (d = item(p, k, f).precomp)->sparseIndex = (SparseEntry*)data;
data += 6ULL * d->num_indices; data += d->num_indices * sizeof(SparseEntry) ;
} }
for (File f = FILE_A; f <= maxFile; ++f) for (File f = FILE_A; f <= maxFile; ++f)
for (int k = 0; k <= split; k++) { for (int k = 0; k <= split; k++) {
(d = item(p, k, f).precomp)->sizetable = (uint16_t*)data; (d = item(p, k, f).precomp)->blockLenghts = (uint16_t*)data;
data += 2ULL * d->num_blocks; data += d->num_blocks * sizeof(uint16_t);
} }
for (File f = FILE_A; f <= maxFile; ++f) for (File f = FILE_A; f <= maxFile; ++f)
for (int k = 0; k <= split; k++) { for (int k = 0; k <= split; k++) {
data = (uint8_t*)(((uintptr_t)data + 0x3F) & ~0x3F); // 64 byte alignment data = (uint8_t*)(((uintptr_t)data + 0x3F) & ~0x3F); // 64 byte alignment
(d = item(p, k, f).precomp)->data = data; (d = item(p, k, f).precomp)->data = data;
data += (1ULL << d->blocksize) * d->real_num_blocks; data += d->real_num_blocks * d->sizeofBlock;
} }
} }