From c0476e12db6dcbd751bb1e51e16ea3b2f83a5350 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Sat, 14 May 2016 00:03:08 +0200 Subject: [PATCH] Start documenting decompress_pairs Documenta and rename this difficult part of the code. Still lot to do. --- src/syzygy/tbprobe.cpp | 125 ++++++++++++++++++++++++++++------------- 1 file changed, 87 insertions(+), 38 deletions(-) diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index 70eab9d1..33e01c1a 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -52,20 +52,31 @@ inline WDLScore operator+(WDLScore d1, WDLScore d2) { return WDLScore(int(d1) + inline Square operator^=(Square& s, int i) { return s = Square(int(s) ^ i); } inline Square operator^(Square s, int i) { return Square(int(s) ^ i); } +// Each table has a set of flags: all of them refer to DTZ tables, the last one to WDL tables +enum TBFlag { STM = 1, Mapped = 2, WinPlies = 4, LossPlies = 8, SingleValue = 128 }; + +// Little endian numbers of index in blockLenghts[] and offet within the block +struct SparseEntry { + char block[4]; + char offset[2]; +}; + +static_assert(sizeof(SparseEntry) == 6, "SparseEntry must be 6 bytes"); + struct PairsData { int flags; - int blocksize; - int idxbits; - int num_indices; + size_t sizeofBlock; // Block size in bytes + int idxbits; // Every 1 << idxbits index values there is a blockEntryIndex[] entry int real_num_blocks; int num_blocks; int max_len; int min_len; uint16_t* offset; uint8_t* sympat; - uint8_t* indextable; - uint16_t* sizetable; - uint8_t* data; + uint16_t* blockLenghts; // Number of stored positions (minus one) for each block + SparseEntry* sparseIndex; // Partial indices into blockLenghts[] + int num_indices; // Size of SparseIndex[] table + uint8_t* data; // Start of Huffman compressed data std::vector base; std::vector symlen; Piece pieces[TBPIECES]; @@ -107,9 +118,6 @@ struct WDLEntry : public Atomic { }; struct DTZEntry { - - enum Flag { STM = 1, Mapped = 2, WinPlies = 4, LossPlies = 8 }; - DTZEntry(const WDLEntry& wdl); ~DTZEntry(); @@ -447,27 +455,68 @@ void HashTable::insert(const std::vector& pieces) insert(keys[BLACK], &WDLTable.back()); } +// TB are compressed with canonical Huffman code. The the compressed data is divided into +// blocks of size d->blocksize, and each block stores a variable number of symbols. +// Each symbol represents either a WDL or (remapped) DTZ value, or a pair of other symbols +// (recursively). If you keep expanding the symbols in a block, you end up with up to 65536 +// WDL or DTZ values. Each symbol represents up to 256 values and will correspond after +// Huffman coding to at least 1 bit. So a block of 32 bytes corresponds to at most +// 32 x 8 x 256 = 65536 values. This maximum is only reached for tables that consist mostly +// of draws or mostly of wins, but such tables are actually quite common. In principle, the +// blocks in WDL tables are 64 bytes long (and will be aligned on cache lines). But for +// mostly-draw or mostly-win tables this can leave many 64-byte blocks only half-filled, so +// in such cases blocks are 32 bytes long. The blocks of DTZ tables are up to 1024 bytes long. +// The generator picks the size that leads to the smallest table. The "book" of symbols and +// Huffman codes is the same for all blocks in the table (a non-symmetric pawnless TB file +// will have one table for wtm and one for btm, a TB file with pawns will have tables per +// file a,b,c,d). int decompress_pairs(PairsData* d, uint64_t idx) { - if (!d->idxbits) + // Special case where all table positions store the same value + if (d->flags & TBFlag::SingleValue) return d->min_len; - // idx = blockidx | litidx where litidx is a signed number of lenght d->idxbits - uint32_t blockidx = (uint32_t)(idx >> d->idxbits); - int litidx = (idx & ((1ULL << d->idxbits) - 1)) - (1ULL << (d->idxbits - 1)); + // First we need to locate the right block that stores the value at index "idx". + // Because each block n stores blockLenghts[n] + 1 values, the index i of the block + // that contains the value at position idx is: + // + // for (i = 0; idx < sum; i++) + // sum += blockLenghts[i] + 1; + // + // This can be slow, so we use SparseIndex[] populated with a set of SparseEntry that + // point to known indices into blockLenghts[]. Namely SparseIndex[k] is a SparseEntry + // that stores the blockLenghts[] index and the offset within that block of the value + // with index N(k), where: + // + // N(k) = k * (1 << d->idxbits) + (1 << (d->idxbits - 1)) (1) - // indextable points to an array of blocks of 6 bytes representing numbers in - // little endian. The low 4 bytes are the block, the high 2 bytes the idxOffset. - uint32_t block = number(d->indextable + 6 * blockidx); - litidx += number(d->indextable + 6 * blockidx + 4); + // First step is to get the 'k' of the N(k) nearest to our idx, using defintion (1) + uint32_t k = (uint32_t)(idx >> d->idxbits); - while (litidx < 0) - litidx += d->sizetable[--block] + 1; + // Then we read the corresponding SparseIndex[] entry + uint32_t block = number(&d->sparseIndex[k].block); + int idxOffset = number(&d->sparseIndex[k].offset); - while (litidx > d->sizetable[block]) - litidx -= d->sizetable[block++] + 1; + // Now compute the difference idx - N(k). From defintion of k we know that + // + // idx = k * (1 << d->idxbits) + (idx & ((1ULL << d->idxbits) - 1)) (2) + // + // So, from (2) and (1) we can compute idx - N(K): + int diff = (idx & ((1ULL << d->idxbits) - 1)) - (1 << (d->idxbits - 1)); - uint32_t* ptr = (uint32_t*)(d->data + (block << d->blocksize)); + // Sum to idxOffset to find the offset corresponding to our idx + idxOffset += diff; + + // Move to previous or next block, until we reach the correct block that contains idx, + // that is when 0 <= idxOffset <= d->sizetable[block] + while (idxOffset < 0) + idxOffset += d->blockLenghts[--block] + 1; + + while (idxOffset > d->blockLenghts[block]) + idxOffset -= d->blockLenghts[block++] + 1; + + // Finally, we find the start address of our block + uint32_t* ptr = (uint32_t*)(d->data + block * d->sizeofBlock); uint64_t code = number(ptr); int m = d->min_len; @@ -486,10 +535,10 @@ int decompress_pairs(PairsData* d, uint64_t idx) sym = number(offset + l); sym += (int)((code - d->base[l - d->min_len]) >> (64 - l)); - if (litidx < (int)d->symlen[sym] + 1) + if (idxOffset < (int)d->symlen[sym] + 1) break; - litidx -= (int)d->symlen[sym] + 1; + idxOffset -= (int)d->symlen[sym] + 1; code <<= l; bitcnt += l; @@ -505,10 +554,10 @@ int decompress_pairs(PairsData* d, uint64_t idx) uint8_t* w = sympat + (3 * sym); int s1 = ((w[1] & 0xf) << 8) | w[0]; - if (litidx < (int)d->symlen[s1] + 1) + if (idxOffset < (int)d->symlen[s1] + 1) sym = s1; else { - litidx -= (int)d->symlen[s1] + 1; + idxOffset -= (int)d->symlen[s1] + 1; sym = (w[2] << 4) | (w[1] >> 4); } } @@ -525,7 +574,7 @@ bool check_dtz_stm(DTZEntry* entry, File f, int stm) { int flags = entry->hasPawns ? entry->pawn.file[f].precomp->flags : entry->piece.precomp->flags; - return (flags & DTZEntry::Flag::STM) == stm + return (flags & TBFlag::STM) == stm || ((entry->key == entry->key2) && !entry->hasPawns); } @@ -550,14 +599,14 @@ int map_score(DTZEntry* entry, File f, int value, WDLScore wdl) { uint16_t* idx = entry->hasPawns ? entry->pawn.file[f].map_idx : entry->piece.map_idx; - if (flags & DTZEntry::Flag::Mapped) + if (flags & TBFlag::Mapped) value = map[idx[WDLMap[wdl + 2]] + value]; // DTZ tables store distance to zero in number of moves but // under some conditions we want to return plies, so we have // to multiply score by 2. - if ( (wdl == WDLWin && !(flags & DTZEntry::Flag::WinPlies)) - || (wdl == WDLLoss && !(flags & DTZEntry::Flag::LossPlies)) + if ( (wdl == WDLWin && !(flags & TBFlag::WinPlies)) + || (wdl == WDLLoss && !(flags & TBFlag::LossPlies)) || wdl == WDLCursedWin || wdl == WDLCursedLoss) value *= 2; @@ -869,14 +918,14 @@ uint8_t* set_sizes(PairsData* d, uint8_t* data, uint64_t tb_size) { d->flags = *data++; - if (d->flags & 0x80) { + if (d->flags & TBFlag::SingleValue) { d->idxbits = d->real_num_blocks = d->num_blocks = d->num_indices = 0; // Broken MSVC zero-init d->min_len = *data++; return data; } - d->blocksize = *data++; + d->sizeofBlock = 1 << *data++; d->idxbits = *data++; d->num_indices = (tb_size + (1ULL << d->idxbits) - 1) >> d->idxbits; // Divide and round upward, like ceil() d->num_blocks = number(data++); @@ -918,7 +967,7 @@ uint8_t* set_dtz_map(DTZEntry&, T& p, uint8_t* data, File maxFile) { p.map = data; for (File f = FILE_A; f <= maxFile; ++f) { - if (item(p, 0, f).precomp->flags & DTZEntry::Flag::Mapped) + if (item(p, 0, f).precomp->flags & TBFlag::Mapped) for (int i = 0; i < 4; ++i) { // Sequence like 3,x,x,x,1,x,0,2,x,x item(p, 0, f).map_idx[i] = (uint16_t)(data - p.map + 1); data += *data + 1; @@ -984,21 +1033,21 @@ void do_init(Entry& e, T& p, uint8_t* data) for (File f = FILE_A; f <= maxFile; ++f) for (int k = 0; k <= split; k++) { - (d = item(p, k, f).precomp)->indextable = data; - data += 6ULL * d->num_indices; + (d = item(p, k, f).precomp)->sparseIndex = (SparseEntry*)data; + data += d->num_indices * sizeof(SparseEntry) ; } for (File f = FILE_A; f <= maxFile; ++f) for (int k = 0; k <= split; k++) { - (d = item(p, k, f).precomp)->sizetable = (uint16_t*)data; - data += 2ULL * d->num_blocks; + (d = item(p, k, f).precomp)->blockLenghts = (uint16_t*)data; + data += d->num_blocks * sizeof(uint16_t); } for (File f = FILE_A; f <= maxFile; ++f) for (int k = 0; k <= split; k++) { data = (uint8_t*)(((uintptr_t)data + 0x3F) & ~0x3F); // 64 byte alignment (d = item(p, k, f).precomp)->data = data; - data += (1ULL << d->blocksize) * d->real_num_blocks; + data += d->real_num_blocks * d->sizeofBlock; } }