Clarify why blockLengthSize > blocksNum

Ronald de Man says: This difference has to do with the fact that the "sparse index" does not point to "k * d->span", but to "k * d->span + d->span / 2". Since k = idx / d->span, we know that k * d->span <= idx, so k * d->span is a valid index into the table. But k * d->span + d->span / 2 might be a value that is bigger than the largest index for the table (if "idx" happens to be near the end of the table). So the last valid entry in the SparseIndex[] array might have to point to a block and (sub)index that is not part of the real table but comes "after" it. To make this work, the generator adds entries for a few "fake" blocks, each of maximum size 65536, to the blockLength[] array so that there is something to point to for the last valid entry in the SparseIndex[] array. These fake blocks do not correspond to any compressed data. So the fake blocks avoid the need to detect and handle this special case in decompress_pairs().
2025-04-30 00:33:09 +00:00 · 2016-05-15 18:03:30 +02:00 · 2016-05-15 18:03:30 +02:00 · 306561431b
commit 306561431b
parent 7448fce808
1 changed files with 9 additions and 8 deletions
--- a/src/syzygy/tbprobe.cpp
+++ b/src/syzygy/tbprobe.cpp
@ -102,19 +102,19 @@ struct PairsData {
    int flags;
    size_t sizeofBlock;            // Block size in bytes
    size_t span;                   // About every span values there is a SparseIndex[] entry
-    int real_num_blocks;
+    int blocksNum;                 // Number of blocks in the TB file
    int maxSymLen;                 // Maximum length in bits of the Huffman symbols
    int minSymLen;                 // Minimum length in bits of the Huffman symbols
    Sym* lowestSym;                // Value of the lowest symbol of length l is lowestSym[l]
    LR* btree;                     // btree[sym] stores the left and right symbols that expand sym
    uint16_t* blockLength;         // Number of stored positions (minus one) for each block: 1..65536
-    int blockLengthSize;           // Size of blockLength[] table
+    int blockLengthSize;           // Size of blockLength[] table: padded so it's bigger than blocksNum
    SparseEntry* sparseIndex;      // Partial indices into blockLength[]
    size_t sparseIndexSize;        // Size of SparseIndex[] table
    uint8_t* data;                 // Start of Huffman compressed data
    std::vector<uint64_t> base64;  // Smallest symbol of length l padded to 64 bits is at base64[l - min_sym_len]
    std::vector<uint8_t> symlen;   // Number of values (-1) represented by a given Huffman symbol: 1..256
-    Piece pieces[TBPIECES];
+    Piece pieces[TBPIECES];        // Sequence of the pieces: order is critical to ensure the best compression
    uint64_t groupSize[TBPIECES];  // Size needed by a given subset of pieces: KRKN -> (KRK) + (N)
    uint8_t groupLen[TBPIECES];    // Number of pieces in a given group: KRKN -> (3) + (1)
 };
@ -972,7 +972,7 @@ uint8_t* set_sizes(PairsData* d, uint8_t* data, uint64_t tb_size)
    d->flags = *data++;

    if (d->flags & TBFlag::SingleValue) {
-        d->real_num_blocks = d->span =
+        d->blocksNum = d->span =
        d->blockLengthSize = d->sparseIndexSize = 0; // Broken MSVC zero-init
        d->minSymLen = *data++; // Here we store the single value
        return data;
@ -981,9 +981,10 @@ uint8_t* set_sizes(PairsData* d, uint8_t* data, uint64_t tb_size)
    d->sizeofBlock = 1ULL << *data++;
    d->span = 1ULL << *data++;
    d->sparseIndexSize = (tb_size + d->span - 1) / d->span; // Round up
-    d->blockLengthSize = number<uint8_t, LittleEndian>(data++);
-    d->real_num_blocks = number<uint32_t, LittleEndian>(data); data += sizeof(uint32_t);
-    d->blockLengthSize += d->real_num_blocks;
+    int padding = number<uint8_t, LittleEndian>(data++);
+    d->blocksNum = number<uint32_t, LittleEndian>(data); data += sizeof(uint32_t);
+    d->blockLengthSize = d->blocksNum + padding; // Padded to ensure SparseIndex[]
+                                                 // does not go out of range.
    d->maxSymLen = *data++;
    d->minSymLen = *data++;
    d->lowestSym = (Sym*)data;
@ -1105,7 +1106,7 @@ void do_init(Entry& e, T& p, uint8_t* data)
        for (int k = 0; k <= split; k++) {
            data = (uint8_t*)(((uintptr_t)data + 0x3F) & ~0x3F); // 64 byte alignment
            (d = item(p, k, f).precomp)->data = data;
-            data += d->real_num_blocks * d->sizeofBlock;
+            data += d->blocksNum * d->sizeofBlock;
        }
 }