1
0
Fork 0
mirror of https://github.com/sockspls/badfish synced 2025-04-30 16:53:09 +00:00

Clarify why blockLengthSize > blocksNum

Ronald de Man says:

This difference has to do with the fact that the "sparse index"
does not point to "k * d->span", but to "k * d->span + d->span / 2".

Since k = idx / d->span, we know that k * d->span <= idx,
so k * d->span is a valid index into the table.
But k * d->span + d->span / 2 might be a value that is bigger than
the largest index for the table (if "idx" happens to be near the
end of the table).

So the last valid entry in the SparseIndex[] array might have
to point to a block and (sub)index that is not part of the
real table but comes "after" it. To make this work, the generator
adds entries for a few "fake" blocks, each of maximum size 65536,
to the blockLength[] array so that there is something to point to
for the last valid entry in the SparseIndex[] array.

These fake blocks do not correspond to any compressed data.

So the fake blocks avoid the need to detect and handle this special
case in decompress_pairs().
This commit is contained in:
Marco Costalba 2016-05-15 18:03:30 +02:00
parent 7448fce808
commit 306561431b

View file

@ -102,19 +102,19 @@ struct PairsData {
int flags;
size_t sizeofBlock; // Block size in bytes
size_t span; // About every span values there is a SparseIndex[] entry
int real_num_blocks;
int blocksNum; // Number of blocks in the TB file
int maxSymLen; // Maximum length in bits of the Huffman symbols
int minSymLen; // Minimum length in bits of the Huffman symbols
Sym* lowestSym; // Value of the lowest symbol of length l is lowestSym[l]
LR* btree; // btree[sym] stores the left and right symbols that expand sym
uint16_t* blockLength; // Number of stored positions (minus one) for each block: 1..65536
int blockLengthSize; // Size of blockLength[] table
int blockLengthSize; // Size of blockLength[] table: padded so it's bigger than blocksNum
SparseEntry* sparseIndex; // Partial indices into blockLength[]
size_t sparseIndexSize; // Size of SparseIndex[] table
uint8_t* data; // Start of Huffman compressed data
std::vector<uint64_t> base64; // Smallest symbol of length l padded to 64 bits is at base64[l - min_sym_len]
std::vector<uint8_t> symlen; // Number of values (-1) represented by a given Huffman symbol: 1..256
Piece pieces[TBPIECES];
Piece pieces[TBPIECES]; // Sequence of the pieces: order is critical to ensure the best compression
uint64_t groupSize[TBPIECES]; // Size needed by a given subset of pieces: KRKN -> (KRK) + (N)
uint8_t groupLen[TBPIECES]; // Number of pieces in a given group: KRKN -> (3) + (1)
};
@ -972,7 +972,7 @@ uint8_t* set_sizes(PairsData* d, uint8_t* data, uint64_t tb_size)
d->flags = *data++;
if (d->flags & TBFlag::SingleValue) {
d->real_num_blocks = d->span =
d->blocksNum = d->span =
d->blockLengthSize = d->sparseIndexSize = 0; // Broken MSVC zero-init
d->minSymLen = *data++; // Here we store the single value
return data;
@ -981,9 +981,10 @@ uint8_t* set_sizes(PairsData* d, uint8_t* data, uint64_t tb_size)
d->sizeofBlock = 1ULL << *data++;
d->span = 1ULL << *data++;
d->sparseIndexSize = (tb_size + d->span - 1) / d->span; // Round up
d->blockLengthSize = number<uint8_t, LittleEndian>(data++);
d->real_num_blocks = number<uint32_t, LittleEndian>(data); data += sizeof(uint32_t);
d->blockLengthSize += d->real_num_blocks;
int padding = number<uint8_t, LittleEndian>(data++);
d->blocksNum = number<uint32_t, LittleEndian>(data); data += sizeof(uint32_t);
d->blockLengthSize = d->blocksNum + padding; // Padded to ensure SparseIndex[]
// does not go out of range.
d->maxSymLen = *data++;
d->minSymLen = *data++;
d->lowestSym = (Sym*)data;
@ -1105,7 +1106,7 @@ void do_init(Entry& e, T& p, uint8_t* data)
for (int k = 0; k <= split; k++) {
data = (uint8_t*)(((uintptr_t)data + 0x3F) & ~0x3F); // 64 byte alignment
(d = item(p, k, f).precomp)->data = data;
data += d->real_num_blocks * d->sizeofBlock;
data += d->blocksNum * d->sizeofBlock;
}
}