summaryrefslogtreecommitdiff
path: root/lib/zstd/compress/huf_compress.c
diff options
context:
space:
mode:
authorNick Terrell <terrelln@meta.com>2025-03-08 12:09:33 -0800
committerNick Terrell <terrelln@meta.com>2025-03-13 13:25:58 -0700
commit65d1f5507ed2c78c64fce40e44e5574a9419eb09 (patch)
tree4a1b819db2ea7f2a322e9bcc7582d946d0a4ea29 /lib/zstd/compress/huf_compress.c
parent7eb172143d5508b4da468ed59ee857c6e5e01da6 (diff)
zstd: Import upstream v1.5.7
In addition to keeping the kernel's copy of zstd up to date, this update was requested by Intel to expose upstream's APIs that allow QAT to accelerate the LZ match finding stage of Zstd. This patch is imported from the upstream tag v1.5.7-kernel [0], which is signed with upstream's signing key EF8FE99528B52FFD [1]. It was imported from upstream using this command: export ZSTD=/path/to/repo/zstd/ export LINUX=/path/to/repo/linux/ cd "$ZSTD/contrib/linux-kernel" git checkout v1.5.7-kernel make import LINUX="$LINUX" This patch has been tested on x86-64, and has been boot tested with a zstd compressed kernel & initramfs on i386 and aarch64. I benchmarked the patch on x86-64 with gcc-14.2.1 on an Intel i9-9900K by measruing the performance of compressed filesystem reads and writes. Component, Level, Size delta, C. time delta, D. time delta Btrfs , 1, +0.00%, -6.1%, +1.4% Btrfs , 3, +0.00%, -9.8%, +3.0% Btrfs , 5, +0.00%, +1.7%, +1.4% Btrfs , 7, +0.00%, -1.9%, +2.7% Btrfs , 9, +0.00%, -3.4%, +3.7% Btrfs , 15, +0.00%, -0.3%, +3.6% SquashFS , 1, +0.00%, N/A, +1.9% The major changes that impact the kernel use cases for each version are: v1.5.7: https://github.com/facebook/zstd/releases/tag/v1.5.7 * Add zstd_compress_sequences_and_literals() for use by Intel's QAT driver to implement Zstd compression acceleration in the kernel. * Fix an underflow bug in 32-bit builds that can cause data corruption when processing more than 4GB of data with a single `ZSTD_CCtx` object, when an input crosses the 4GB boundry. I don't believe this impacts any current kernel use cases, because the `ZSTD_CCtx` is typically reconstructed between compressions. * Levels 1-4 see 5-10% compression speed improvements for inputs smaller than 128KB. v1.5.6: https://github.com/facebook/zstd/releases/tag/v1.5.6 * Improved compression ratio for the highest compression levels. I don't expect these see much use however, due to their slow speeds. v1.5.5: https://github.com/facebook/zstd/releases/tag/v1.5.5 * Fix a rare corruption bug that can trigger on levels 13 and above. * Improve compression speed of levels 5-11 on incompressible data. v1.5.4: https://github.com/facebook/zstd/releases/tag/v1.5.4 * Improve copmression speed of levels 5-11 on ARM. * Improve dictionary compression speed. Signed-off-by: Nick Terrell <terrelln@fb.com>
Diffstat (limited to 'lib/zstd/compress/huf_compress.c')
-rw-r--r--lib/zstd/compress/huf_compress.c441
1 files changed, 284 insertions, 157 deletions
diff --git a/lib/zstd/compress/huf_compress.c b/lib/zstd/compress/huf_compress.c
index 74ef0db47621..0b229f5d2ae2 100644
--- a/lib/zstd/compress/huf_compress.c
+++ b/lib/zstd/compress/huf_compress.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* ******************************************************************
* Huffman encoder, part of New Generation Entropy library
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -26,9 +27,9 @@
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
#include "../common/fse.h" /* header compression */
-#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/error_private.h"
+#include "../common/bits.h" /* ZSTD_highbit32 */
/* **************************************************************
@@ -39,13 +40,67 @@
/* **************************************************************
-* Utils
+* Required declarations
****************************************************************/
-unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+typedef struct nodeElt_s {
+ U32 count;
+ U16 parent;
+ BYTE byte;
+ BYTE nbBits;
+} nodeElt;
+
+
+/* **************************************************************
+* Debug Traces
+****************************************************************/
+
+#if DEBUGLEVEL >= 2
+
+static size_t showU32(const U32* arr, size_t size)
{
- return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
+ size_t u;
+ for (u=0; u<size; u++) {
+ RAWLOG(6, " %u", arr[u]); (void)arr;
+ }
+ RAWLOG(6, " \n");
+ return size;
}
+static size_t HUF_getNbBits(HUF_CElt elt);
+
+static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
+{
+ size_t u;
+ for (u=0; u<size; u++) {
+ RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
+ }
+ RAWLOG(6, " \n");
+ return size;
+
+}
+
+static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
+{
+ size_t u;
+ for (u=0; u<size; u++) {
+ RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
+ }
+ RAWLOG(6, " \n");
+ return size;
+}
+
+static size_t showHNodeBits(const nodeElt* hnode, size_t size)
+{
+ size_t u;
+ for (u=0; u<size; u++) {
+ RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
+ }
+ RAWLOG(6, " \n");
+ return size;
+}
+
+#endif
+
/* *******************************************************
* HUF : Huffman block compression
@@ -86,7 +141,10 @@ typedef struct {
S16 norm[HUF_TABLELOG_MAX+1];
} HUF_CompressWeightsWksp;
-static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
+static size_t
+HUF_compressWeights(void* dst, size_t dstSize,
+ const void* weightTable, size_t wtSize,
+ void* workspace, size_t workspaceSize)
{
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
@@ -137,7 +195,7 @@ static size_t HUF_getNbBitsFast(HUF_CElt elt)
static size_t HUF_getValue(HUF_CElt elt)
{
- return elt & ~0xFF;
+ return elt & ~(size_t)0xFF;
}
static size_t HUF_getValueFast(HUF_CElt elt)
@@ -160,6 +218,25 @@ static void HUF_setValue(HUF_CElt* elt, size_t value)
}
}
+HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
+{
+ HUF_CTableHeader header;
+ ZSTD_memcpy(&header, ctable, sizeof(header));
+ return header;
+}
+
+static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
+{
+ HUF_CTableHeader header;
+ HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header));
+ ZSTD_memset(&header, 0, sizeof(header));
+ assert(tableLog < 256);
+ header.tableLog = (BYTE)tableLog;
+ assert(maxSymbolValue < 256);
+ header.maxSymbolValue = (BYTE)maxSymbolValue;
+ ZSTD_memcpy(ctable, &header, sizeof(header));
+}
+
typedef struct {
HUF_CompressWeightsWksp wksp;
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
@@ -175,6 +252,11 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
U32 n;
HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
+ HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
+
+ assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
+ assert(HUF_readCTableHeader(CTable).tableLog == huffLog);
+
/* check conditions */
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
@@ -204,16 +286,6 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
return ((maxSymbolValue+1)/2) + 1;
}
-/*! HUF_writeCTable() :
- `CTable` : Huffman tree to save, using huf representation.
- @return : size of saved CTable */
-size_t HUF_writeCTable (void* dst, size_t maxDstSize,
- const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
-{
- HUF_WriteCTableWksp wksp;
- return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
-}
-
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
@@ -231,7 +303,9 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
- CTable[0] = tableLog;
+ *maxSymbolValuePtr = nbSymbols - 1;
+
+ HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);
/* Prepare base value per rank */
{ U32 n, nextRankStart = 0;
@@ -263,74 +337,71 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
{ U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
}
- *maxSymbolValuePtr = nbSymbols - 1;
return readSize;
}
U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{
- const HUF_CElt* ct = CTable + 1;
+ const HUF_CElt* const ct = CTable + 1;
assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
+ if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue)
+ return 0;
return (U32)HUF_getNbBits(ct[symbolValue]);
}
-typedef struct nodeElt_s {
- U32 count;
- U16 parent;
- BYTE byte;
- BYTE nbBits;
-} nodeElt;
-
/*
* HUF_setMaxHeight():
- * Enforces maxNbBits on the Huffman tree described in huffNode.
+ * Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
*
- * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
- * the tree to so that it is a valid canonical Huffman tree.
+ * It attempts to convert all nodes with nbBits > @targetNbBits
+ * to employ @targetNbBits instead. Then it adjusts the tree
+ * so that it remains a valid canonical Huffman tree.
*
* @pre The sum of the ranks of each symbol == 2^largestBits,
* where largestBits == huffNode[lastNonNull].nbBits.
* @post The sum of the ranks of each symbol == 2^largestBits,
- * where largestBits is the return value <= maxNbBits.
+ * where largestBits is the return value (expected <= targetNbBits).
*
- * @param huffNode The Huffman tree modified in place to enforce maxNbBits.
+ * @param huffNode The Huffman tree modified in place to enforce targetNbBits.
+ * It's presumed sorted, from most frequent to rarest symbol.
* @param lastNonNull The symbol with the lowest count in the Huffman tree.
- * @param maxNbBits The maximum allowed number of bits, which the Huffman tree
+ * @param targetNbBits The allowed number of bits, which the Huffman tree
* may not respect. After this function the Huffman tree will
- * respect maxNbBits.
- * @return The maximum number of bits of the Huffman tree after adjustment,
- * necessarily no more than maxNbBits.
+ * respect targetNbBits.
+ * @return The maximum number of bits of the Huffman tree after adjustment.
*/
-static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
+static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
{
const U32 largestBits = huffNode[lastNonNull].nbBits;
- /* early exit : no elt > maxNbBits, so the tree is already valid. */
- if (largestBits <= maxNbBits) return largestBits;
+ /* early exit : no elt > targetNbBits, so the tree is already valid. */
+ if (largestBits <= targetNbBits) return largestBits;
+
+ DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
/* there are several too large elements (at least >= 2) */
{ int totalCost = 0;
- const U32 baseCost = 1 << (largestBits - maxNbBits);
+ const U32 baseCost = 1 << (largestBits - targetNbBits);
int n = (int)lastNonNull;
- /* Adjust any ranks > maxNbBits to maxNbBits.
+ /* Adjust any ranks > targetNbBits to targetNbBits.
* Compute totalCost, which is how far the sum of the ranks is
* we are over 2^largestBits after adjust the offending ranks.
*/
- while (huffNode[n].nbBits > maxNbBits) {
+ while (huffNode[n].nbBits > targetNbBits) {
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
- huffNode[n].nbBits = (BYTE)maxNbBits;
+ huffNode[n].nbBits = (BYTE)targetNbBits;
n--;
}
- /* n stops at huffNode[n].nbBits <= maxNbBits */
- assert(huffNode[n].nbBits <= maxNbBits);
- /* n end at index of smallest symbol using < maxNbBits */
- while (huffNode[n].nbBits == maxNbBits) --n;
+ /* n stops at huffNode[n].nbBits <= targetNbBits */
+ assert(huffNode[n].nbBits <= targetNbBits);
+ /* n end at index of smallest symbol using < targetNbBits */
+ while (huffNode[n].nbBits == targetNbBits) --n;
- /* renorm totalCost from 2^largestBits to 2^maxNbBits
+ /* renorm totalCost from 2^largestBits to 2^targetNbBits
* note : totalCost is necessarily a multiple of baseCost */
- assert((totalCost & (baseCost - 1)) == 0);
- totalCost >>= (largestBits - maxNbBits);
+ assert(((U32)totalCost & (baseCost - 1)) == 0);
+ totalCost >>= (largestBits - targetNbBits);
assert(totalCost > 0);
/* repay normalized cost */
@@ -339,19 +410,19 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
/* Get pos of last (smallest = lowest cum. count) symbol per rank */
ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
- { U32 currentNbBits = maxNbBits;
+ { U32 currentNbBits = targetNbBits;
int pos;
for (pos=n ; pos >= 0; pos--) {
if (huffNode[pos].nbBits >= currentNbBits) continue;
- currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
- rankLast[maxNbBits-currentNbBits] = (U32)pos;
+ currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */
+ rankLast[targetNbBits-currentNbBits] = (U32)pos;
} }
while (totalCost > 0) {
/* Try to reduce the next power of 2 above totalCost because we
* gain back half the rank.
*/
- U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
+ U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
U32 const highPos = rankLast[nBitsToDecrease];
U32 const lowPos = rankLast[nBitsToDecrease-1];
@@ -391,7 +462,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
rankLast[nBitsToDecrease] = noSymbol;
else {
rankLast[nBitsToDecrease]--;
- if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
+ if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
}
} /* while (totalCost > 0) */
@@ -403,11 +474,11 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
* TODO.
*/
while (totalCost < 0) { /* Sometimes, cost correction overshoot */
- /* special case : no rank 1 symbol (using maxNbBits-1);
- * let's create one from largest rank 0 (using maxNbBits).
+ /* special case : no rank 1 symbol (using targetNbBits-1);
+ * let's create one from largest rank 0 (using targetNbBits).
*/
if (rankLast[1] == noSymbol) {
- while (huffNode[n].nbBits == maxNbBits) n--;
+ while (huffNode[n].nbBits == targetNbBits) n--;
huffNode[n+1].nbBits--;
assert(n >= 0);
rankLast[1] = (U32)(n+1);
@@ -421,7 +492,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
} /* repay normalized cost */
} /* there are several too large elements (at least >= 2) */
- return maxNbBits;
+ return targetNbBits;
}
typedef struct {
@@ -429,7 +500,7 @@ typedef struct {
U16 curr;
} rankPos;
-typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
+typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
/* Number of buckets available for HUF_sort() */
#define RANK_POSITION_TABLE_SIZE 192
@@ -448,8 +519,8 @@ typedef struct {
* Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
*/
#define RANK_POSITION_MAX_COUNT_LOG 32
-#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */
-#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */
+#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
+#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
/* Return the appropriate bucket index for a given count. See definition of
* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
@@ -457,7 +528,7 @@ typedef struct {
static U32 HUF_getIndex(U32 const count) {
return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
? count
- : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
+ : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
}
/* Helper swap function for HUF_quickSortPartition() */
@@ -580,7 +651,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy
/* Sort each bucket. */
for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
- U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
+ int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
U32 const bucketStartIdx = rankPosition[n].base;
if (bucketSize > 1) {
assert(bucketStartIdx < maxSymbolValue1);
@@ -591,6 +662,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy
assert(HUF_isSorted(huffNode, maxSymbolValue1));
}
+
/* HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
@@ -611,6 +683,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
int lowS, lowN;
int nodeNb = STARTNODE;
int n, nodeRoot;
+ DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
/* init for parents */
nonNullRank = (int)maxSymbolValue;
while(huffNode[nonNullRank].count == 0) nonNullRank--;
@@ -637,6 +710,8 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
for (n=0; n<=nonNullRank; n++)
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+ DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
+
return nonNullRank;
}
@@ -671,31 +746,40 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
for (n=0; n<alphabetSize; n++)
HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
- CTable[0] = maxNbBits;
+
+ HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue);
}
-size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
+size_t
+HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
+ void* workSpace, size_t wkspSize)
{
- HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
+ HUF_buildCTable_wksp_tables* const wksp_tables =
+ (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
nodeElt* const huffNode = huffNode0+1;
int nonNullRank;
+ HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
+
+ DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
+
/* safety checks */
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
- return ERROR(workSpace_tooSmall);
+ return ERROR(workSpace_tooSmall);
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
- return ERROR(maxSymbolValue_tooLarge);
+ return ERROR(maxSymbolValue_tooLarge);
ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
/* sort, decreasing order */
HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
+ DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
/* build tree */
nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
- /* enforce maxTableLog */
+ /* determine and enforce maxTableLog */
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
@@ -716,13 +800,20 @@ size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count,
}
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
- HUF_CElt const* ct = CTable + 1;
- int bad = 0;
- int s;
- for (s = 0; s <= (int)maxSymbolValue; ++s) {
- bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
- }
- return !bad;
+ HUF_CTableHeader header = HUF_readCTableHeader(CTable);
+ HUF_CElt const* ct = CTable + 1;
+ int bad = 0;
+ int s;
+
+ assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);
+
+ if (header.maxSymbolValue < maxSymbolValue)
+ return 0;
+
+ for (s = 0; s <= (int)maxSymbolValue; ++s) {
+ bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
+ }
+ return !bad;
}
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
@@ -804,7 +895,7 @@ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int id
#if DEBUGLEVEL >= 1
{
size_t const nbBits = HUF_getNbBits(elt);
- size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
+ size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
(void)dirtyBits;
/* Middle bits are 0. */
assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
@@ -884,7 +975,7 @@ static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{
size_t const nbBits = bitC->bitPos[0] & 0xFF;
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
- return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
+ return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
}
}
@@ -964,17 +1055,17 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable)
{
- U32 const tableLog = (U32)CTable[0];
+ U32 const tableLog = HUF_readCTableHeader(CTable).tableLog;
HUF_CElt const* ct = CTable + 1;
const BYTE* ip = (const BYTE*) src;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
- BYTE* op = ostart;
HUF_CStream_t bitC;
/* init */
if (dstSize < 8) return 0; /* not enough space to compress */
- { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
+ { BYTE* op = ostart;
+ size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
if (HUF_isError(initErr)) return 0; }
if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
@@ -1045,9 +1136,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize,
- const HUF_CElt* CTable, const int bmi2)
+ const HUF_CElt* CTable, const int flags)
{
- if (bmi2) {
+ if (flags & HUF_flags_bmi2) {
return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
}
return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
@@ -1058,28 +1149,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize,
- const HUF_CElt* CTable, const int bmi2)
+ const HUF_CElt* CTable, const int flags)
{
- (void)bmi2;
+ (void)flags;
return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}
#endif
-size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{
- return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
-}
-
-size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
-{
- return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
+ return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
}
static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize,
- const HUF_CElt* CTable, int bmi2)
+ const HUF_CElt* CTable, int flags)
{
size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
const BYTE* ip = (const BYTE*) src;
@@ -1093,7 +1179,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
op += 6; /* jumpTable */
assert(op <= oend);
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart, (U16)cSize);
op += cSize;
@@ -1101,7 +1187,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize;
assert(op <= oend);
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart+2, (U16)cSize);
op += cSize;
@@ -1109,7 +1195,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize;
assert(op <= oend);
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart+4, (U16)cSize);
op += cSize;
@@ -1118,7 +1204,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize;
assert(op <= oend);
assert(ip <= iend);
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0;
op += cSize;
}
@@ -1126,14 +1212,9 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
return (size_t)(op-ostart);
}
-size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
-{
- return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
-}
-
-size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{
- return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
+ return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
}
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@@ -1141,11 +1222,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
static size_t HUF_compressCTable_internal(
BYTE* const ostart, BYTE* op, BYTE* const oend,
const void* src, size_t srcSize,
- HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
+ HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
{
size_t const cSize = (nbStreams==HUF_singleStream) ?
- HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
- HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
+ HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
if (HUF_isError(cSize)) { return cSize; }
if (cSize==0) { return 0; } /* uncompressible */
op += cSize;
@@ -1168,6 +1249,81 @@ typedef struct {
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
+unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
+{
+ unsigned cardinality = 0;
+ unsigned i;
+
+ for (i = 0; i < maxSymbolValue + 1; i++) {
+ if (count[i] != 0) cardinality += 1;
+ }
+
+ return cardinality;
+}
+
+unsigned HUF_minTableLog(unsigned symbolCardinality)
+{
+ U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
+ return minBitsSymbols;
+}
+
+unsigned HUF_optimalTableLog(
+ unsigned maxTableLog,
+ size_t srcSize,
+ unsigned maxSymbolValue,
+ void* workSpace, size_t wkspSize,
+ HUF_CElt* table,
+ const unsigned* count,
+ int flags)
+{
+ assert(srcSize > 1); /* Not supported, RLE should be used instead */
+ assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
+
+ if (!(flags & HUF_flags_optimalDepth)) {
+ /* cheap evaluation, based on FSE */
+ return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
+ }
+
+ { BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
+ size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
+ size_t hSize, newSize;
+ const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
+ const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
+ size_t optSize = ((size_t) ~0) - 1;
+ unsigned optLog = maxTableLog, optLogGuess;
+
+ DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
+
+ /* Search until size increases */
+ for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
+ DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
+
+ { size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
+ if (ERR_isError(maxBits)) continue;
+
+ if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
+
+ hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
+ }
+
+ if (ERR_isError(hSize)) continue;
+
+ newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
+
+ if (newSize > optSize + 1) {
+ break;
+ }
+
+ if (newSize < optSize) {
+ optSize = newSize;
+ optLog = optLogGuess;
+ }
+ }
+ assert(optLog <= HUF_TABLELOG_MAX);
+ return optLog;
+ }
+}
+
/* HUF_compress_internal() :
* `workSpace_align4` must be aligned on 4-bytes boundaries,
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
@@ -1177,14 +1333,14 @@ HUF_compress_internal (void* dst, size_t dstSize,
unsigned maxSymbolValue, unsigned huffLog,
HUF_nbStreams_e nbStreams,
void* workSpace, size_t wkspSize,
- HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
- const int bmi2, unsigned suspectUncompressible)
+ HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
{
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
+ DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
/* checks & inits */
@@ -1198,16 +1354,17 @@ HUF_compress_internal (void* dst, size_t dstSize,
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
/* Heuristic : If old table is valid, use it for small inputs */
- if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
+ if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
- nbStreams, oldHufTable, bmi2);
+ nbStreams, oldHufTable, flags);
}
/* If uncompressible data is suspected, do a smaller sampling first */
DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
- if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
+ if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
size_t largestTotal = 0;
+ DEBUGLOG(5, "input suspected incompressible : sampling to check");
{ unsigned maxSymbolValueBegin = maxSymbolValue;
CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
largestTotal += largestBegin;
@@ -1224,6 +1381,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
}
+ DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
/* Check validity of previous table */
if ( repeat
@@ -1232,25 +1390,20 @@ HUF_compress_internal (void* dst, size_t dstSize,
*repeat = HUF_repeat_none;
}
/* Heuristic : use existing table for small inputs */
- if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
+ if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
- nbStreams, oldHufTable, bmi2);
+ nbStreams, oldHufTable, flags);
}
/* Build Huffman Tree */
- huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
maxSymbolValue, huffLog,
&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
CHECK_F(maxBits);
huffLog = (U32)maxBits;
- }
- /* Zero unused symbols in CTable, so we can check it for validity */
- {
- size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
- size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
- ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
+ DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
}
/* Write table description header */
@@ -1263,7 +1416,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
- nbStreams, oldHufTable, bmi2);
+ nbStreams, oldHufTable, flags);
} }
/* Use the new huffman table */
@@ -1275,61 +1428,35 @@ HUF_compress_internal (void* dst, size_t dstSize,
}
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
- nbStreams, table->CTable, bmi2);
-}
-
-
-size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
- const void* src, size_t srcSize,
- unsigned maxSymbolValue, unsigned huffLog,
- void* workSpace, size_t wkspSize)
-{
- return HUF_compress_internal(dst, dstSize, src, srcSize,
- maxSymbolValue, huffLog, HUF_singleStream,
- workSpace, wkspSize,
- NULL, NULL, 0, 0 /*bmi2*/, 0);
+ nbStreams, table->CTable, flags);
}
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
- int bmi2, unsigned suspectUncompressible)
+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{
+ DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_singleStream,
workSpace, wkspSize, hufTable,
- repeat, preferRepeat, bmi2, suspectUncompressible);
-}
-
-/* HUF_compress4X_repeat():
- * compress input using 4 streams.
- * provide workspace to generate compression tables */
-size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
- const void* src, size_t srcSize,
- unsigned maxSymbolValue, unsigned huffLog,
- void* workSpace, size_t wkspSize)
-{
- return HUF_compress_internal(dst, dstSize, src, srcSize,
- maxSymbolValue, huffLog, HUF_fourStreams,
- workSpace, wkspSize,
- NULL, NULL, 0, 0 /*bmi2*/, 0);
+ repeat, flags);
}
/* HUF_compress4X_repeat():
* compress input using 4 streams.
* consider skipping quickly
- * re-use an existing huffman compression table */
+ * reuse an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{
+ DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_fourStreams,
workSpace, wkspSize,
- hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
+ hufTable, repeat, flags);
}
-