diff options
Diffstat (limited to 'lib/zstd/decompress/zstd_decompress_block.c')
| -rw-r--r-- | lib/zstd/decompress/zstd_decompress_block.c | 724 | 
1 files changed, 431 insertions, 293 deletions
| diff --git a/lib/zstd/decompress/zstd_decompress_block.c b/lib/zstd/decompress/zstd_decompress_block.c index c1913b8e7c89..710eb0ffd5a3 100644 --- a/lib/zstd/decompress/zstd_decompress_block.c +++ b/lib/zstd/decompress/zstd_decompress_block.c @@ -1,5 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause  /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates.   * All rights reserved.   *   * This source code is licensed under both the BSD-style license (found in the @@ -20,12 +21,12 @@  #include "../common/mem.h"         /* low level memory routines */  #define FSE_STATIC_LINKING_ONLY  #include "../common/fse.h" -#define HUF_STATIC_LINKING_ONLY  #include "../common/huf.h"  #include "../common/zstd_internal.h"  #include "zstd_decompress_internal.h"   /* ZSTD_DCtx */  #include "zstd_ddict.h"  /* ZSTD_DDictDictContent */  #include "zstd_decompress_block.h" +#include "../common/bits.h"  /* ZSTD_highbit32 */  /*_*******************************************************  *  Macros @@ -51,6 +52,13 @@ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }   *   Block decoding   ***************************************************************/ +static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx) +{ +    size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX; +    assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX); +    return blockSizeMax; +} +  /*! ZSTD_getcBlockSize() :   *  Provides the size of compressed block from block header `src` */  size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, @@ -73,41 +81,49 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,  static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,      const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)  { -    if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) -    { -        /* room for litbuffer to fit without read faulting */ -        dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH; +    size_t const blockSizeMax = ZSTD_blockSizeMax(dctx); +    assert(litSize <= blockSizeMax); +    assert(dctx->isFrameDecompression || streaming == not_streaming); +    assert(expectedWriteSize <= blockSizeMax); +    if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) { +        /* If we aren't streaming, we can just put the literals after the output +         * of the current block. We don't need to worry about overwriting the +         * extDict of our window, because it doesn't exist. +         * So if we have space after the end of the block, just put it there. +         */ +        dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH;          dctx->litBufferEnd = dctx->litBuffer + litSize;          dctx->litBufferLocation = ZSTD_in_dst; -    } -    else if (litSize > ZSTD_LITBUFFEREXTRASIZE) -    { -        /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ +    } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) { +        /* Literals fit entirely within the extra buffer, put them there to avoid +         * having to split the literals. +         */ +        dctx->litBuffer = dctx->litExtraBuffer; +        dctx->litBufferEnd = dctx->litBuffer + litSize; +        dctx->litBufferLocation = ZSTD_not_in_dst; +    } else { +        assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE); +        /* Literals must be split between the output block and the extra lit +         * buffer. We fill the extra lit buffer with the tail of the literals, +         * and put the rest of the literals at the end of the block, with +         * WILDCOPY_OVERLENGTH of buffer room to allow for overreads. +         * This MUST not write more than our maxBlockSize beyond dst, because in +         * streaming mode, that could overwrite part of our extDict window. +         */          if (splitImmediately) {              /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */              dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;              dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE; -        } -        else { -            /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */ +        } else { +            /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */              dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;              dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;          }          dctx->litBufferLocation = ZSTD_split; -    } -    else -    { -        /* fits entirely within litExtraBuffer, so no split is necessary */ -        dctx->litBuffer = dctx->litExtraBuffer; -        dctx->litBufferEnd = dctx->litBuffer + litSize; -        dctx->litBufferLocation = ZSTD_not_in_dst; +        assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize);      }  } -/* Hidden declaration for fullbench */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, -                          const void* src, size_t srcSize, -                          void* dst, size_t dstCapacity, const streaming_operation streaming);  /*! ZSTD_decodeLiteralsBlock() :   * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored   * in the dstBuffer.  If there is room to do so, it will be stored in full in the excess dst space after where the current @@ -116,7 +132,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,   *   * @return : nb of bytes read from src (< srcSize )   *  note : symbol not declared but exposed for fullbench */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, +static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,                            const void* src, size_t srcSize,   /* note : srcSize < BLOCKSIZE */                            void* dst, size_t dstCapacity, const streaming_operation streaming)  { @@ -124,7 +140,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,      RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");      {   const BYTE* const istart = (const BYTE*) src; -        symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); +        SymbolEncodingType_e const litEncType = (SymbolEncodingType_e)(istart[0] & 3); +        size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);          switch(litEncType)          { @@ -134,13 +151,16 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,              ZSTD_FALLTHROUGH;          case set_compressed: -            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); +            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");              {   size_t lhSize, litSize, litCSize;                  U32 singleStream=0;                  U32 const lhlCode = (istart[0] >> 2) & 3;                  U32 const lhc = MEM_readLE32(istart);                  size_t hufSuccess; -                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); +                size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); +                int const flags = 0 +                    | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0) +                    | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);                  switch(lhlCode)                  {                  case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */ @@ -164,7 +184,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,                      break;                  }                  RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); -                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); +                RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); +                if (!singleStream) +                    RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong, +                        "Not enough literals (%zu) for the 4-streams mode (min %u)", +                        litSize, MIN_LITERALS_FOR_4_STREAMS);                  RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");                  RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");                  ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0); @@ -176,13 +200,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,                  if (litEncType==set_repeat) {                      if (singleStream) { -                        hufSuccess = HUF_decompress1X_usingDTable_bmi2( +                        hufSuccess = HUF_decompress1X_usingDTable(                              dctx->litBuffer, litSize, istart+lhSize, litCSize, -                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); +                            dctx->HUFptr, flags);                      } else { -                        hufSuccess = HUF_decompress4X_usingDTable_bmi2( +                        assert(litSize >= MIN_LITERALS_FOR_4_STREAMS); +                        hufSuccess = HUF_decompress4X_usingDTable(                              dctx->litBuffer, litSize, istart+lhSize, litCSize, -                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); +                            dctx->HUFptr, flags);                      }                  } else {                      if (singleStream) { @@ -190,26 +215,28 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,                          hufSuccess = HUF_decompress1X_DCtx_wksp(                              dctx->entropy.hufTable, dctx->litBuffer, litSize,                              istart+lhSize, litCSize, dctx->workspace, -                            sizeof(dctx->workspace)); +                            sizeof(dctx->workspace), flags);  #else -                        hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( +                        hufSuccess = HUF_decompress1X1_DCtx_wksp(                              dctx->entropy.hufTable, dctx->litBuffer, litSize,                              istart+lhSize, litCSize, dctx->workspace, -                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); +                            sizeof(dctx->workspace), flags);  #endif                      } else { -                        hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( +                        hufSuccess = HUF_decompress4X_hufOnly_wksp(                              dctx->entropy.hufTable, dctx->litBuffer, litSize,                              istart+lhSize, litCSize, dctx->workspace, -                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); +                            sizeof(dctx->workspace), flags);                      }                  }                  if (dctx->litBufferLocation == ZSTD_split)                  { +                    assert(litSize > ZSTD_LITBUFFEREXTRASIZE);                      ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);                      ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);                      dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;                      dctx->litBufferEnd -= WILDCOPY_OVERLENGTH; +                    assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax);                  }                  RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); @@ -224,7 +251,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,          case set_basic:              {   size_t litSize, lhSize;                  U32 const lhlCode = ((istart[0]) >> 2) & 3; -                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); +                size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);                  switch(lhlCode)                  {                  case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */ @@ -237,11 +264,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,                      break;                  case 3:                      lhSize = 3; +                    RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");                      litSize = MEM_readLE24(istart) >> 4;                      break;                  }                  RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); +                RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");                  RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");                  ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);                  if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */ @@ -270,7 +299,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,          case set_rle:              {   U32 const lhlCode = ((istart[0]) >> 2) & 3;                  size_t litSize, lhSize; -                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); +                size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);                  switch(lhlCode)                  {                  case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */ @@ -279,16 +308,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,                      break;                  case 1:                      lhSize = 2; +                    RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");                      litSize = MEM_readLE16(istart) >> 4;                      break;                  case 3:                      lhSize = 3; +                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");                      litSize = MEM_readLE24(istart) >> 4; -                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");                      break;                  }                  RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); -                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); +                RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");                  RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");                  ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);                  if (dctx->litBufferLocation == ZSTD_split) @@ -310,6 +340,18 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,      }  } +/* Hidden declaration for fullbench */ +size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, +                          const void* src, size_t srcSize, +                          void* dst, size_t dstCapacity); +size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, +                          const void* src, size_t srcSize, +                          void* dst, size_t dstCapacity) +{ +    dctx->isFrameDecompression = 0; +    return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming); +} +  /* Default FSE distribution tables.   * These are pre-calculated FSE decoding tables using default distributions as defined in specification :   * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions @@ -317,7 +359,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,   * - start from default distributions, present in /lib/common/zstd_internal.h   * - generate tables normally, using ZSTD_buildFSETable()   * - printout the content of tables - * - pretify output, report below, test with fuzzer to ensure it's correct */ + * - prettify output, report below, test with fuzzer to ensure it's correct */  /* Default FSE distribution table for Literal Lengths */  static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = { @@ -506,14 +548,15 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,                  for (i = 8; i < n; i += 8) {                      MEM_write64(spread + pos + i, sv);                  } -                pos += n; +                assert(n>=0); +                pos += (size_t)n;              }          }          /* Now we spread those positions across the table. -         * The benefit of doing it in two stages is that we avoid the the +         * The benefit of doing it in two stages is that we avoid the           * variable size inner loop, which caused lots of branch misses.           * Now we can run through all the positions without any branch misses. -         * We unroll the loop twice, since that is what emperically worked best. +         * We unroll the loop twice, since that is what empirically worked best.           */          {              size_t position = 0; @@ -540,7 +583,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,              for (i=0; i<n; i++) {                  tableDecode[position].baseValue = s;                  position = (position + step) & tableMask; -                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */ +                while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask;   /* lowprob area */          }   }          assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */      } @@ -551,7 +594,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,          for (u=0; u<tableSize; u++) {              U32 const symbol = tableDecode[u].baseValue;              U32 const nextState = symbolNext[symbol]++; -            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) ); +            tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );              tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);              assert(nbAdditionalBits[symbol] < 255);              tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol]; @@ -603,7 +646,7 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,   * @return : nb bytes read from src,   *           or an error code if it fails */  static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr, -                                 symbolEncodingType_e type, unsigned max, U32 maxLog, +                                 SymbolEncodingType_e type, unsigned max, U32 maxLog,                                   const void* src, size_t srcSize,                                   const U32* baseValue, const U8* nbAdditionalBits,                                   const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable, @@ -664,11 +707,6 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,      /* SeqHead */      nbSeq = *ip++; -    if (!nbSeq) { -        *nbSeqPtr=0; -        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); -        return 1; -    }      if (nbSeq > 0x7F) {          if (nbSeq == 0xFF) {              RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); @@ -681,11 +719,19 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,      }      *nbSeqPtr = nbSeq; +    if (nbSeq == 0) { +        /* No sequence : section ends immediately */ +        RETURN_ERROR_IF(ip != iend, corruption_detected, +            "extraneous data present in the Sequences section"); +        return (size_t)(ip - istart); +    } +      /* FSE table descriptors */      RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ -    {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); -        symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); -        symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); +    RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */ +    {   SymbolEncodingType_e const LLtype = (SymbolEncodingType_e)(*ip >> 6); +        SymbolEncodingType_e const OFtype = (SymbolEncodingType_e)((*ip >> 4) & 3); +        SymbolEncodingType_e const MLtype = (SymbolEncodingType_e)((*ip >> 2) & 3);          ip++;          /* Build DTables */ @@ -829,7 +875,7 @@ static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, pt  /* ZSTD_safecopyDstBeforeSrc():   * This version allows overlap with dst before src, or handles the non-overlap case with dst after src   * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */ -static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) { +static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) {      ptrdiff_t const diff = op - ip;      BYTE* const oend = op + length; @@ -858,6 +904,7 @@ static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length   * to be optimized for many small sequences, since those fall into ZSTD_execSequence().   */  FORCE_NOINLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR  size_t ZSTD_execSequenceEnd(BYTE* op,      BYTE* const oend, seq_t sequence,      const BYTE** litPtr, const BYTE* const litLimit, @@ -905,6 +952,7 @@ size_t ZSTD_execSequenceEnd(BYTE* op,   * This version is intended to be used during instances where the litBuffer is still split.  It is kept separate to avoid performance impact for the good case.   */  FORCE_NOINLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR  size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,      BYTE* const oend, const BYTE* const oend_w, seq_t sequence,      const BYTE** litPtr, const BYTE* const litLimit, @@ -950,6 +998,7 @@ size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,  }  HINT_INLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR  size_t ZSTD_execSequence(BYTE* op,      BYTE* const oend, seq_t sequence,      const BYTE** litPtr, const BYTE* const litLimit, @@ -964,6 +1013,11 @@ size_t ZSTD_execSequence(BYTE* op,      assert(op != NULL /* Precondition */);      assert(oend_w < oend /* No underflow */); + +#if defined(__aarch64__) +    /* prefetch sequence starting from match that will be used for copy later */ +    PREFETCH_L1(match); +#endif      /* Handle edge cases in a slow path:       *   - Read beyond end of literals       *   - Match end is within WILDCOPY_OVERLIMIT of oend @@ -1043,6 +1097,7 @@ size_t ZSTD_execSequence(BYTE* op,  }  HINT_INLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR  size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,      BYTE* const oend, const BYTE* const oend_w, seq_t sequence,      const BYTE** litPtr, const BYTE* const litLimit, @@ -1154,7 +1209,7 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16  }  /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum - * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) + * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32   * bits before reloading. This value is the maximum number of bytes we read   * after reloading when we are decoding long offsets.   */ @@ -1165,13 +1220,37 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16  typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; +/* + * ZSTD_decodeSequence(): + * @p longOffsets : tells the decoder to reload more bit while decoding large offsets + *                  only used in 32-bit mode + * @return : Sequence (litL + matchL + offset) + */  FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq)  {      seq_t seq; +    /* +     * ZSTD_seqSymbol is a 64 bits wide structure. +     * It can be loaded in one operation +     * and its fields extracted by simply shifting or bit-extracting on aarch64. +     * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh +     * operations that cause performance drop. This can be avoided by using this +     * ZSTD_memcpy hack. +     */ +#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__)) +    ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS; +    ZSTD_seqSymbol* const llDInfo = &llDInfoS; +    ZSTD_seqSymbol* const mlDInfo = &mlDInfoS; +    ZSTD_seqSymbol* const ofDInfo = &ofDInfoS; +    ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol)); +    ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol)); +    ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol)); +#else      const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;      const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;      const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state; +#endif      seq.matchLength = mlDInfo->baseValue;      seq.litLength = llDInfo->baseValue;      {   U32 const ofBase = ofDInfo->baseValue; @@ -1186,28 +1265,31 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)          U32 const llnbBits = llDInfo->nbBits;          U32 const mlnbBits = mlDInfo->nbBits;          U32 const ofnbBits = ofDInfo->nbBits; + +        assert(llBits <= MaxLLBits); +        assert(mlBits <= MaxMLBits); +        assert(ofBits <= MaxOff);          /*           * As gcc has better branch and block analyzers, sometimes it is only -         * valuable to mark likelyness for clang, it gives around 3-4% of +         * valuable to mark likeliness for clang, it gives around 3-4% of           * performance.           */          /* sequence */          {   size_t offset; -    #if defined(__clang__) -            if (LIKELY(ofBits > 1)) { -    #else              if (ofBits > 1) { -    #endif                  ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);                  ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); -                assert(ofBits <= MaxOff); +                ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32); +                ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);                  if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { -                    U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); +                    /* Always read extra bits, this keeps the logic simple, +                     * avoids branches, and avoids accidentally reading 0 bits. +                     */ +                    U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;                      offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);                      BIT_reloadDStream(&seqState->DStream); -                    if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); -                    assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */ +                    offset += BIT_readBitsFast(&seqState->DStream, extraBits);                  } else {                      offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */                      if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); @@ -1224,7 +1306,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)                  } else {                      offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);                      {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; -                        temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */ +                        temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */                          if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];                          seqState->prevOffset[1] = seqState->prevOffset[0];                          seqState->prevOffset[0] = offset = temp; @@ -1232,11 +1314,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)              seq.offset = offset;          } -    #if defined(__clang__) -        if (UNLIKELY(mlBits > 0)) -    #else          if (mlBits > 0) -    #endif              seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);          if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) @@ -1246,11 +1324,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)          /* Ensure there are enough bits to read the rest of data in 64-bit mode. */          ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); -    #if defined(__clang__) -        if (UNLIKELY(llBits > 0)) -    #else          if (llBits > 0) -    #endif              seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);          if (MEM_32bits()) @@ -1259,17 +1333,22 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)          DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",                      (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); -        ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */ -        ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */ -        if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */ -        ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */ +        if (!isLastSeq) { +            /* don't update FSE state for last Sequence */ +            ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */ +            ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */ +            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */ +            ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */ +            BIT_reloadDStream(&seqState->DStream); +        }      }      return seq;  } -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) +#if DEBUGLEVEL >= 1 +static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)  {      size_t const windowSize = dctx->fParams.windowSize;      /* No dictionary used. */ @@ -1283,30 +1362,33 @@ MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefix      /* Dictionary is active. */      return 1;  } +#endif -MEM_STATIC void ZSTD_assertValidSequence( +static void ZSTD_assertValidSequence(          ZSTD_DCtx const* dctx,          BYTE const* op, BYTE const* oend,          seq_t const seq,          BYTE const* prefixStart, BYTE const* virtualStart)  {  #if DEBUGLEVEL >= 1 -    size_t const windowSize = dctx->fParams.windowSize; -    size_t const sequenceSize = seq.litLength + seq.matchLength; -    BYTE const* const oLitEnd = op + seq.litLength; -    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", -            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); -    assert(op <= oend); -    assert((size_t)(oend - op) >= sequenceSize); -    assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); -    if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { -        size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); -        /* Offset must be within the dictionary. */ -        assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); -        assert(seq.offset <= windowSize + dictSize); -    } else { -        /* Offset must be within our window. */ -        assert(seq.offset <= windowSize); +    if (dctx->isFrameDecompression) { +        size_t const windowSize = dctx->fParams.windowSize; +        size_t const sequenceSize = seq.litLength + seq.matchLength; +        BYTE const* const oLitEnd = op + seq.litLength; +        DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", +                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); +        assert(op <= oend); +        assert((size_t)(oend - op) >= sequenceSize); +        assert(sequenceSize <= ZSTD_blockSizeMax(dctx)); +        if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { +            size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); +            /* Offset must be within the dictionary. */ +            assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); +            assert(seq.offset <= windowSize + dictSize); +        } else { +            /* Offset must be within our window. */ +            assert(seq.offset <= windowSize); +        }      }  #else      (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; @@ -1322,23 +1404,21 @@ DONT_VECTORIZE  ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,                                 void* dst, size_t maxDstSize,                           const void* seqStart, size_t seqSize, int nbSeq, -                         const ZSTD_longOffset_e isLongOffset, -                         const int frame) +                         const ZSTD_longOffset_e isLongOffset)  {      const BYTE* ip = (const BYTE*)seqStart;      const BYTE* const iend = ip + seqSize;      BYTE* const ostart = (BYTE*)dst; -    BYTE* const oend = ostart + maxDstSize; +    BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize);      BYTE* op = ostart;      const BYTE* litPtr = dctx->litPtr;      const BYTE* litBufferEnd = dctx->litBufferEnd;      const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);      const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);      const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); -    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer"); -    (void)frame; +    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq); -    /* Regen sequences */ +    /* Literals are split between internal buffer & output buffer */      if (nbSeq) {          seqState_t seqState;          dctx->fseEntropy = 1; @@ -1357,8 +1437,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,                  BIT_DStream_completed < BIT_DStream_overflow);          /* decompress without overrunning litPtr begins */ -        { -            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset); +        {   seq_t sequence = {0,0,0};  /* some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one iteration) */              /* Align the decompression loop to 32 + 16 bytes.                  *                  * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression @@ -1420,27 +1499,26 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,  #endif              /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */ -            for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) { -                size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); +            for ( ; nbSeq; nbSeq--) { +                sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); +                if (litPtr + sequence.litLength > dctx->litBufferEnd) break; +                {   size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);  #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) -                assert(!ZSTD_isError(oneSeqSize)); -                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +                    assert(!ZSTD_isError(oneSeqSize)); +                    ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);  #endif -                if (UNLIKELY(ZSTD_isError(oneSeqSize))) -                    return oneSeqSize; -                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); -                op += oneSeqSize; -                if (UNLIKELY(!--nbSeq)) -                    break; -                BIT_reloadDStream(&(seqState.DStream)); -                sequence = ZSTD_decodeSequence(&seqState, isLongOffset); -            } +                    if (UNLIKELY(ZSTD_isError(oneSeqSize))) +                        return oneSeqSize; +                    DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); +                    op += oneSeqSize; +            }   } +            DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)");              /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */              if (nbSeq > 0) {                  const size_t leftoverLit = dctx->litBufferEnd - litPtr; -                if (leftoverLit) -                { +                DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength); +                if (leftoverLit) {                      RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");                      ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);                      sequence.litLength -= leftoverLit; @@ -1449,24 +1527,22 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,                  litPtr = dctx->litExtraBuffer;                  litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;                  dctx->litBufferLocation = ZSTD_not_in_dst; -                { -                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); +                {   size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);  #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)                      assert(!ZSTD_isError(oneSeqSize)); -                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +                    ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);  #endif                      if (UNLIKELY(ZSTD_isError(oneSeqSize)))                          return oneSeqSize;                      DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);                      op += oneSeqSize; -                    if (--nbSeq) -                        BIT_reloadDStream(&(seqState.DStream));                  } +                nbSeq--;              }          } -        if (nbSeq > 0) /* there is remaining lit from extra buffer */ -        { +        if (nbSeq > 0) { +            /* there is remaining lit from extra buffer */  #if defined(__x86_64__)              __asm__(".p2align 6"); @@ -1485,35 +1561,34 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,  #  endif  #endif -            for (; ; ) { -                seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); +            for ( ; nbSeq ; nbSeq--) { +                seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);                  size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);  #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)                  assert(!ZSTD_isError(oneSeqSize)); -                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +                ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);  #endif                  if (UNLIKELY(ZSTD_isError(oneSeqSize)))                      return oneSeqSize;                  DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);                  op += oneSeqSize; -                if (UNLIKELY(!--nbSeq)) -                    break; -                BIT_reloadDStream(&(seqState.DStream));              }          }          /* check if reached exact end */          DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);          RETURN_ERROR_IF(nbSeq, corruption_detected, ""); -        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); +        DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed); +        RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");          /* save reps for next block */          { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }      }      /* last literal segment */ -    if (dctx->litBufferLocation == ZSTD_split)  /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ -    { -        size_t const lastLLSize = litBufferEnd - litPtr; +    if (dctx->litBufferLocation == ZSTD_split) { +        /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ +        size_t const lastLLSize = (size_t)(litBufferEnd - litPtr); +        DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize);          RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");          if (op != NULL) {              ZSTD_memmove(op, litPtr, lastLLSize); @@ -1523,15 +1598,17 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,          litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;          dctx->litBufferLocation = ZSTD_not_in_dst;      } -    {   size_t const lastLLSize = litBufferEnd - litPtr; +    /* copy last literals from internal buffer */ +    {   size_t const lastLLSize = (size_t)(litBufferEnd - litPtr); +        DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize);          RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");          if (op != NULL) {              ZSTD_memcpy(op, litPtr, lastLLSize);              op += lastLLSize; -        } -    } +    }   } -    return op-ostart; +    DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart)); +    return (size_t)(op - ostart);  }  FORCE_INLINE_TEMPLATE size_t @@ -1539,21 +1616,19 @@ DONT_VECTORIZE  ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,      void* dst, size_t maxDstSize,      const void* seqStart, size_t seqSize, int nbSeq, -    const ZSTD_longOffset_e isLongOffset, -    const int frame) +    const ZSTD_longOffset_e isLongOffset)  {      const BYTE* ip = (const BYTE*)seqStart;      const BYTE* const iend = ip + seqSize;      BYTE* const ostart = (BYTE*)dst; -    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer; +    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer;      BYTE* op = ostart;      const BYTE* litPtr = dctx->litPtr;      const BYTE* const litEnd = litPtr + dctx->litSize;      const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);      const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);      const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd); -    DEBUGLOG(5, "ZSTD_decompressSequences_body"); -    (void)frame; +    DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);      /* Regen sequences */      if (nbSeq) { @@ -1568,11 +1643,6 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,          ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);          assert(dst != NULL); -        ZSTD_STATIC_ASSERT( -            BIT_DStream_unfinished < BIT_DStream_completed && -            BIT_DStream_endOfBuffer < BIT_DStream_completed && -            BIT_DStream_completed < BIT_DStream_overflow); -  #if defined(__x86_64__)              __asm__(".p2align 6");              __asm__("nop"); @@ -1587,73 +1657,70 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,  #  endif  #endif -        for ( ; ; ) { -            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); +        for ( ; nbSeq ; nbSeq--) { +            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);              size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);  #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)              assert(!ZSTD_isError(oneSeqSize)); -            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +            ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);  #endif              if (UNLIKELY(ZSTD_isError(oneSeqSize)))                  return oneSeqSize;              DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);              op += oneSeqSize; -            if (UNLIKELY(!--nbSeq)) -                break; -            BIT_reloadDStream(&(seqState.DStream));          }          /* check if reached exact end */ -        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); -        RETURN_ERROR_IF(nbSeq, corruption_detected, ""); -        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); +        assert(nbSeq == 0); +        RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");          /* save reps for next block */          { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }      }      /* last literal segment */ -    {   size_t const lastLLSize = litEnd - litPtr; +    {   size_t const lastLLSize = (size_t)(litEnd - litPtr); +        DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize);          RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");          if (op != NULL) {              ZSTD_memcpy(op, litPtr, lastLLSize);              op += lastLLSize; -        } -    } +    }   } -    return op-ostart; +    DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart)); +    return (size_t)(op - ostart);  }  static size_t  ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,                                   void* dst, size_t maxDstSize,                             const void* seqStart, size_t seqSize, int nbSeq, -                           const ZSTD_longOffset_e isLongOffset, -                           const int frame) +                           const ZSTD_longOffset_e isLongOffset)  { -    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);  }  static size_t  ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,                                                 void* dst, size_t maxDstSize,                                           const void* seqStart, size_t seqSize, int nbSeq, -                                         const ZSTD_longOffset_e isLongOffset, -                                         const int frame) +                                         const ZSTD_longOffset_e isLongOffset)  { -    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);  }  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -FORCE_INLINE_TEMPLATE size_t -ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence, +FORCE_INLINE_TEMPLATE + +size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,                     const BYTE* const prefixStart, const BYTE* const dictEnd)  {      prefetchPos += sequence.litLength;      {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart; -        const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. -                                                                              * No consequence though : memory address is only used for prefetching, not for dereferencing */ +        /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. +         * No consequence though : memory address is only used for prefetching, not for dereferencing */ +        const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset);          PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */      }      return prefetchPos + sequence.matchLength; @@ -1668,20 +1735,18 @@ ZSTD_decompressSequencesLong_body(                                 ZSTD_DCtx* dctx,                                 void* dst, size_t maxDstSize,                           const void* seqStart, size_t seqSize, int nbSeq, -                         const ZSTD_longOffset_e isLongOffset, -                         const int frame) +                         const ZSTD_longOffset_e isLongOffset)  {      const BYTE* ip = (const BYTE*)seqStart;      const BYTE* const iend = ip + seqSize;      BYTE* const ostart = (BYTE*)dst; -    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize; +    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize);      BYTE* op = ostart;      const BYTE* litPtr = dctx->litPtr;      const BYTE* litBufferEnd = dctx->litBufferEnd;      const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);      const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);      const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); -    (void)frame;      /* Regen sequences */      if (nbSeq) { @@ -1706,20 +1771,17 @@ ZSTD_decompressSequencesLong_body(          ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);          /* prepare in advance */ -        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) { -            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); +        for (seqNb=0; seqNb<seqAdvance; seqNb++) { +            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);              prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);              sequences[seqNb] = sequence;          } -        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");          /* decompress without stomping litBuffer */ -        for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) { -            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset); -            size_t oneSeqSize; +        for (; seqNb < nbSeq; seqNb++) { +            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1); -            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) -            { +            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) {                  /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */                  const size_t leftoverLit = dctx->litBufferEnd - litPtr;                  if (leftoverLit) @@ -1732,26 +1794,26 @@ ZSTD_decompressSequencesLong_body(                  litPtr = dctx->litExtraBuffer;                  litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;                  dctx->litBufferLocation = ZSTD_not_in_dst; -                oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); +                {   size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);  #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) -                assert(!ZSTD_isError(oneSeqSize)); -                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); +                    assert(!ZSTD_isError(oneSeqSize)); +                    ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);  #endif -                if (ZSTD_isError(oneSeqSize)) return oneSeqSize; +                    if (ZSTD_isError(oneSeqSize)) return oneSeqSize; -                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); -                sequences[seqNb & STORED_SEQS_MASK] = sequence; -                op += oneSeqSize; -            } +                    prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); +                    sequences[seqNb & STORED_SEQS_MASK] = sequence; +                    op += oneSeqSize; +            }   }              else              {                  /* lit buffer is either wholly contained in first or second split, or not split at all*/ -                oneSeqSize = dctx->litBufferLocation == ZSTD_split ? +                size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?                      ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :                      ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);  #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)                  assert(!ZSTD_isError(oneSeqSize)); -                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); +                ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);  #endif                  if (ZSTD_isError(oneSeqSize)) return oneSeqSize; @@ -1760,17 +1822,15 @@ ZSTD_decompressSequencesLong_body(                  op += oneSeqSize;              }          } -        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, ""); +        RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");          /* finish queue */          seqNb -= seqAdvance;          for ( ; seqNb<nbSeq ; seqNb++) {              seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]); -            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) -            { +            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) {                  const size_t leftoverLit = dctx->litBufferEnd - litPtr; -                if (leftoverLit) -                { +                if (leftoverLit) {                      RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");                      ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);                      sequence->litLength -= leftoverLit; @@ -1779,11 +1839,10 @@ ZSTD_decompressSequencesLong_body(                  litPtr = dctx->litExtraBuffer;                  litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;                  dctx->litBufferLocation = ZSTD_not_in_dst; -                { -                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); +                {   size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);  #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)                      assert(!ZSTD_isError(oneSeqSize)); -                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); +                    ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);  #endif                      if (ZSTD_isError(oneSeqSize)) return oneSeqSize;                      op += oneSeqSize; @@ -1796,7 +1855,7 @@ ZSTD_decompressSequencesLong_body(                      ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);  #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)                  assert(!ZSTD_isError(oneSeqSize)); -                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); +                ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);  #endif                  if (ZSTD_isError(oneSeqSize)) return oneSeqSize;                  op += oneSeqSize; @@ -1808,8 +1867,7 @@ ZSTD_decompressSequencesLong_body(      }      /* last literal segment */ -    if (dctx->litBufferLocation == ZSTD_split)  /* first deplete literal buffer in dst, then copy litExtraBuffer */ -    { +    if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */          size_t const lastLLSize = litBufferEnd - litPtr;          RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");          if (op != NULL) { @@ -1827,17 +1885,16 @@ ZSTD_decompressSequencesLong_body(          }      } -    return op-ostart; +    return (size_t)(op - ostart);  }  static size_t  ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,                                   void* dst, size_t maxDstSize,                             const void* seqStart, size_t seqSize, int nbSeq, -                           const ZSTD_longOffset_e isLongOffset, -                           const int frame) +                           const ZSTD_longOffset_e isLongOffset)  { -    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);  }  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -1851,20 +1908,18 @@ DONT_VECTORIZE  ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,                                   void* dst, size_t maxDstSize,                             const void* seqStart, size_t seqSize, int nbSeq, -                           const ZSTD_longOffset_e isLongOffset, -                           const int frame) +                           const ZSTD_longOffset_e isLongOffset)  { -    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);  }  static BMI2_TARGET_ATTRIBUTE size_t  DONT_VECTORIZE  ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,                                   void* dst, size_t maxDstSize,                             const void* seqStart, size_t seqSize, int nbSeq, -                           const ZSTD_longOffset_e isLongOffset, -                           const int frame) +                           const ZSTD_longOffset_e isLongOffset)  { -    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);  }  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1873,50 +1928,40 @@ static BMI2_TARGET_ATTRIBUTE size_t  ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,                                   void* dst, size_t maxDstSize,                             const void* seqStart, size_t seqSize, int nbSeq, -                           const ZSTD_longOffset_e isLongOffset, -                           const int frame) +                           const ZSTD_longOffset_e isLongOffset)  { -    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);  }  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */  #endif /* DYNAMIC_BMI2 */ -typedef size_t (*ZSTD_decompressSequences_t)( -                            ZSTD_DCtx* dctx, -                            void* dst, size_t maxDstSize, -                            const void* seqStart, size_t seqSize, int nbSeq, -                            const ZSTD_longOffset_e isLongOffset, -                            const int frame); -  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG  static size_t  ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,                     const void* seqStart, size_t seqSize, int nbSeq, -                   const ZSTD_longOffset_e isLongOffset, -                   const int frame) +                   const ZSTD_longOffset_e isLongOffset)  {      DEBUGLOG(5, "ZSTD_decompressSequences");  #if DYNAMIC_BMI2      if (ZSTD_DCtx_get_bmi2(dctx)) { -        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);      }  #endif -    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);  }  static size_t  ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,                                   const void* seqStart, size_t seqSize, int nbSeq, -                                 const ZSTD_longOffset_e isLongOffset, -                                 const int frame) +                                 const ZSTD_longOffset_e isLongOffset)  {      DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");  #if DYNAMIC_BMI2      if (ZSTD_DCtx_get_bmi2(dctx)) { -        return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +        return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);      }  #endif -    return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +    return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);  }  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1931,69 +1976,114 @@ static size_t  ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,                               void* dst, size_t maxDstSize,                               const void* seqStart, size_t seqSize, int nbSeq, -                             const ZSTD_longOffset_e isLongOffset, -                             const int frame) +                             const ZSTD_longOffset_e isLongOffset)  {      DEBUGLOG(5, "ZSTD_decompressSequencesLong");  #if DYNAMIC_BMI2      if (ZSTD_DCtx_get_bmi2(dctx)) { -        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);      }  #endif -  return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +  return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);  }  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ +/* + * @returns The total size of the history referenceable by zstd, including + * both the prefix and the extDict. At @p op any offset larger than this + * is invalid. + */ +static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart) +{ +    return (size_t)(op - virtualStart); +} + +typedef struct { +    unsigned longOffsetShare; +    unsigned maxNbAdditionalBits; +} ZSTD_OffsetInfo; -#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ -    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) -/* ZSTD_getLongOffsetsShare() : +/* ZSTD_getOffsetInfo() :   * condition : offTable must be valid   * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) - *           compared to maximum possible of (1<<OffFSELog) */ -static unsigned -ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable) + *           compared to maximum possible of (1<<OffFSELog), + *           as well as the maximum number additional bits required. + */ +static ZSTD_OffsetInfo +ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)  { -    const void* ptr = offTable; -    U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog; -    const ZSTD_seqSymbol* table = offTable + 1; -    U32 const max = 1 << tableLog; -    U32 u, total = 0; -    DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog); - -    assert(max <= (1 << OffFSELog));  /* max not too large */ -    for (u=0; u<max; u++) { -        if (table[u].nbAdditionalBits > 22) total += 1; +    ZSTD_OffsetInfo info = {0, 0}; +    /* If nbSeq == 0, then the offTable is uninitialized, but we have +     * no sequences, so both values should be 0. +     */ +    if (nbSeq != 0) { +        const void* ptr = offTable; +        U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog; +        const ZSTD_seqSymbol* table = offTable + 1; +        U32 const max = 1 << tableLog; +        U32 u; +        DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog); + +        assert(max <= (1 << OffFSELog));  /* max not too large */ +        for (u=0; u<max; u++) { +            info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits); +            if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1; +        } + +        assert(tableLog <= OffFSELog); +        info.longOffsetShare <<= (OffFSELog - tableLog);  /* scale to OffFSELog */      } -    assert(tableLog <= OffFSELog); -    total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */ +    return info; +} -    return total; +/* + * @returns The maximum offset we can decode in one read of our bitstream, without + * reloading more bits in the middle of the offset bits read. Any offsets larger + * than this must use the long offset decoder. + */ +static size_t ZSTD_maxShortOffset(void) +{ +    if (MEM_64bits()) { +        /* We can decode any offset without reloading bits. +         * This might change if the max window size grows. +         */ +        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); +        return (size_t)-1; +    } else { +        /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1. +         * This offBase would require STREAM_ACCUMULATOR_MIN extra bits. +         * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset. +         */ +        size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1; +        size_t const maxOffset = maxOffbase - ZSTD_REP_NUM; +        assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN); +        return maxOffset; +    }  } -#endif  size_t  ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,                                void* dst, size_t dstCapacity, -                        const void* src, size_t srcSize, const int frame, const streaming_operation streaming) +                        const void* src, size_t srcSize, const streaming_operation streaming)  {   /* blockType == blockCompressed */      const BYTE* ip = (const BYTE*)src; -    /* isLongOffset must be true if there are long offsets. -     * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. -     * We don't expect that to be the case in 64-bit mode. -     * In block mode, window size is not known, so we have to be conservative. -     * (note: but it could be evaluated from current-lowLimit) -     */ -    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); -    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); - -    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); +    DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize); + +    /* Note : the wording of the specification +     * allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx). +     * This generally does not happen, as it makes little sense, +     * since an uncompressed block would feature same size and have no decompression cost. +     * Also, note that decoder from reference libzstd before < v1.5.4 +     * would consider this edge case as an error. +     * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx) +     * for broader compatibility with the deployed ecosystem of zstd decoders */ +    RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, "");      /* Decode literals section */      {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming); -        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); +        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);          if (ZSTD_isError(litCSize)) return litCSize;          ip += litCSize;          srcSize -= litCSize; @@ -2001,6 +2091,23 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,      /* Build Decoding Tables */      { +        /* Compute the maximum block size, which must also work when !frame and fParams are unset. +         * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t. +         */ +        size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx)); +        size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart); +        /* isLongOffset must be true if there are long offsets. +         * Offsets are long if they are larger than ZSTD_maxShortOffset(). +         * We don't expect that to be the case in 64-bit mode. +         * +         * We check here to see if our history is large enough to allow long offsets. +         * If it isn't, then we can't possible have (valid) long offsets. If the offset +         * is invalid, then it is okay to read it incorrectly. +         * +         * If isLongOffsets is true, then we will later check our decoding table to see +         * if it is even possible to generate long offsets. +         */ +        ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));          /* These macros control at build-time which decompressor implementation           * we use. If neither is defined, we do some inspection and dispatch at           * runtime. @@ -2008,6 +2115,11 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,  #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \      !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)          int usePrefetchDecoder = dctx->ddictIsCold; +#else +        /* Set to 1 to avoid computing offset info if we don't need to. +         * Otherwise this value is ignored. +         */ +        int usePrefetchDecoder = 1;  #endif          int nbSeq;          size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); @@ -2015,40 +2127,55 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,          ip += seqHSize;          srcSize -= seqHSize; -        RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); +        RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); +        RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall, +                "invalid dst"); -#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ -    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) -        if ( !usePrefetchDecoder -          && (!frame || (dctx->fParams.windowSize > (1<<24))) -          && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */ -            U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); -            U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ -            usePrefetchDecoder = (shareLongOffsets >= minShare); +        /* If we could potentially have long offsets, or we might want to use the prefetch decoder, +         * compute information about the share of long offsets, and the maximum nbAdditionalBits. +         * NOTE: could probably use a larger nbSeq limit +         */ +        if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) { +            ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq); +            if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) { +                /* If isLongOffset, but the maximum number of additional bits that we see in our table is small +                 * enough, then we know it is impossible to have too long an offset in this block, so we can +                 * use the regular offset decoder. +                 */ +                isLongOffset = ZSTD_lo_isRegularOffset; +            } +            if (!usePrefetchDecoder) { +                U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ +                usePrefetchDecoder = (info.longOffsetShare >= minShare); +            }          } -#endif          dctx->ddictIsCold = 0;  #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \      !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) -        if (usePrefetchDecoder) +        if (usePrefetchDecoder) { +#else +        (void)usePrefetchDecoder; +        {  #endif  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);  #endif +        }  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG          /* else */          if (dctx->litBufferLocation == ZSTD_split) -            return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +            return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);          else -            return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +            return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);  #endif      }  } +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR  void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)  {      if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */ @@ -2060,13 +2187,24 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)  } -size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, -                            void* dst, size_t dstCapacity, -                      const void* src, size_t srcSize) +size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx, +                                       void* dst, size_t dstCapacity, +                                 const void* src, size_t srcSize)  {      size_t dSize; +    dctx->isFrameDecompression = 0;      ZSTD_checkContinuity(dctx, dst, dstCapacity); -    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming); +    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming); +    FORWARD_IF_ERROR(dSize, "");      dctx->previousDstEnd = (char*)dst + dSize;      return dSize;  } + + +/* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */ +size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, +                            void* dst, size_t dstCapacity, +                      const void* src, size_t srcSize) +{ +    return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize); +} | 
