VeraCrypt
aboutsummaryrefslogtreecommitdiff
path: root/src/Common/lzma
diff options
context:
space:
mode:
Diffstat (limited to 'src/Common/lzma')
-rw-r--r--src/Common/lzma/7zTypes.h597
-rw-r--r--src/Common/lzma/7zWindows.h101
-rw-r--r--src/Common/lzma/Alloc.c535
-rw-r--r--src/Common/lzma/Alloc.h71
-rw-r--r--src/Common/lzma/Compiler.h159
-rw-r--r--src/Common/lzma/CpuArch.c823
-rw-r--r--src/Common/lzma/CpuArch.h523
-rw-r--r--src/Common/lzma/LzFind.c1717
-rw-r--r--src/Common/lzma/LzFind.h159
-rw-r--r--src/Common/lzma/LzFindMt.c1406
-rw-r--r--src/Common/lzma/LzFindMt.h109
-rw-r--r--src/Common/lzma/LzFindOpt.c578
-rw-r--r--src/Common/lzma/LzHash.h34
-rw-r--r--src/Common/lzma/LzmaDec.c1363
-rw-r--r--src/Common/lzma/LzmaDec.h237
-rw-r--r--src/Common/lzma/LzmaEnc.c3144
-rw-r--r--src/Common/lzma/LzmaEnc.h83
-rw-r--r--src/Common/lzma/LzmaLib.c42
-rw-r--r--src/Common/lzma/LzmaLib.h138
-rw-r--r--src/Common/lzma/Precomp.h10
-rw-r--r--src/Common/lzma/Threads.c562
-rw-r--r--src/Common/lzma/Threads.h240
-rw-r--r--src/Common/lzma/lzma-history.txt560
-rw-r--r--src/Common/lzma/lzma-sdk.txt404
24 files changed, 13595 insertions, 0 deletions
diff --git a/src/Common/lzma/7zTypes.h b/src/Common/lzma/7zTypes.h
new file mode 100644
index 00000000..1fcb2473
--- /dev/null
+++ b/src/Common/lzma/7zTypes.h
@@ -0,0 +1,597 @@
+/* 7zTypes.h -- Basic types
+2023-04-02 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_7Z_TYPES_H
+#define ZIP7_7Z_TYPES_H
+
+#ifdef _WIN32
+/* #include <windows.h> */
+#else
+#include <errno.h>
+#endif
+
+#include <stddef.h>
+
+#ifndef EXTERN_C_BEGIN
+#ifdef __cplusplus
+#define EXTERN_C_BEGIN extern "C" {
+#define EXTERN_C_END }
+#else
+#define EXTERN_C_BEGIN
+#define EXTERN_C_END
+#endif
+#endif
+
+EXTERN_C_BEGIN
+
+#define SZ_OK 0
+
+#define SZ_ERROR_DATA 1
+#define SZ_ERROR_MEM 2
+#define SZ_ERROR_CRC 3
+#define SZ_ERROR_UNSUPPORTED 4
+#define SZ_ERROR_PARAM 5
+#define SZ_ERROR_INPUT_EOF 6
+#define SZ_ERROR_OUTPUT_EOF 7
+#define SZ_ERROR_READ 8
+#define SZ_ERROR_WRITE 9
+#define SZ_ERROR_PROGRESS 10
+#define SZ_ERROR_FAIL 11
+#define SZ_ERROR_THREAD 12
+
+#define SZ_ERROR_ARCHIVE 16
+#define SZ_ERROR_NO_ARCHIVE 17
+
+typedef int SRes;
+
+
+#ifdef _MSC_VER
+ #if _MSC_VER > 1200
+ #define MY_ALIGN(n) __declspec(align(n))
+ #else
+ #define MY_ALIGN(n)
+ #endif
+#else
+ /*
+ // C11/C++11:
+ #include <stdalign.h>
+ #define MY_ALIGN(n) alignas(n)
+ */
+ #define MY_ALIGN(n) __attribute__ ((aligned(n)))
+#endif
+
+
+#ifdef _WIN32
+
+/* typedef DWORD WRes; */
+typedef unsigned WRes;
+#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
+
+// #define MY_HRES_ERROR_INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
+
+#else // _WIN32
+
+// #define ENV_HAVE_LSTAT
+typedef int WRes;
+
+// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
+#define MY_FACILITY_ERRNO 0x800
+#define MY_FACILITY_WIN32 7
+#define MY_FACILITY_WRes MY_FACILITY_ERRNO
+
+#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
+ ( (HRESULT)(x) & 0x0000FFFF) \
+ | (MY_FACILITY_WRes << 16) \
+ | (HRESULT)0x80000000 ))
+
+#define MY_SRes_HRESULT_FROM_WRes(x) \
+ ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x))
+
+// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno)
+#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x)
+
+/*
+#define ERROR_FILE_NOT_FOUND 2L
+#define ERROR_ACCESS_DENIED 5L
+#define ERROR_NO_MORE_FILES 18L
+#define ERROR_LOCK_VIOLATION 33L
+#define ERROR_FILE_EXISTS 80L
+#define ERROR_DISK_FULL 112L
+#define ERROR_NEGATIVE_SEEK 131L
+#define ERROR_ALREADY_EXISTS 183L
+#define ERROR_DIRECTORY 267L
+#define ERROR_TOO_MANY_POSTS 298L
+
+#define ERROR_INTERNAL_ERROR 1359L
+#define ERROR_INVALID_REPARSE_DATA 4392L
+#define ERROR_REPARSE_TAG_INVALID 4393L
+#define ERROR_REPARSE_TAG_MISMATCH 4394L
+*/
+
+// we use errno equivalents for some WIN32 errors:
+
+#define ERROR_INVALID_PARAMETER EINVAL
+#define ERROR_INVALID_FUNCTION EINVAL
+#define ERROR_ALREADY_EXISTS EEXIST
+#define ERROR_FILE_EXISTS EEXIST
+#define ERROR_PATH_NOT_FOUND ENOENT
+#define ERROR_FILE_NOT_FOUND ENOENT
+#define ERROR_DISK_FULL ENOSPC
+// #define ERROR_INVALID_HANDLE EBADF
+
+// we use FACILITY_WIN32 for errors that has no errno equivalent
+// Too many posts were made to a semaphore.
+#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL)
+#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L)
+#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L)
+
+// if (MY_FACILITY_WRes != FACILITY_WIN32),
+// we use FACILITY_WIN32 for COM errors:
+#define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
+#define E_INVALIDARG ((HRESULT)0x80070057L)
+#define MY_E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
+
+/*
+// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
+#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
+#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
+#define MY_E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
+*/
+
+#define TEXT(quote) quote
+
+#define FILE_ATTRIBUTE_READONLY 0x0001
+#define FILE_ATTRIBUTE_HIDDEN 0x0002
+#define FILE_ATTRIBUTE_SYSTEM 0x0004
+#define FILE_ATTRIBUTE_DIRECTORY 0x0010
+#define FILE_ATTRIBUTE_ARCHIVE 0x0020
+#define FILE_ATTRIBUTE_DEVICE 0x0040
+#define FILE_ATTRIBUTE_NORMAL 0x0080
+#define FILE_ATTRIBUTE_TEMPORARY 0x0100
+#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200
+#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400
+#define FILE_ATTRIBUTE_COMPRESSED 0x0800
+#define FILE_ATTRIBUTE_OFFLINE 0x1000
+#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000
+#define FILE_ATTRIBUTE_ENCRYPTED 0x4000
+
+#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */
+
+#endif
+
+
+#ifndef RINOK
+#define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; }
+#endif
+
+#ifndef RINOK_WRes
+#define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; }
+#endif
+
+typedef unsigned char Byte;
+typedef short Int16;
+typedef unsigned short UInt16;
+
+#ifdef Z7_DECL_Int32_AS_long
+typedef long Int32;
+typedef unsigned long UInt32;
+#else
+typedef int Int32;
+typedef unsigned int UInt32;
+#endif
+
+
+#ifndef _WIN32
+
+typedef int INT;
+typedef Int32 INT32;
+typedef unsigned int UINT;
+typedef UInt32 UINT32;
+typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility
+typedef UINT32 ULONG;
+
+#undef DWORD
+typedef UINT32 DWORD;
+
+#define VOID void
+
+#define HRESULT LONG
+
+typedef void *LPVOID;
+// typedef void VOID;
+// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
+// gcc / clang on Unix : sizeof(long==sizeof(void*) in 32 or 64 bits)
+typedef long INT_PTR;
+typedef unsigned long UINT_PTR;
+typedef long LONG_PTR;
+typedef unsigned long DWORD_PTR;
+
+typedef size_t SIZE_T;
+
+#endif // _WIN32
+
+
+#define MY_HRES_ERROR_INTERNAL_ERROR ((HRESULT)0x8007054FL)
+
+
+#ifdef Z7_DECL_Int64_AS_long
+
+typedef long Int64;
+typedef unsigned long UInt64;
+
+#else
+
+#if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__)
+typedef __int64 Int64;
+typedef unsigned __int64 UInt64;
+#else
+#if defined(__clang__) || defined(__GNUC__)
+#include <stdint.h>
+typedef int64_t Int64;
+typedef uint64_t UInt64;
+#else
+typedef long long int Int64;
+typedef unsigned long long int UInt64;
+// #define UINT64_CONST(n) n ## ULL
+#endif
+#endif
+
+#endif
+
+#define UINT64_CONST(n) n
+
+
+#ifdef Z7_DECL_SizeT_AS_unsigned_int
+typedef unsigned int SizeT;
+#else
+typedef size_t SizeT;
+#endif
+
+/*
+#if (defined(_MSC_VER) && _MSC_VER <= 1200)
+typedef size_t MY_uintptr_t;
+#else
+#include <stdint.h>
+typedef uintptr_t MY_uintptr_t;
+#endif
+*/
+
+typedef int BoolInt;
+/* typedef BoolInt Bool; */
+#define True 1
+#define False 0
+
+
+#ifdef _WIN32
+#define Z7_STDCALL __stdcall
+#else
+#define Z7_STDCALL
+#endif
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1300
+#define Z7_NO_INLINE __declspec(noinline)
+#else
+#define Z7_NO_INLINE
+#endif
+
+#define Z7_FORCE_INLINE __forceinline
+
+#define Z7_CDECL __cdecl
+#define Z7_FASTCALL __fastcall
+
+#else // _MSC_VER
+
+#if (defined(__GNUC__) && (__GNUC__ >= 4)) \
+ || (defined(__clang__) && (__clang_major__ >= 4)) \
+ || defined(__INTEL_COMPILER) \
+ || defined(__xlC__)
+#define Z7_NO_INLINE __attribute__((noinline))
+#define Z7_FORCE_INLINE __attribute__((always_inline)) inline
+#else
+#define Z7_NO_INLINE
+#define Z7_FORCE_INLINE
+#endif
+
+#define Z7_CDECL
+
+#if defined(_M_IX86) \
+ || defined(__i386__)
+// #define Z7_FASTCALL __attribute__((fastcall))
+// #define Z7_FASTCALL __attribute__((cdecl))
+#define Z7_FASTCALL
+#elif defined(MY_CPU_AMD64)
+// #define Z7_FASTCALL __attribute__((ms_abi))
+#define Z7_FASTCALL
+#else
+#define Z7_FASTCALL
+#endif
+
+#endif // _MSC_VER
+
+
+/* The following interfaces use first parameter as pointer to structure */
+
+// #define Z7_C_IFACE_CONST_QUAL
+#define Z7_C_IFACE_CONST_QUAL const
+
+#define Z7_C_IFACE_DECL(a) \
+ struct a ## _; \
+ typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \
+ typedef struct a ## _ a; \
+ struct a ## _
+
+
+Z7_C_IFACE_DECL (IByteIn)
+{
+ Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */
+};
+#define IByteIn_Read(p) (p)->Read(p)
+
+
+Z7_C_IFACE_DECL (IByteOut)
+{
+ void (*Write)(IByteOutPtr p, Byte b);
+};
+#define IByteOut_Write(p, b) (p)->Write(p, b)
+
+
+Z7_C_IFACE_DECL (ISeqInStream)
+{
+ SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size);
+ /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+ (output(*size) < input(*size)) is allowed */
+};
+#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+
+/* try to read as much as avail in stream and limited by (*processedSize) */
+SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize);
+/* it can return SZ_ERROR_INPUT_EOF */
+// SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size);
+// SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType);
+SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf);
+
+
+Z7_C_IFACE_DECL (ISeqOutStream)
+{
+ size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size);
+ /* Returns: result - the number of actually written bytes.
+ (result < size) means error */
+};
+#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size)
+
+typedef enum
+{
+ SZ_SEEK_SET = 0,
+ SZ_SEEK_CUR = 1,
+ SZ_SEEK_END = 2
+} ESzSeek;
+
+
+Z7_C_IFACE_DECL (ISeekInStream)
+{
+ SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size); /* same as ISeqInStream::Read */
+ SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin);
+};
+#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+
+
+Z7_C_IFACE_DECL (ILookInStream)
+{
+ SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size);
+ /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+ (output(*size) > input(*size)) is not allowed
+ (output(*size) < input(*size)) is allowed */
+ SRes (*Skip)(ILookInStreamPtr p, size_t offset);
+ /* offset must be <= output(*size) of Look */
+ SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size);
+ /* reads directly (without buffer). It's same as ISeqInStream::Read */
+ SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin);
+};
+
+#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
+#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset)
+#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+
+
+SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size);
+SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset);
+
+/* reads via ILookInStream::Read */
+SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType);
+SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size);
+
+
+typedef struct
+{
+ ILookInStream vt;
+ ISeekInStreamPtr realStream;
+
+ size_t pos;
+ size_t size; /* it's data size */
+
+ /* the following variables must be set outside */
+ Byte *buf;
+ size_t bufSize;
+} CLookToRead2;
+
+void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
+
+#define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; }
+
+
+typedef struct
+{
+ ISeqInStream vt;
+ ILookInStreamPtr realStream;
+} CSecToLook;
+
+void SecToLook_CreateVTable(CSecToLook *p);
+
+
+
+typedef struct
+{
+ ISeqInStream vt;
+ ILookInStreamPtr realStream;
+} CSecToRead;
+
+void SecToRead_CreateVTable(CSecToRead *p);
+
+
+Z7_C_IFACE_DECL (ICompressProgress)
+{
+ SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize);
+ /* Returns: result. (result != SZ_OK) means break.
+ Value (UInt64)(Int64)-1 for size means unknown value. */
+};
+
+#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
+
+
+
+typedef struct ISzAlloc ISzAlloc;
+typedef const ISzAlloc * ISzAllocPtr;
+
+struct ISzAlloc
+{
+ void *(*Alloc)(ISzAllocPtr p, size_t size);
+ void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */
+};
+
+#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size)
+#define ISzAlloc_Free(p, a) (p)->Free(p, a)
+
+/* deprecated */
+#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size)
+#define IAlloc_Free(p, a) ISzAlloc_Free(p, a)
+
+
+
+
+
+#ifndef MY_offsetof
+ #ifdef offsetof
+ #define MY_offsetof(type, m) offsetof(type, m)
+ /*
+ #define MY_offsetof(type, m) FIELD_OFFSET(type, m)
+ */
+ #else
+ #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m))
+ #endif
+#endif
+
+
+
+#ifndef Z7_container_of
+
+/*
+#define Z7_container_of(ptr, type, m) container_of(ptr, type, m)
+#define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
+#define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
+#define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
+*/
+
+/*
+ GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly"
+ GCC 3.4.4 : classes with constructor
+ GCC 4.8.1 : classes with non-public variable members"
+*/
+
+#define Z7_container_of(ptr, type, m) \
+ ((type *)(void *)((char *)(void *) \
+ (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
+
+#define Z7_container_of_CONST(ptr, type, m) \
+ ((const type *)(const void *)((const char *)(const void *) \
+ (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
+
+/*
+#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \
+ ((type *)(void *)(const void *)((const char *)(const void *) \
+ (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
+*/
+
+#endif
+
+#define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
+
+// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m)
+// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m)
+
+#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m)
+
+#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+/*
+#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m)
+*/
+#if defined (__clang__) || defined(__GNUC__)
+#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wcast-qual\"")
+#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL \
+ _Pragma("GCC diagnostic pop")
+#else
+#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL
+#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL
+#endif
+
+#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \
+ Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \
+ type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \
+ Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL
+
+#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \
+ Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p)
+
+
+// #define ZIP7_DECLARE_HANDLE(name) typedef void *name;
+#define Z7_DECLARE_HANDLE(name) struct name##_dummy{int unused;}; typedef struct name##_dummy *name;
+
+
+#define Z7_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
+
+#ifndef Z7_ARRAY_SIZE
+#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+#endif
+
+
+#ifdef _WIN32
+
+#define CHAR_PATH_SEPARATOR '\\'
+#define WCHAR_PATH_SEPARATOR L'\\'
+#define STRING_PATH_SEPARATOR "\\"
+#define WSTRING_PATH_SEPARATOR L"\\"
+
+#else
+
+#define CHAR_PATH_SEPARATOR '/'
+#define WCHAR_PATH_SEPARATOR L'/'
+#define STRING_PATH_SEPARATOR "/"
+#define WSTRING_PATH_SEPARATOR L"/"
+
+#endif
+
+#define k_PropVar_TimePrec_0 0
+#define k_PropVar_TimePrec_Unix 1
+#define k_PropVar_TimePrec_DOS 2
+#define k_PropVar_TimePrec_HighPrec 3
+#define k_PropVar_TimePrec_Base 16
+#define k_PropVar_TimePrec_100ns (k_PropVar_TimePrec_Base + 7)
+#define k_PropVar_TimePrec_1ns (k_PropVar_TimePrec_Base + 9)
+
+EXTERN_C_END
+
+#endif
+
+/*
+#ifndef Z7_ST
+#ifdef _7ZIP_ST
+#define Z7_ST
+#endif
+#endif
+*/
diff --git a/src/Common/lzma/7zWindows.h b/src/Common/lzma/7zWindows.h
new file mode 100644
index 00000000..42c6db8b
--- /dev/null
+++ b/src/Common/lzma/7zWindows.h
@@ -0,0 +1,101 @@
+/* 7zWindows.h -- StdAfx
+2023-04-02 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_7Z_WINDOWS_H
+#define ZIP7_INC_7Z_WINDOWS_H
+
+#ifdef _WIN32
+
+#if defined(__clang__)
+# pragma clang diagnostic push
+#endif
+
+#if defined(_MSC_VER)
+
+#pragma warning(push)
+#pragma warning(disable : 4668) // '_WIN32_WINNT' is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
+
+#if _MSC_VER == 1900
+// for old kit10 versions
+// #pragma warning(disable : 4255) // winuser.h(13979): warning C4255: 'GetThreadDpiAwarenessContext':
+#endif
+// win10 Windows Kit:
+#endif // _MSC_VER
+
+#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
+// for msvc6 without sdk2003
+#define RPC_NO_WINDOWS_H
+#endif
+
+#if defined(__MINGW32__) || defined(__MINGW64__)
+// #if defined(__GNUC__) && !defined(__clang__)
+#include <windows.h>
+#else
+#include <Windows.h>
+#endif
+// #include <basetsd.h>
+// #include <wtypes.h>
+
+// but if precompiled with clang-cl then we need
+// #include <windows.h>
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+#if defined(__clang__)
+# pragma clang diagnostic pop
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
+#ifndef _W64
+
+typedef long LONG_PTR, *PLONG_PTR;
+typedef unsigned long ULONG_PTR, *PULONG_PTR;
+typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
+
+#define Z7_OLD_WIN_SDK
+#endif // _W64
+#endif // _MSC_VER == 1200
+
+#ifdef Z7_OLD_WIN_SDK
+
+#ifndef INVALID_FILE_ATTRIBUTES
+#define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
+#endif
+#ifndef INVALID_SET_FILE_POINTER
+#define INVALID_SET_FILE_POINTER ((DWORD)-1)
+#endif
+#ifndef FILE_SPECIAL_ACCESS
+#define FILE_SPECIAL_ACCESS (FILE_ANY_ACCESS)
+#endif
+
+// ShlObj.h:
+// #define BIF_NEWDIALOGSTYLE 0x0040
+
+#pragma warning(disable : 4201)
+// #pragma warning(disable : 4115)
+
+#undef VARIANT_TRUE
+#define VARIANT_TRUE ((VARIANT_BOOL)-1)
+#endif
+
+#endif // Z7_OLD_WIN_SDK
+
+#ifdef UNDER_CE
+#undef VARIANT_TRUE
+#define VARIANT_TRUE ((VARIANT_BOOL)-1)
+#endif
+
+
+#if defined(_MSC_VER)
+#if _MSC_VER >= 1400 && _MSC_VER <= 1600
+ // BaseTsd.h(148) : 'HandleToULong' : unreferenced inline function has been removed
+ // string.h
+ // #pragma warning(disable : 4514)
+#endif
+#endif
+
+
+/* #include "7zTypes.h" */
+
+#endif
diff --git a/src/Common/lzma/Alloc.c b/src/Common/lzma/Alloc.c
new file mode 100644
index 00000000..d841bf20
--- /dev/null
+++ b/src/Common/lzma/Alloc.c
@@ -0,0 +1,535 @@
+/* Alloc.c -- Memory allocation functions
+2023-04-02 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#ifdef _WIN32
+#include "7zWindows.h"
+#endif
+#include <stdlib.h>
+
+#include "Alloc.h"
+
+#ifdef _WIN32
+#ifdef Z7_LARGE_PAGES
+#if defined(__clang__) || defined(__GNUC__)
+typedef void (*Z7_voidFunction)(void);
+#define MY_CAST_FUNC (Z7_voidFunction)
+#elif defined(_MSC_VER) && _MSC_VER > 1920
+#define MY_CAST_FUNC (void *)
+// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
+#else
+#define MY_CAST_FUNC
+#endif
+#endif // Z7_LARGE_PAGES
+#endif // _WIN32
+
+// #define SZ_ALLOC_DEBUG
+/* #define SZ_ALLOC_DEBUG */
+
+/* use SZ_ALLOC_DEBUG to debug alloc/free operations */
+#ifdef SZ_ALLOC_DEBUG
+
+#include <string.h>
+#include <stdio.h>
+static int g_allocCount = 0;
+#ifdef _WIN32
+static int g_allocCountMid = 0;
+static int g_allocCountBig = 0;
+#endif
+
+
+#define CONVERT_INT_TO_STR(charType, tempSize) \
+ char temp[tempSize]; unsigned i = 0; \
+ while (val >= 10) { temp[i++] = (char)('0' + (unsigned)(val % 10)); val /= 10; } \
+ *s++ = (charType)('0' + (unsigned)val); \
+ while (i != 0) { i--; *s++ = temp[i]; } \
+ *s = 0;
+
+static void ConvertUInt64ToString(UInt64 val, char *s)
+{
+ CONVERT_INT_TO_STR(char, 24)
+}
+
+#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
+
+static void ConvertUInt64ToHex(UInt64 val, char *s)
+{
+ UInt64 v = val;
+ unsigned i;
+ for (i = 1;; i++)
+ {
+ v >>= 4;
+ if (v == 0)
+ break;
+ }
+ s[i] = 0;
+ do
+ {
+ unsigned t = (unsigned)(val & 0xF);
+ val >>= 4;
+ s[--i] = GET_HEX_CHAR(t);
+ }
+ while (i);
+}
+
+#define DEBUG_OUT_STREAM stderr
+
+static void Print(const char *s)
+{
+ fputs(s, DEBUG_OUT_STREAM);
+}
+
+static void PrintAligned(const char *s, size_t align)
+{
+ size_t len = strlen(s);
+ for(;;)
+ {
+ fputc(' ', DEBUG_OUT_STREAM);
+ if (len >= align)
+ break;
+ ++len;
+ }
+ Print(s);
+}
+
+static void PrintLn(void)
+{
+ Print("\n");
+}
+
+static void PrintHex(UInt64 v, size_t align)
+{
+ char s[32];
+ ConvertUInt64ToHex(v, s);
+ PrintAligned(s, align);
+}
+
+static void PrintDec(int v, size_t align)
+{
+ char s[32];
+ ConvertUInt64ToString((unsigned)v, s);
+ PrintAligned(s, align);
+}
+
+static void PrintAddr(void *p)
+{
+ PrintHex((UInt64)(size_t)(ptrdiff_t)p, 12);
+}
+
+
+#define PRINT_REALLOC(name, cnt, size, ptr) { \
+ Print(name " "); \
+ if (!ptr) PrintDec(cnt++, 10); \
+ PrintHex(size, 10); \
+ PrintAddr(ptr); \
+ PrintLn(); }
+
+#define PRINT_ALLOC(name, cnt, size, ptr) { \
+ Print(name " "); \
+ PrintDec(cnt++, 10); \
+ PrintHex(size, 10); \
+ PrintAddr(ptr); \
+ PrintLn(); }
+
+#define PRINT_FREE(name, cnt, ptr) if (ptr) { \
+ Print(name " "); \
+ PrintDec(--cnt, 10); \
+ PrintAddr(ptr); \
+ PrintLn(); }
+
+#else
+
+#ifdef _WIN32
+#define PRINT_ALLOC(name, cnt, size, ptr)
+#endif
+#define PRINT_FREE(name, cnt, ptr)
+#define Print(s)
+#define PrintLn()
+#define PrintHex(v, align)
+#define PrintAddr(p)
+
+#endif
+
+
+/*
+by specification:
+ malloc(non_NULL, 0) : returns NULL or a unique pointer value that can later be successfully passed to free()
+ realloc(NULL, size) : the call is equivalent to malloc(size)
+ realloc(non_NULL, 0) : the call is equivalent to free(ptr)
+
+in main compilers:
+ malloc(0) : returns non_NULL
+ realloc(NULL, 0) : returns non_NULL
+ realloc(non_NULL, 0) : returns NULL
+*/
+
+
+void *MyAlloc(size_t size)
+{
+ if (size == 0)
+ return NULL;
+ // PRINT_ALLOC("Alloc ", g_allocCount, size, NULL)
+ #ifdef SZ_ALLOC_DEBUG
+ {
+ void *p = malloc(size);
+ if (p)
+ {
+ PRINT_ALLOC("Alloc ", g_allocCount, size, p)
+ }
+ return p;
+ }
+ #else
+ return malloc(size);
+ #endif
+}
+
+void MyFree(void *address)
+{
+ PRINT_FREE("Free ", g_allocCount, address)
+
+ free(address);
+}
+
+void *MyRealloc(void *address, size_t size)
+{
+ if (size == 0)
+ {
+ MyFree(address);
+ return NULL;
+ }
+ // PRINT_REALLOC("Realloc ", g_allocCount, size, address)
+ #ifdef SZ_ALLOC_DEBUG
+ {
+ void *p = realloc(address, size);
+ if (p)
+ {
+ PRINT_REALLOC("Realloc ", g_allocCount, size, address)
+ }
+ return p;
+ }
+ #else
+ return realloc(address, size);
+ #endif
+}
+
+
+#ifdef _WIN32
+
+void *MidAlloc(size_t size)
+{
+ if (size == 0)
+ return NULL;
+ #ifdef SZ_ALLOC_DEBUG
+ {
+ void *p = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+ if (p)
+ {
+ PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, p)
+ }
+ return p;
+ }
+ #else
+ return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+ #endif
+}
+
+void MidFree(void *address)
+{
+ PRINT_FREE("Free-Mid", g_allocCountMid, address)
+
+ if (!address)
+ return;
+ VirtualFree(address, 0, MEM_RELEASE);
+}
+
+#ifdef Z7_LARGE_PAGES
+
+#ifdef MEM_LARGE_PAGES
+ #define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES
+#else
+ #define MY__MEM_LARGE_PAGES 0x20000000
+#endif
+
+extern
+SIZE_T g_LargePageSize;
+SIZE_T g_LargePageSize = 0;
+typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID);
+
+void SetLargePageSize(void)
+{
+ #ifdef Z7_LARGE_PAGES
+ SIZE_T size;
+ const
+ Func_GetLargePageMinimum fn =
+ (Func_GetLargePageMinimum) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
+ "GetLargePageMinimum");
+ if (!fn)
+ return;
+ size = fn();
+ if (size == 0 || (size & (size - 1)) != 0)
+ return;
+ g_LargePageSize = size;
+ #endif
+}
+
+#endif // Z7_LARGE_PAGES
+
+void *BigAlloc(size_t size)
+{
+ if (size == 0)
+ return NULL;
+
+ PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL)
+
+ #ifdef Z7_LARGE_PAGES
+ {
+ SIZE_T ps = g_LargePageSize;
+ if (ps != 0 && ps <= (1 << 30) && size > (ps / 2))
+ {
+ size_t size2;
+ ps--;
+ size2 = (size + ps) & ~ps;
+ if (size2 >= size)
+ {
+ void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE);
+ if (p)
+ {
+ PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p)
+ return p;
+ }
+ }
+ }
+ }
+ #endif
+
+ return MidAlloc(size);
+}
+
+void BigFree(void *address)
+{
+ PRINT_FREE("Free-Big", g_allocCountBig, address)
+ MidFree(address);
+}
+
+#endif // _WIN32
+
+
+static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MyAlloc(size); }
+static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MyFree(address); }
+const ISzAlloc g_Alloc = { SzAlloc, SzFree };
+
+#ifdef _WIN32
+static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MidAlloc(size); }
+static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MidFree(address); }
+static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return BigAlloc(size); }
+static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) BigFree(address); }
+const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
+const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
+#endif
+
+/*
+ uintptr_t : <stdint.h> C99 (optional)
+ : unsupported in VS6
+*/
+
+#ifdef _WIN32
+ typedef UINT_PTR UIntPtr;
+#else
+ /*
+ typedef uintptr_t UIntPtr;
+ */
+ typedef ptrdiff_t UIntPtr;
+#endif
+
+
+#define ADJUST_ALLOC_SIZE 0
+/*
+#define ADJUST_ALLOC_SIZE (sizeof(void *) - 1)
+*/
+/*
+ Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if
+ MyAlloc() can return address that is NOT multiple of sizeof(void *).
+*/
+
+
+/*
+#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1))))
+*/
+#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1))))
+
+
+#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)
+ #define USE_posix_memalign
+#endif
+
+#ifndef USE_posix_memalign
+#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
+#endif
+
+/*
+ This posix_memalign() is for test purposes only.
+ We also need special Free() function instead of free(),
+ if this posix_memalign() is used.
+*/
+
+/*
+static int posix_memalign(void **ptr, size_t align, size_t size)
+{
+ size_t newSize = size + align;
+ void *p;
+ void *pAligned;
+ *ptr = NULL;
+ if (newSize < size)
+ return 12; // ENOMEM
+ p = MyAlloc(newSize);
+ if (!p)
+ return 12; // ENOMEM
+ pAligned = MY_ALIGN_PTR_UP_PLUS(p, align);
+ ((void **)pAligned)[-1] = p;
+ *ptr = pAligned;
+ return 0;
+}
+*/
+
+/*
+ ALLOC_ALIGN_SIZE >= sizeof(void *)
+ ALLOC_ALIGN_SIZE >= cache_line_size
+*/
+
+#define ALLOC_ALIGN_SIZE ((size_t)1 << 7)
+
+static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
+{
+ #ifndef USE_posix_memalign
+
+ void *p;
+ void *pAligned;
+ size_t newSize;
+ UNUSED_VAR(pp)
+
+ /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
+ block to prevent cache line sharing with another allocated blocks */
+
+ newSize = size + ALLOC_ALIGN_SIZE * 1 + ADJUST_ALLOC_SIZE;
+ if (newSize < size)
+ return NULL;
+
+ p = MyAlloc(newSize);
+
+ if (!p)
+ return NULL;
+ pAligned = MY_ALIGN_PTR_UP_PLUS(p, ALLOC_ALIGN_SIZE);
+
+ Print(" size="); PrintHex(size, 8);
+ Print(" a_size="); PrintHex(newSize, 8);
+ Print(" ptr="); PrintAddr(p);
+ Print(" a_ptr="); PrintAddr(pAligned);
+ PrintLn();
+
+ ((void **)pAligned)[-1] = p;
+
+ return pAligned;
+
+ #else
+
+ void *p;
+ UNUSED_VAR(pp)
+ if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size))
+ return NULL;
+
+ Print(" posix_memalign="); PrintAddr(p);
+ PrintLn();
+
+ return p;
+
+ #endif
+}
+
+
+static void SzAlignedFree(ISzAllocPtr pp, void *address)
+{
+ UNUSED_VAR(pp)
+ #ifndef USE_posix_memalign
+ if (address)
+ MyFree(((void **)address)[-1]);
+ #else
+ free(address);
+ #endif
+}
+
+
+const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };
+
+
+
+#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
+
+/* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */
+#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
+/*
+#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1]
+*/
+
+static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
+{
+ const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
+ void *adr;
+ void *pAligned;
+ size_t newSize;
+ size_t extra;
+ size_t alignSize = (size_t)1 << p->numAlignBits;
+
+ if (alignSize < sizeof(void *))
+ alignSize = sizeof(void *);
+
+ if (p->offset >= alignSize)
+ return NULL;
+
+ /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
+ block to prevent cache line sharing with another allocated blocks */
+ extra = p->offset & (sizeof(void *) - 1);
+ newSize = size + alignSize + extra + ADJUST_ALLOC_SIZE;
+ if (newSize < size)
+ return NULL;
+
+ adr = ISzAlloc_Alloc(p->baseAlloc, newSize);
+
+ if (!adr)
+ return NULL;
+
+ pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr +
+ alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset;
+
+ PrintLn();
+ Print("- Aligned: ");
+ Print(" size="); PrintHex(size, 8);
+ Print(" a_size="); PrintHex(newSize, 8);
+ Print(" ptr="); PrintAddr(adr);
+ Print(" a_ptr="); PrintAddr(pAligned);
+ PrintLn();
+
+ REAL_BLOCK_PTR_VAR(pAligned) = adr;
+
+ return pAligned;
+}
+
+
+static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
+{
+ if (address)
+ {
+ const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
+ PrintLn();
+ Print("- Aligned Free: ");
+ PrintLn();
+ ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address));
+ }
+}
+
+
+void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p)
+{
+ p->vt.Alloc = AlignOffsetAlloc_Alloc;
+ p->vt.Free = AlignOffsetAlloc_Free;
+}
diff --git a/src/Common/lzma/Alloc.h b/src/Common/lzma/Alloc.h
new file mode 100644
index 00000000..fac5b62f
--- /dev/null
+++ b/src/Common/lzma/Alloc.h
@@ -0,0 +1,71 @@
+/* Alloc.h -- Memory allocation functions
+2023-03-04 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_ALLOC_H
+#define ZIP7_INC_ALLOC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/*
+ MyFree(NULL) : is allowed, as free(NULL)
+ MyAlloc(0) : returns NULL : but malloc(0) is allowed to return NULL or non_NULL
+ MyRealloc(NULL, 0) : returns NULL : but realloc(NULL, 0) is allowed to return NULL or non_NULL
+MyRealloc() is similar to realloc() for the following cases:
+ MyRealloc(non_NULL, 0) : returns NULL and always calls MyFree(ptr)
+ MyRealloc(NULL, non_ZERO) : returns NULL, if allocation failed
+ MyRealloc(non_NULL, non_ZERO) : returns NULL, if reallocation failed
+*/
+
+void *MyAlloc(size_t size);
+void MyFree(void *address);
+void *MyRealloc(void *address, size_t size);
+
+#ifdef _WIN32
+
+#ifdef Z7_LARGE_PAGES
+void SetLargePageSize(void);
+#endif
+
+void *MidAlloc(size_t size);
+void MidFree(void *address);
+void *BigAlloc(size_t size);
+void BigFree(void *address);
+
+#else
+
+#define MidAlloc(size) MyAlloc(size)
+#define MidFree(address) MyFree(address)
+#define BigAlloc(size) MyAlloc(size)
+#define BigFree(address) MyFree(address)
+
+#endif
+
+extern const ISzAlloc g_Alloc;
+
+#ifdef _WIN32
+extern const ISzAlloc g_BigAlloc;
+extern const ISzAlloc g_MidAlloc;
+#else
+#define g_BigAlloc g_AlignedAlloc
+#define g_MidAlloc g_AlignedAlloc
+#endif
+
+extern const ISzAlloc g_AlignedAlloc;
+
+
+typedef struct
+{
+ ISzAlloc vt;
+ ISzAllocPtr baseAlloc;
+ unsigned numAlignBits; /* ((1 << numAlignBits) >= sizeof(void *)) */
+ size_t offset; /* (offset == (k * sizeof(void *)) && offset < (1 << numAlignBits) */
+} CAlignOffsetAlloc;
+
+void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p);
+
+
+EXTERN_C_END
+
+#endif
diff --git a/src/Common/lzma/Compiler.h b/src/Common/lzma/Compiler.h
new file mode 100644
index 00000000..185a52de
--- /dev/null
+++ b/src/Common/lzma/Compiler.h
@@ -0,0 +1,159 @@
+/* Compiler.h : Compiler specific defines and pragmas
+2023-04-02 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_COMPILER_H
+#define ZIP7_INC_COMPILER_H
+
+#if defined(__clang__)
+# define Z7_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
+#endif
+#if defined(__clang__) && defined(__apple_build_version__)
+# define Z7_APPLE_CLANG_VERSION Z7_CLANG_VERSION
+#elif defined(__clang__)
+# define Z7_LLVM_CLANG_VERSION Z7_CLANG_VERSION
+#elif defined(__GNUC__)
+# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#endif
+
+#ifdef _MSC_VER
+#if !defined(__clang__) && !defined(__GNUC__)
+#define Z7_MSC_VER_ORIGINAL _MSC_VER
+#endif
+#endif
+
+#if defined(__MINGW32__) || defined(__MINGW64__)
+#define Z7_MINGW
+#endif
+
+// #pragma GCC diagnostic ignored "-Wunknown-pragmas"
+
+#ifdef __clang__
+// padding size of '' with 4 bytes to alignment boundary
+#pragma GCC diagnostic ignored "-Wpadded"
+#endif
+
+
+#ifdef _MSC_VER
+
+ #ifdef UNDER_CE
+ #define RPC_NO_WINDOWS_H
+ /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
+ #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
+ #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
+ #endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1800
+#pragma warning(disable : 4464) // relative include path contains '..'
+#endif
+
+// == 1200 : -O1 : for __forceinline
+// >= 1900 : -O1 : for printf
+#pragma warning(disable : 4710) // function not inlined
+
+#if _MSC_VER < 1900
+// winnt.h: 'Int64ShllMod32'
+#pragma warning(disable : 4514) // unreferenced inline function has been removed
+#endif
+
+#if _MSC_VER < 1300
+// #pragma warning(disable : 4702) // unreachable code
+// Bra.c : -O1:
+#pragma warning(disable : 4714) // function marked as __forceinline not inlined
+#endif
+
+/*
+#if _MSC_VER > 1400 && _MSC_VER <= 1900
+// strcat: This function or variable may be unsafe
+// sysinfoapi.h: kit10: GetVersion was declared deprecated
+#pragma warning(disable : 4996)
+#endif
+*/
+
+#if _MSC_VER > 1200
+// -Wall warnings
+
+#pragma warning(disable : 4711) // function selected for automatic inline expansion
+#pragma warning(disable : 4820) // '2' bytes padding added after data member
+
+#if _MSC_VER >= 1400 && _MSC_VER < 1920
+// 1400: string.h: _DBG_MEMCPY_INLINE_
+// 1600 - 191x : smmintrin.h __cplusplus'
+// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
+#pragma warning(disable : 4668)
+
+// 1400 - 1600 : WinDef.h : 'FARPROC' :
+// 1900 - 191x : immintrin.h: _readfsbase_u32
+// no function prototype given : converting '()' to '(void)'
+#pragma warning(disable : 4255)
+#endif
+
+#if _MSC_VER >= 1914
+// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified
+#pragma warning(disable : 5045)
+#endif
+
+#endif // _MSC_VER > 1200
+#endif // _MSC_VER
+
+
+#if defined(__clang__) && (__clang_major__ >= 4)
+ #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
+ _Pragma("clang loop unroll(disable)") \
+ _Pragma("clang loop vectorize(disable)")
+ #define Z7_ATTRIB_NO_VECTORIZE
+#elif defined(__GNUC__) && (__GNUC__ >= 5)
+ #define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+ // __attribute__((optimize("no-unroll-loops")));
+ #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+#elif defined(_MSC_VER) && (_MSC_VER >= 1920)
+ #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
+ _Pragma("loop( no_vector )")
+ #define Z7_ATTRIB_NO_VECTORIZE
+#else
+ #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+ #define Z7_ATTRIB_NO_VECTORIZE
+#endif
+
+#if defined(MY_CPU_X86_OR_AMD64) && ( \
+ defined(__clang__) && (__clang_major__ >= 4) \
+ || defined(__GNUC__) && (__GNUC__ >= 5))
+ #define Z7_ATTRIB_NO_SSE __attribute__((__target__("no-sse")))
+#else
+ #define Z7_ATTRIB_NO_SSE
+#endif
+
+#define Z7_ATTRIB_NO_VECTOR \
+ Z7_ATTRIB_NO_VECTORIZE \
+ Z7_ATTRIB_NO_SSE
+
+
+#if defined(__clang__) && (__clang_major__ >= 8) \
+ || defined(__GNUC__) && (__GNUC__ >= 1000) \
+ /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
+ // GCC is not good for __builtin_expect()
+ #define Z7_LIKELY(x) (__builtin_expect((x), 1))
+ #define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
+ // #define Z7_unlikely [[unlikely]]
+ // #define Z7_likely [[likely]]
+#else
+ #define Z7_LIKELY(x) (x)
+ #define Z7_UNLIKELY(x) (x)
+ // #define Z7_likely
+#endif
+
+
+#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 36000))
+#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"")
+#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \
+ _Pragma("GCC diagnostic pop")
+#else
+#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
+#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
+#endif
+
+#define UNUSED_VAR(x) (void)x;
+/* #define UNUSED_VAR(x) x=x; */
+
+#endif
diff --git a/src/Common/lzma/CpuArch.c b/src/Common/lzma/CpuArch.c
new file mode 100644
index 00000000..33f8a3ab
--- /dev/null
+++ b/src/Common/lzma/CpuArch.c
@@ -0,0 +1,823 @@
+/* CpuArch.c -- CPU specific code
+2023-05-18 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+// #include <stdio.h>
+
+#include "CpuArch.h"
+
+#ifdef MY_CPU_X86_OR_AMD64
+
+#undef NEED_CHECK_FOR_CPUID
+#if !defined(MY_CPU_AMD64)
+#define NEED_CHECK_FOR_CPUID
+#endif
+
+/*
+ cpuid instruction supports (subFunction) parameter in ECX,
+ that is used only with some specific (function) parameter values.
+ But we always use only (subFunction==0).
+*/
+/*
+ __cpuid(): MSVC and GCC/CLANG use same function/macro name
+ but parameters are different.
+ We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function.
+*/
+
+#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \
+ || defined(__clang__) /* && (__clang_major__ >= 10) */
+
+/* there was some CLANG/GCC compilers that have issues with
+ rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined).
+ compiler's <cpuid.h> contains the macro __cpuid() that is similar to our code.
+ The history of __cpuid() changes in CLANG/GCC:
+ GCC:
+ 2007: it preserved ebx for (__PIC__ && __i386__)
+ 2013: it preserved rbx and ebx for __PIC__
+ 2014: it doesn't preserves rbx and ebx anymore
+ we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem.
+ CLANG:
+ 2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check.
+ Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)?
+ Do we need __PIC__ test for CLANG or we must care about rbx even if
+ __PIC__ is not defined?
+*/
+
+#define ASM_LN "\n"
+
+#if defined(MY_CPU_AMD64) && defined(__PIC__) \
+ && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
+
+#define x86_cpuid_MACRO(p, func) { \
+ __asm__ __volatile__ ( \
+ ASM_LN "mov %%rbx, %q1" \
+ ASM_LN "cpuid" \
+ ASM_LN "xchg %%rbx, %q1" \
+ : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
+
+ /* "=&r" selects free register. It can select even rbx, if that register is free.
+ "=&D" for (RDI) also works, but the code can be larger with "=&D"
+ "2"(0) means (subFunction = 0),
+ 2 is (zero-based) index in the output constraint list "=c" (ECX). */
+
+#elif defined(MY_CPU_X86) && defined(__PIC__) \
+ && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
+
+#define x86_cpuid_MACRO(p, func) { \
+ __asm__ __volatile__ ( \
+ ASM_LN "mov %%ebx, %k1" \
+ ASM_LN "cpuid" \
+ ASM_LN "xchg %%ebx, %k1" \
+ : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
+
+#else
+
+#define x86_cpuid_MACRO(p, func) { \
+ __asm__ __volatile__ ( \
+ ASM_LN "cpuid" \
+ : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
+
+#endif
+
+
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+ x86_cpuid_MACRO(p, func)
+}
+
+
+Z7_NO_INLINE
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+ #if defined(NEED_CHECK_FOR_CPUID)
+ #define EFALGS_CPUID_BIT 21
+ UInt32 a;
+ __asm__ __volatile__ (
+ ASM_LN "pushf"
+ ASM_LN "pushf"
+ ASM_LN "pop %0"
+ // ASM_LN "movl %0, %1"
+ // ASM_LN "xorl $0x200000, %0"
+ ASM_LN "btc %1, %0"
+ ASM_LN "push %0"
+ ASM_LN "popf"
+ ASM_LN "pushf"
+ ASM_LN "pop %0"
+ ASM_LN "xorl (%%esp), %0"
+
+ ASM_LN "popf"
+ ASM_LN
+ : "=&r" (a) // "=a"
+ : "i" (EFALGS_CPUID_BIT)
+ );
+ if ((a & (1 << EFALGS_CPUID_BIT)) == 0)
+ return 0;
+ #endif
+ {
+ UInt32 p[4];
+ x86_cpuid_MACRO(p, 0)
+ return p[0];
+ }
+}
+
+#undef ASM_LN
+
+#elif !defined(_MSC_VER)
+
+/*
+// for gcc/clang and other: we can try to use __cpuid macro:
+#include <cpuid.h>
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+ __cpuid(func, p[0], p[1], p[2], p[3]);
+}
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+ return (UInt32)__get_cpuid_max(0, NULL);
+}
+*/
+// for unsupported cpuid:
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+ UNUSED_VAR(func)
+ p[0] = p[1] = p[2] = p[3] = 0;
+}
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+ return 0;
+}
+
+#else // _MSC_VER
+
+#if !defined(MY_CPU_AMD64)
+
+UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+ #if defined(NEED_CHECK_FOR_CPUID)
+ #define EFALGS_CPUID_BIT 21
+ __asm pushfd
+ __asm pushfd
+ /*
+ __asm pop eax
+ // __asm mov edx, eax
+ __asm btc eax, EFALGS_CPUID_BIT
+ __asm push eax
+ */
+ __asm btc dword ptr [esp], EFALGS_CPUID_BIT
+ __asm popfd
+ __asm pushfd
+ __asm pop eax
+ // __asm xor eax, edx
+ __asm xor eax, [esp]
+ // __asm push edx
+ __asm popfd
+ __asm and eax, (1 shl EFALGS_CPUID_BIT)
+ __asm jz end_func
+ #endif
+ __asm push ebx
+ __asm xor eax, eax // func
+ __asm xor ecx, ecx // subFunction (optional) for (func == 0)
+ __asm cpuid
+ __asm pop ebx
+ #if defined(NEED_CHECK_FOR_CPUID)
+ end_func:
+ #endif
+ __asm ret 0
+}
+
+void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+ UNUSED_VAR(p)
+ UNUSED_VAR(func)
+ __asm push ebx
+ __asm push edi
+ __asm mov edi, ecx // p
+ __asm mov eax, edx // func
+ __asm xor ecx, ecx // subfunction (optional) for (func == 0)
+ __asm cpuid
+ __asm mov [edi ], eax
+ __asm mov [edi + 4], ebx
+ __asm mov [edi + 8], ecx
+ __asm mov [edi + 12], edx
+ __asm pop edi
+ __asm pop ebx
+ __asm ret 0
+}
+
+#else // MY_CPU_AMD64
+
+ #if _MSC_VER >= 1600
+ #include <intrin.h>
+ #define MY_cpuidex __cpuidex
+ #else
+/*
+ __cpuid (func == (0 or 7)) requires subfunction number in ECX.
+ MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
+ __cpuid() in new MSVC clears ECX.
+ __cpuid() in old MSVC (14.00) x64 doesn't clear ECX
+ We still can use __cpuid for low (func) values that don't require ECX,
+ but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
+ So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
+ where ECX value is first parameter for FASTCALL / NO_INLINE func,
+ So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
+ old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
+
+DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!!
+*/
+static
+Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo)
+{
+ UNUSED_VAR(subFunction)
+ __cpuid(CPUInfo, func);
+}
+ #define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info)
+ #pragma message("======== MY_cpuidex_HACK WAS USED ========")
+ #endif // _MSC_VER >= 1600
+
+#if !defined(MY_CPU_AMD64)
+/* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code,
+ so we disable inlining here */
+Z7_NO_INLINE
+#endif
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+ MY_cpuidex((int *)p, (int)func, 0);
+}
+
+Z7_NO_INLINE
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+ int a[4];
+ MY_cpuidex(a, 0, 0);
+ return a[0];
+}
+
+#endif // MY_CPU_AMD64
+#endif // _MSC_VER
+
+#if defined(NEED_CHECK_FOR_CPUID)
+#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; }
+#else
+#define CHECK_CPUID_IS_SUPPORTED
+#endif
+#undef NEED_CHECK_FOR_CPUID
+
+
+static
+BoolInt x86cpuid_Func_1(UInt32 *p)
+{
+ CHECK_CPUID_IS_SUPPORTED
+ z7_x86_cpuid(p, 1);
+ return True;
+}
+
+/*
+static const UInt32 kVendors[][1] =
+{
+ { 0x756E6547 }, // , 0x49656E69, 0x6C65746E },
+ { 0x68747541 }, // , 0x69746E65, 0x444D4163 },
+ { 0x746E6543 } // , 0x48727561, 0x736C7561 }
+};
+*/
+
+/*
+typedef struct
+{
+ UInt32 maxFunc;
+ UInt32 vendor[3];
+ UInt32 ver;
+ UInt32 b;
+ UInt32 c;
+ UInt32 d;
+} Cx86cpuid;
+
+enum
+{
+ CPU_FIRM_INTEL,
+ CPU_FIRM_AMD,
+ CPU_FIRM_VIA
+};
+int x86cpuid_GetFirm(const Cx86cpuid *p);
+#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf))
+#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf))
+#define x86cpuid_ver_GetStepping(ver) (ver & 0xf)
+
+int x86cpuid_GetFirm(const Cx86cpuid *p)
+{
+ unsigned i;
+ for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++)
+ {
+ const UInt32 *v = kVendors[i];
+ if (v[0] == p->vendor[0]
+ // && v[1] == p->vendor[1]
+ // && v[2] == p->vendor[2]
+ )
+ return (int)i;
+ }
+ return -1;
+}
+
+BoolInt CPU_Is_InOrder()
+{
+ Cx86cpuid p;
+ UInt32 family, model;
+ if (!x86cpuid_CheckAndRead(&p))
+ return True;
+
+ family = x86cpuid_ver_GetFamily(p.ver);
+ model = x86cpuid_ver_GetModel(p.ver);
+
+ switch (x86cpuid_GetFirm(&p))
+ {
+ case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
+ // In-Order Atom CPU
+ model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330
+ || model == 0x26 // 45 nm, Z6xx
+ || model == 0x27 // 32 nm, Z2460
+ || model == 0x35 // 32 nm, Z2760
+ || model == 0x36 // 32 nm, N2xxx, D2xxx
+ )));
+ case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
+ case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
+ }
+ return False; // v23 : unknown processors are not In-Order
+}
+*/
+
+#ifdef _WIN32
+#include "7zWindows.h"
+#endif
+
+#if !defined(MY_CPU_AMD64) && defined(_WIN32)
+
+/* for legacy SSE ia32: there is no user-space cpu instruction to check
+ that OS supports SSE register storing/restoring on context switches.
+ So we need some OS-specific function to check that it's safe to use SSE registers.
+*/
+
+Z7_FORCE_INLINE
+static BoolInt CPU_Sys_Is_SSE_Supported(void)
+{
+#ifdef _MSC_VER
+ #pragma warning(push)
+ #pragma warning(disable : 4996) // `GetVersion': was declared deprecated
+#endif
+ /* low byte is major version of Windows
+ We suppose that any Windows version since
+ Windows2000 (major == 5) supports SSE registers */
+ return (Byte)GetVersion() >= 5;
+#if defined(_MSC_VER)
+ #pragma warning(pop)
+#endif
+}
+#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
+#else
+#define CHECK_SYS_SSE_SUPPORT
+#endif
+
+
+#if !defined(MY_CPU_AMD64)
+
+BoolInt CPU_IsSupported_CMOV(void)
+{
+ UInt32 a[4];
+ if (!x86cpuid_Func_1(&a[0]))
+ return 0;
+ return (a[3] >> 15) & 1;
+}
+
+BoolInt CPU_IsSupported_SSE(void)
+{
+ UInt32 a[4];
+ CHECK_SYS_SSE_SUPPORT
+ if (!x86cpuid_Func_1(&a[0]))
+ return 0;
+ return (a[3] >> 25) & 1;
+}
+
+BoolInt CPU_IsSupported_SSE2(void)
+{
+ UInt32 a[4];
+ CHECK_SYS_SSE_SUPPORT
+ if (!x86cpuid_Func_1(&a[0]))
+ return 0;
+ return (a[3] >> 26) & 1;
+}
+
+#endif
+
+
+static UInt32 x86cpuid_Func_1_ECX(void)
+{
+ UInt32 a[4];
+ CHECK_SYS_SSE_SUPPORT
+ if (!x86cpuid_Func_1(&a[0]))
+ return 0;
+ return a[2];
+}
+
+BoolInt CPU_IsSupported_AES(void)
+{
+ return (x86cpuid_Func_1_ECX() >> 25) & 1;
+}
+
+BoolInt CPU_IsSupported_SSSE3(void)
+{
+ return (x86cpuid_Func_1_ECX() >> 9) & 1;
+}
+
+BoolInt CPU_IsSupported_SSE41(void)
+{
+ return (x86cpuid_Func_1_ECX() >> 19) & 1;
+}
+
+BoolInt CPU_IsSupported_SHA(void)
+{
+ CHECK_SYS_SSE_SUPPORT
+
+ if (z7_x86_cpuid_GetMaxFunc() < 7)
+ return False;
+ {
+ UInt32 d[4];
+ z7_x86_cpuid(d, 7);
+ return (d[1] >> 29) & 1;
+ }
+}
+
+/*
+MSVC: _xgetbv() intrinsic is available since VS2010SP1.
+ MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in
+ <immintrin.h> that we can use or check.
+ For any 32-bit x86 we can use asm code in MSVC,
+ but MSVC asm code is huge after compilation.
+ So _xgetbv() is better
+
+ICC: _xgetbv() intrinsic is available (in what version of ICC?)
+ ICC defines (__GNUC___) and it supports gnu assembler
+ also ICC supports MASM style code with -use-msasm switch.
+ but ICC doesn't support __attribute__((__target__))
+
+GCC/CLANG 9:
+ _xgetbv() is macro that works via __builtin_ia32_xgetbv()
+ and we need __attribute__((__target__("xsave")).
+ But with __target__("xsave") the function will be not
+ inlined to function that has no __target__("xsave") attribute.
+ If we want _xgetbv() call inlining, then we should use asm version
+ instead of calling _xgetbv().
+ Note:intrinsic is broke before GCC 8.2:
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684
+*/
+
+#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \
+ || defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \
+ || defined(__GNUC__) && (__GNUC__ >= 9) \
+ || defined(__clang__) && (__clang_major__ >= 9)
+// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler
+#if defined(__INTEL_COMPILER)
+#define ATTRIB_XGETBV
+#elif defined(__GNUC__) || defined(__clang__)
+// we don't define ATTRIB_XGETBV here, because asm version is better for inlining.
+// #define ATTRIB_XGETBV __attribute__((__target__("xsave")))
+#else
+#define ATTRIB_XGETBV
+#endif
+#endif
+
+#if defined(ATTRIB_XGETBV)
+#include <immintrin.h>
+#endif
+
+
+// XFEATURE_ENABLED_MASK/XCR0
+#define MY_XCR_XFEATURE_ENABLED_MASK 0
+
+#if defined(ATTRIB_XGETBV)
+ATTRIB_XGETBV
+#endif
+static UInt64 x86_xgetbv_0(UInt32 num)
+{
+#if defined(ATTRIB_XGETBV)
+ {
+ return
+ #if (defined(_MSC_VER))
+ _xgetbv(num);
+ #else
+ __builtin_ia32_xgetbv(
+ #if !defined(__clang__)
+ (int)
+ #endif
+ num);
+ #endif
+ }
+
+#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC)
+
+ UInt32 a, d;
+ #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
+ __asm__
+ (
+ "xgetbv"
+ : "=a"(a), "=d"(d) : "c"(num) : "cc"
+ );
+ #else // is old gcc
+ __asm__
+ (
+ ".byte 0x0f, 0x01, 0xd0" "\n\t"
+ : "=a"(a), "=d"(d) : "c"(num) : "cc"
+ );
+ #endif
+ return ((UInt64)d << 32) | a;
+ // return a;
+
+#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64)
+
+ UInt32 a, d;
+ __asm {
+ push eax
+ push edx
+ push ecx
+ mov ecx, num;
+ // xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK
+ _emit 0x0f
+ _emit 0x01
+ _emit 0xd0
+ mov a, eax
+ mov d, edx
+ pop ecx
+ pop edx
+ pop eax
+ }
+ return ((UInt64)d << 32) | a;
+ // return a;
+
+#else // it's unknown compiler
+ // #error "Need xgetbv function"
+ UNUSED_VAR(num)
+ // for MSVC-X64 we could call external function from external file.
+ /* Actually we had checked OSXSAVE/AVX in cpuid before.
+ So it's expected that OS supports at least AVX and below. */
+ // if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0
+ return
+ // (1 << 0) | // x87
+ (1 << 1) // SSE
+ | (1 << 2); // AVX
+
+#endif
+}
+
+#ifdef _WIN32
+/*
+ Windows versions do not know about new ISA extensions that
+ can be introduced. But we still can use new extensions,
+ even if Windows doesn't report about supporting them,
+ But we can use new extensions, only if Windows knows about new ISA extension
+ that changes the number or size of registers: SSE, AVX/XSAVE, AVX512
+ So it's enough to check
+ MY_PF_AVX_INSTRUCTIONS_AVAILABLE
+ instead of
+ MY_PF_AVX2_INSTRUCTIONS_AVAILABLE
+*/
+#define MY_PF_XSAVE_ENABLED 17
+// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36
+// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37
+// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38
+// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39
+// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40
+// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41
+#endif
+
+BoolInt CPU_IsSupported_AVX(void)
+{
+ #ifdef _WIN32
+ if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))
+ return False;
+ /* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from
+ some latest Win10 revisions. But we need AVX in older Windows also.
+ So we don't use the following check: */
+ /*
+ if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))
+ return False;
+ */
+ #endif
+
+ /*
+ OS must use new special XSAVE/XRSTOR instructions to save
+ AVX registers when it required for context switching.
+ At OS statring:
+ OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions.
+ Also OS sets bitmask in XCR0 register that defines what
+ registers will be processed by XSAVE instruction:
+ XCR0.SSE[bit 0] - x87 registers and state
+ XCR0.SSE[bit 1] - SSE registers and state
+ XCR0.AVX[bit 2] - AVX registers and state
+ CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27].
+ So we can read that bit in user-space.
+ XCR0 is available for reading in user-space by new XGETBV instruction.
+ */
+ {
+ const UInt32 c = x86cpuid_Func_1_ECX();
+ if (0 == (1
+ & (c >> 28) // AVX instructions are supported by hardware
+ & (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS.
+ return False;
+ }
+
+ /* also we can check
+ CPUID.1:ECX.XSAVE [bit 26] : that shows that
+ XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware.
+ But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */
+
+ /* If OS have enabled XSAVE extension instructions (OSXSAVE == 1),
+ in most cases we expect that OS also will support storing/restoring
+ for AVX and SSE states at least.
+ But to be ensure for that we call user-space instruction
+ XGETBV(0) to get XCR0 value that contains bitmask that defines
+ what exact states(registers) OS have enabled for storing/restoring.
+ */
+
+ {
+ const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
+ // printf("\n=== XGetBV=%d\n", bm);
+ return 1
+ & (bm >> 1) // SSE state is supported (set by OS) for storing/restoring
+ & (bm >> 2); // AVX state is supported (set by OS) for storing/restoring
+ }
+ // since Win7SP1: we can use GetEnabledXStateFeatures();
+}
+
+
+BoolInt CPU_IsSupported_AVX2(void)
+{
+ if (!CPU_IsSupported_AVX())
+ return False;
+ if (z7_x86_cpuid_GetMaxFunc() < 7)
+ return False;
+ {
+ UInt32 d[4];
+ z7_x86_cpuid(d, 7);
+ // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+ return 1
+ & (d[1] >> 5); // avx2
+ }
+}
+
+BoolInt CPU_IsSupported_VAES_AVX2(void)
+{
+ if (!CPU_IsSupported_AVX())
+ return False;
+ if (z7_x86_cpuid_GetMaxFunc() < 7)
+ return False;
+ {
+ UInt32 d[4];
+ z7_x86_cpuid(d, 7);
+ // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+ return 1
+ & (d[1] >> 5) // avx2
+ // & (d[1] >> 31) // avx512vl
+ & (d[2] >> 9); // vaes // VEX-256/EVEX
+ }
+}
+
+BoolInt CPU_IsSupported_PageGB(void)
+{
+ CHECK_CPUID_IS_SUPPORTED
+ {
+ UInt32 d[4];
+ z7_x86_cpuid(d, 0x80000000);
+ if (d[0] < 0x80000001)
+ return False;
+ z7_x86_cpuid(d, 0x80000001);
+ return (d[3] >> 26) & 1;
+ }
+}
+
+
+#elif defined(MY_CPU_ARM_OR_ARM64)
+
+#ifdef _WIN32
+
+#include "7zWindows.h"
+
+BoolInt CPU_IsSupported_CRC32(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+
+#else
+
+#if defined(__APPLE__)
+
+/*
+#include <stdio.h>
+#include <string.h>
+static void Print_sysctlbyname(const char *name)
+{
+ size_t bufSize = 256;
+ char buf[256];
+ int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
+ {
+ int i;
+ printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
+ for (i = 0; i < 20; i++)
+ printf(" %2x", (unsigned)(Byte)buf[i]);
+
+ }
+}
+*/
+/*
+ Print_sysctlbyname("hw.pagesize");
+ Print_sysctlbyname("machdep.cpu.brand_string");
+*/
+
+static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name)
+{
+ UInt32 val = 0;
+ if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
+ return 1;
+ return 0;
+}
+
+BoolInt CPU_IsSupported_CRC32(void)
+{
+ return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
+}
+
+BoolInt CPU_IsSupported_NEON(void)
+{
+ return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
+}
+
+#ifdef MY_CPU_ARM64
+#define APPLE_CRYPTO_SUPPORT_VAL 1
+#else
+#define APPLE_CRYPTO_SUPPORT_VAL 0
+#endif
+
+BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
+BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
+BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
+
+
+#else // __APPLE__
+
+#include <sys/auxv.h>
+
+#define USE_HWCAP
+
+#ifdef USE_HWCAP
+
+#include <asm/hwcap.h>
+
+ #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
+ BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; }
+
+#ifdef MY_CPU_ARM64
+ #define MY_HWCAP_CHECK_FUNC(name) \
+ MY_HWCAP_CHECK_FUNC_2(name, name)
+ MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
+// MY_HWCAP_CHECK_FUNC (ASIMD)
+#elif defined(MY_CPU_ARM)
+ #define MY_HWCAP_CHECK_FUNC(name) \
+ BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
+ MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
+#endif
+
+#else // USE_HWCAP
+
+ #define MY_HWCAP_CHECK_FUNC(name) \
+ BoolInt CPU_IsSupported_ ## name() { return 0; }
+ MY_HWCAP_CHECK_FUNC(NEON)
+
+#endif // USE_HWCAP
+
+MY_HWCAP_CHECK_FUNC (CRC32)
+MY_HWCAP_CHECK_FUNC (SHA1)
+MY_HWCAP_CHECK_FUNC (SHA2)
+MY_HWCAP_CHECK_FUNC (AES)
+
+#endif // __APPLE__
+#endif // _WIN32
+
+#endif // MY_CPU_ARM_OR_ARM64
+
+
+
+#ifdef __APPLE__
+
+#include <sys/sysctl.h>
+
+int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
+{
+ return sysctlbyname(name, buf, bufSize, NULL, 0);
+}
+
+int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
+{
+ size_t bufSize = sizeof(*val);
+ const int res = z7_sysctlbyname_Get(name, val, &bufSize);
+ if (res == 0 && bufSize != sizeof(*val))
+ return EFAULT;
+ return res;
+}
+
+#endif
diff --git a/src/Common/lzma/CpuArch.h b/src/Common/lzma/CpuArch.h
new file mode 100644
index 00000000..8e5d8a54
--- /dev/null
+++ b/src/Common/lzma/CpuArch.h
@@ -0,0 +1,523 @@
+/* CpuArch.h -- CPU specific code
+2023-04-02 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_CPU_ARCH_H
+#define ZIP7_INC_CPU_ARCH_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/*
+MY_CPU_LE means that CPU is LITTLE ENDIAN.
+MY_CPU_BE means that CPU is BIG ENDIAN.
+If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform.
+
+MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses.
+
+MY_CPU_64BIT means that processor can work with 64-bit registers.
+ MY_CPU_64BIT can be used to select fast code branch
+ MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
+*/
+
+#if defined(_M_X64) \
+ || defined(_M_AMD64) \
+ || defined(__x86_64__) \
+ || defined(__AMD64__) \
+ || defined(__amd64__)
+ #define MY_CPU_AMD64
+ #ifdef __ILP32__
+ #define MY_CPU_NAME "x32"
+ #define MY_CPU_SIZEOF_POINTER 4
+ #else
+ #define MY_CPU_NAME "x64"
+ #define MY_CPU_SIZEOF_POINTER 8
+ #endif
+ #define MY_CPU_64BIT
+#endif
+
+
+#if defined(_M_IX86) \
+ || defined(__i386__)
+ #define MY_CPU_X86
+ #define MY_CPU_NAME "x86"
+ /* #define MY_CPU_32BIT */
+ #define MY_CPU_SIZEOF_POINTER 4
+#endif
+
+
+#if defined(_M_ARM64) \
+ || defined(__AARCH64EL__) \
+ || defined(__AARCH64EB__) \
+ || defined(__aarch64__)
+ #define MY_CPU_ARM64
+ #ifdef __ILP32__
+ #define MY_CPU_NAME "arm64-32"
+ #define MY_CPU_SIZEOF_POINTER 4
+ #else
+ #define MY_CPU_NAME "arm64"
+ #define MY_CPU_SIZEOF_POINTER 8
+ #endif
+ #define MY_CPU_64BIT
+#endif
+
+
+#if defined(_M_ARM) \
+ || defined(_M_ARM_NT) \
+ || defined(_M_ARMT) \
+ || defined(__arm__) \
+ || defined(__thumb__) \
+ || defined(__ARMEL__) \
+ || defined(__ARMEB__) \
+ || defined(__THUMBEL__) \
+ || defined(__THUMBEB__)
+ #define MY_CPU_ARM
+
+ #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
+ #define MY_CPU_ARMT
+ #define MY_CPU_NAME "armt"
+ #else
+ #define MY_CPU_ARM32
+ #define MY_CPU_NAME "arm"
+ #endif
+ /* #define MY_CPU_32BIT */
+ #define MY_CPU_SIZEOF_POINTER 4
+#endif
+
+
+#if defined(_M_IA64) \
+ || defined(__ia64__)
+ #define MY_CPU_IA64
+ #define MY_CPU_NAME "ia64"
+ #define MY_CPU_64BIT
+#endif
+
+
+#if defined(__mips64) \
+ || defined(__mips64__) \
+ || (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3))
+ #define MY_CPU_NAME "mips64"
+ #define MY_CPU_64BIT
+#elif defined(__mips__)
+ #define MY_CPU_NAME "mips"
+ /* #define MY_CPU_32BIT */
+#endif
+
+
+#if defined(__ppc64__) \
+ || defined(__powerpc64__) \
+ || defined(__ppc__) \
+ || defined(__powerpc__) \
+ || defined(__PPC__) \
+ || defined(_POWER)
+
+#define MY_CPU_PPC_OR_PPC64
+
+#if defined(__ppc64__) \
+ || defined(__powerpc64__) \
+ || defined(_LP64) \
+ || defined(__64BIT__)
+ #ifdef __ILP32__
+ #define MY_CPU_NAME "ppc64-32"
+ #define MY_CPU_SIZEOF_POINTER 4
+ #else
+ #define MY_CPU_NAME "ppc64"
+ #define MY_CPU_SIZEOF_POINTER 8
+ #endif
+ #define MY_CPU_64BIT
+#else
+ #define MY_CPU_NAME "ppc"
+ #define MY_CPU_SIZEOF_POINTER 4
+ /* #define MY_CPU_32BIT */
+#endif
+#endif
+
+
+#if defined(__riscv) \
+ || defined(__riscv__)
+ #if __riscv_xlen == 32
+ #define MY_CPU_NAME "riscv32"
+ #elif __riscv_xlen == 64
+ #define MY_CPU_NAME "riscv64"
+ #else
+ #define MY_CPU_NAME "riscv"
+ #endif
+#endif
+
+
+#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
+#define MY_CPU_X86_OR_AMD64
+#endif
+
+#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64)
+#define MY_CPU_ARM_OR_ARM64
+#endif
+
+
+#ifdef _WIN32
+
+ #ifdef MY_CPU_ARM
+ #define MY_CPU_ARM_LE
+ #endif
+
+ #ifdef MY_CPU_ARM64
+ #define MY_CPU_ARM64_LE
+ #endif
+
+ #ifdef _M_IA64
+ #define MY_CPU_IA64_LE
+ #endif
+
+#endif
+
+
+#if defined(MY_CPU_X86_OR_AMD64) \
+ || defined(MY_CPU_ARM_LE) \
+ || defined(MY_CPU_ARM64_LE) \
+ || defined(MY_CPU_IA64_LE) \
+ || defined(__LITTLE_ENDIAN__) \
+ || defined(__ARMEL__) \
+ || defined(__THUMBEL__) \
+ || defined(__AARCH64EL__) \
+ || defined(__MIPSEL__) \
+ || defined(__MIPSEL) \
+ || defined(_MIPSEL) \
+ || defined(__BFIN__) \
+ || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+ #define MY_CPU_LE
+#endif
+
+#if defined(__BIG_ENDIAN__) \
+ || defined(__ARMEB__) \
+ || defined(__THUMBEB__) \
+ || defined(__AARCH64EB__) \
+ || defined(__MIPSEB__) \
+ || defined(__MIPSEB) \
+ || defined(_MIPSEB) \
+ || defined(__m68k__) \
+ || defined(__s390__) \
+ || defined(__s390x__) \
+ || defined(__zarch__) \
+ || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
+ #define MY_CPU_BE
+#endif
+
+
+#if defined(MY_CPU_LE) && defined(MY_CPU_BE)
+ #error Stop_Compiling_Bad_Endian
+#endif
+
+#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE)
+ #error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time
+#endif
+
+#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
+ #error Stop_Compiling_Bad_32_64_BIT
+#endif
+
+#ifdef __SIZEOF_POINTER__
+ #ifdef MY_CPU_SIZEOF_POINTER
+ #if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__
+ #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
+ #endif
+ #else
+ #define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__
+ #endif
+#endif
+
+#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
+#if defined (_LP64)
+ #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
+#endif
+#endif
+
+#ifdef _MSC_VER
+ #if _MSC_VER >= 1300
+ #define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1))
+ #define MY_CPU_pragma_pop __pragma(pack(pop))
+ #else
+ #define MY_CPU_pragma_pack_push_1
+ #define MY_CPU_pragma_pop
+ #endif
+#else
+ #ifdef __xlC__
+ #define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)")
+ #define MY_CPU_pragma_pop _Pragma("pack()")
+ #else
+ #define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)")
+ #define MY_CPU_pragma_pop _Pragma("pack(pop)")
+ #endif
+#endif
+
+
+#ifndef MY_CPU_NAME
+ #ifdef MY_CPU_LE
+ #define MY_CPU_NAME "LE"
+ #elif defined(MY_CPU_BE)
+ #define MY_CPU_NAME "BE"
+ #else
+ /*
+ #define MY_CPU_NAME ""
+ */
+ #endif
+#endif
+
+
+
+
+
+#ifdef __has_builtin
+ #define Z7_has_builtin(x) __has_builtin(x)
+#else
+ #define Z7_has_builtin(x) 0
+#endif
+
+
+#define Z7_BSWAP32_CONST(v) \
+ ( (((UInt32)(v) << 24) ) \
+ | (((UInt32)(v) << 8) & (UInt32)0xff0000) \
+ | (((UInt32)(v) >> 8) & (UInt32)0xff00 ) \
+ | (((UInt32)(v) >> 24) ))
+
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1300)
+
+#include <stdlib.h>
+
+/* Note: these macros will use bswap instruction (486), that is unsupported in 386 cpu */
+
+#pragma intrinsic(_byteswap_ushort)
+#pragma intrinsic(_byteswap_ulong)
+#pragma intrinsic(_byteswap_uint64)
+
+#define Z7_BSWAP16(v) _byteswap_ushort(v)
+#define Z7_BSWAP32(v) _byteswap_ulong (v)
+#define Z7_BSWAP64(v) _byteswap_uint64(v)
+#define Z7_CPU_FAST_BSWAP_SUPPORTED
+
+#elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
+ || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16))
+
+#define Z7_BSWAP16(v) __builtin_bswap16(v)
+#define Z7_BSWAP32(v) __builtin_bswap32(v)
+#define Z7_BSWAP64(v) __builtin_bswap64(v)
+#define Z7_CPU_FAST_BSWAP_SUPPORTED
+
+#else
+
+#define Z7_BSWAP16(v) ((UInt16) \
+ ( ((UInt32)(v) << 8) \
+ | ((UInt32)(v) >> 8) \
+ ))
+
+#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v)
+
+#define Z7_BSWAP64(v) \
+ ( ( ( (UInt64)(v) ) << 8 * 7 ) \
+ | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \
+ | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \
+ | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \
+ | ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \
+ | ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \
+ | ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \
+ | ( ( (UInt64)(v) >> 8 * 7 ) ) \
+ )
+
+#endif
+
+
+
+#ifdef MY_CPU_LE
+ #if defined(MY_CPU_X86_OR_AMD64) \
+ || defined(MY_CPU_ARM64)
+ #define MY_CPU_LE_UNALIGN
+ #define MY_CPU_LE_UNALIGN_64
+ #elif defined(__ARM_FEATURE_UNALIGNED)
+ /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment.
+ So we can't use unaligned 64-bit operations. */
+ #define MY_CPU_LE_UNALIGN
+ #endif
+#endif
+
+
+#ifdef MY_CPU_LE_UNALIGN
+
+#define GetUi16(p) (*(const UInt16 *)(const void *)(p))
+#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
+#ifdef MY_CPU_LE_UNALIGN_64
+#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
+#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
+#endif
+
+#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
+#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
+
+#else
+
+#define GetUi16(p) ( (UInt16) ( \
+ ((const Byte *)(p))[0] | \
+ ((UInt16)((const Byte *)(p))[1] << 8) ))
+
+#define GetUi32(p) ( \
+ ((const Byte *)(p))[0] | \
+ ((UInt32)((const Byte *)(p))[1] << 8) | \
+ ((UInt32)((const Byte *)(p))[2] << 16) | \
+ ((UInt32)((const Byte *)(p))[3] << 24))
+
+#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+ _ppp_[0] = (Byte)_vvv_; \
+ _ppp_[1] = (Byte)(_vvv_ >> 8); }
+
+#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+ _ppp_[0] = (Byte)_vvv_; \
+ _ppp_[1] = (Byte)(_vvv_ >> 8); \
+ _ppp_[2] = (Byte)(_vvv_ >> 16); \
+ _ppp_[3] = (Byte)(_vvv_ >> 24); }
+
+#endif
+
+
+#ifndef GetUi64
+#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
+#endif
+
+#ifndef SetUi64
+#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
+ SetUi32(_ppp2_ , (UInt32)_vvv2_) \
+ SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) }
+#endif
+
+
+#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED)
+
+#define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p))
+#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); }
+
+#if defined(MY_CPU_LE_UNALIGN_64)
+#define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p))
+#endif
+
+#else
+
+#define GetBe32(p) ( \
+ ((UInt32)((const Byte *)(p))[0] << 24) | \
+ ((UInt32)((const Byte *)(p))[1] << 16) | \
+ ((UInt32)((const Byte *)(p))[2] << 8) | \
+ ((const Byte *)(p))[3] )
+
+#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+ _ppp_[0] = (Byte)(_vvv_ >> 24); \
+ _ppp_[1] = (Byte)(_vvv_ >> 16); \
+ _ppp_[2] = (Byte)(_vvv_ >> 8); \
+ _ppp_[3] = (Byte)_vvv_; }
+
+#endif
+
+#ifndef GetBe64
+#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
+#endif
+
+#ifndef GetBe16
+#define GetBe16(p) ( (UInt16) ( \
+ ((UInt16)((const Byte *)(p))[0] << 8) | \
+ ((const Byte *)(p))[1] ))
+#endif
+
+
+#if defined(MY_CPU_BE)
+#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v)
+#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
+#define Z7_CONV_NATIVE_TO_BE_32(v) (v)
+#elif defined(MY_CPU_LE)
+#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
+#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v)
+#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v)
+#else
+#error Stop_Compiling_Unknown_Endian_CONV
+#endif
+
+
+#if defined(MY_CPU_BE)
+
+#define GetBe32a(p) (*(const UInt32 *)(const void *)(p))
+#define GetBe16a(p) (*(const UInt16 *)(const void *)(p))
+#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
+#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
+
+#define GetUi32a(p) GetUi32(p)
+#define GetUi16a(p) GetUi16(p)
+#define SetUi32a(p, v) SetUi32(p, v)
+#define SetUi16a(p, v) SetUi16(p, v)
+
+#elif defined(MY_CPU_LE)
+
+#define GetUi32a(p) (*(const UInt32 *)(const void *)(p))
+#define GetUi16a(p) (*(const UInt16 *)(const void *)(p))
+#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
+#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
+
+#define GetBe32a(p) GetBe32(p)
+#define GetBe16a(p) GetBe16(p)
+#define SetBe32a(p, v) SetBe32(p, v)
+#define SetBe16a(p, v) SetBe16(p, v)
+
+#else
+#error Stop_Compiling_Unknown_Endian_CPU_a
+#endif
+
+
+#if defined(MY_CPU_X86_OR_AMD64) \
+ || defined(MY_CPU_ARM_OR_ARM64) \
+ || defined(MY_CPU_PPC_OR_PPC64)
+ #define Z7_CPU_FAST_ROTATE_SUPPORTED
+#endif
+
+
+#ifdef MY_CPU_X86_OR_AMD64
+
+void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function);
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void);
+#if defined(MY_CPU_AMD64)
+#define Z7_IF_X86_CPUID_SUPPORTED
+#else
+#define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc())
+#endif
+
+BoolInt CPU_IsSupported_AES(void);
+BoolInt CPU_IsSupported_AVX(void);
+BoolInt CPU_IsSupported_AVX2(void);
+BoolInt CPU_IsSupported_VAES_AVX2(void);
+BoolInt CPU_IsSupported_CMOV(void);
+BoolInt CPU_IsSupported_SSE(void);
+BoolInt CPU_IsSupported_SSE2(void);
+BoolInt CPU_IsSupported_SSSE3(void);
+BoolInt CPU_IsSupported_SSE41(void);
+BoolInt CPU_IsSupported_SHA(void);
+BoolInt CPU_IsSupported_PageGB(void);
+
+#elif defined(MY_CPU_ARM_OR_ARM64)
+
+BoolInt CPU_IsSupported_CRC32(void);
+BoolInt CPU_IsSupported_NEON(void);
+
+#if defined(_WIN32)
+BoolInt CPU_IsSupported_CRYPTO(void);
+#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO
+#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO
+#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO
+#else
+BoolInt CPU_IsSupported_SHA1(void);
+BoolInt CPU_IsSupported_SHA2(void);
+BoolInt CPU_IsSupported_AES(void);
+#endif
+
+#endif
+
+#if defined(__APPLE__)
+int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
+int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
+#endif
+
+EXTERN_C_END
+
+#endif
diff --git a/src/Common/lzma/LzFind.c b/src/Common/lzma/LzFind.c
new file mode 100644
index 00000000..0fbd5aae
--- /dev/null
+++ b/src/Common/lzma/LzFind.c
@@ -0,0 +1,1717 @@
+/* LzFind.c -- Match finder for LZ algorithms
+2023-03-14 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+// #include <stdio.h>
+
+#include "CpuArch.h"
+#include "LzFind.h"
+#include "LzHash.h"
+
+#define kBlockMoveAlign (1 << 7) // alignment for memmove()
+#define kBlockSizeAlign (1 << 16) // alignment for block allocation
+#define kBlockSizeReserveMin (1 << 24) // it's 1/256 from 4 GB dictinary
+
+#define kEmptyHashValue 0
+
+#define kMaxValForNormalize ((UInt32)0)
+// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug
+
+// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
+
+#define GET_AVAIL_BYTES(p) \
+ Inline_MatchFinder_GetNumAvailableBytes(p)
+
+
+// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+#define kFix5HashSize kFix4HashSize
+
+/*
+ HASH2_CALC:
+ if (hv) match, then cur[0] and cur[1] also match
+*/
+#define HASH2_CALC hv = GetUi16(cur);
+
+// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255]
+
+/*
+ HASH3_CALC:
+ if (cur[0]) and (h2) match, then cur[1] also match
+ if (cur[0]) and (hv) match, then cur[1] and cur[2] also match
+*/
+#define HASH3_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
+
+#define HASH4_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; }
+
+#define HASH5_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \
+ /* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \
+ hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; }
+
+#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
+
+
+static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{
+ // if (!p->directInput)
+ {
+ ISzAlloc_Free(alloc, p->bufBase);
+ p->bufBase = NULL;
+ }
+}
+
+
+static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc)
+{
+ if (blockSize == 0)
+ return 0;
+ if (!p->bufBase || p->blockSize != blockSize)
+ {
+ // size_t blockSizeT;
+ LzInWindow_Free(p, alloc);
+ p->blockSize = blockSize;
+ // blockSizeT = blockSize;
+
+ // printf("\nblockSize = 0x%x\n", blockSize);
+ /*
+ #if defined _WIN64
+ // we can allocate 4GiB, but still use UInt32 for (p->blockSize)
+ // we use UInt32 type for (p->blockSize), because
+ // we don't want to wrap over 4 GiB,
+ // when we use (p->streamPos - p->pos) that is UInt32.
+ if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign)
+ {
+ blockSizeT = ((size_t)1 << 32);
+ printf("\nchanged to blockSizeT = 4GiB\n");
+ }
+ #endif
+ */
+
+ p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
+ // printf("\nbufferBase = %p\n", p->bufBase);
+ // return 0; // for debug
+ }
+ return (p->bufBase != NULL);
+}
+
+static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
+
+static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); }
+
+
+Z7_NO_INLINE
+static void MatchFinder_ReadBlock(CMatchFinder *p)
+{
+ if (p->streamEndWasReached || p->result != SZ_OK)
+ return;
+
+ /* We use (p->streamPos - p->pos) value.
+ (p->streamPos < p->pos) is allowed. */
+
+ if (p->directInput)
+ {
+ UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
+ if (curSize > p->directInputRem)
+ curSize = (UInt32)p->directInputRem;
+ p->streamPos += curSize;
+ p->directInputRem -= curSize;
+ if (p->directInputRem == 0)
+ p->streamEndWasReached = 1;
+ return;
+ }
+
+ for (;;)
+ {
+ const Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
+ size_t size = (size_t)(p->bufBase + p->blockSize - dest);
+ if (size == 0)
+ {
+ /* we call ReadBlock() after NeedMove() and MoveBlock().
+ NeedMove() and MoveBlock() povide more than (keepSizeAfter)
+ to the end of (blockSize).
+ So we don't execute this branch in normal code flow.
+ We can go here, if we will call ReadBlock() before NeedMove(), MoveBlock().
+ */
+ // p->result = SZ_ERROR_FAIL; // we can show error here
+ return;
+ }
+
+ // #define kRead 3
+ // if (size > kRead) size = kRead; // for debug
+
+ /*
+ // we need cast (Byte *)dest.
+ #ifdef __clang__
+ #pragma GCC diagnostic ignored "-Wcast-qual"
+ #endif
+ */
+ p->result = ISeqInStream_Read(p->stream,
+ p->bufBase + (dest - p->bufBase), &size);
+ if (p->result != SZ_OK)
+ return;
+ if (size == 0)
+ {
+ p->streamEndWasReached = 1;
+ return;
+ }
+ p->streamPos += (UInt32)size;
+ if (GET_AVAIL_BYTES(p) > p->keepSizeAfter)
+ return;
+ /* here and in another (p->keepSizeAfter) checks we keep on 1 byte more than was requested by Create() function
+ (GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */
+ }
+
+ // on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter)
+}
+
+
+
+Z7_NO_INLINE
+void MatchFinder_MoveBlock(CMatchFinder *p)
+{
+ const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore;
+ const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
+ p->buffer = p->bufBase + keepBefore;
+ memmove(p->bufBase,
+ p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
+ keepBefore + (size_t)GET_AVAIL_BYTES(p));
+}
+
+/* We call MoveBlock() before ReadBlock().
+ So MoveBlock() can be wasteful operation, if the whole input data
+ can fit in current block even without calling MoveBlock().
+ in important case where (dataSize <= historySize)
+ condition (p->blockSize > dataSize + p->keepSizeAfter) is met
+ So there is no MoveBlock() in that case case.
+*/
+
+int MatchFinder_NeedMove(CMatchFinder *p)
+{
+ if (p->directInput)
+ return 0;
+ if (p->streamEndWasReached || p->result != SZ_OK)
+ return 0;
+ return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
+}
+
+void MatchFinder_ReadIfRequired(CMatchFinder *p)
+{
+ if (p->keepSizeAfter >= GET_AVAIL_BYTES(p))
+ MatchFinder_ReadBlock(p);
+}
+
+
+
+static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
+{
+ p->cutValue = 32;
+ p->btMode = 1;
+ p->numHashBytes = 4;
+ p->numHashBytes_Min = 2;
+ p->numHashOutBits = 0;
+ p->bigHash = 0;
+}
+
+#define kCrcPoly 0xEDB88320
+
+void MatchFinder_Construct(CMatchFinder *p)
+{
+ unsigned i;
+ p->buffer = NULL;
+ p->bufBase = NULL;
+ p->directInput = 0;
+ p->stream = NULL;
+ p->hash = NULL;
+ p->expectedDataSize = (UInt64)(Int64)-1;
+ MatchFinder_SetDefaultSettings(p);
+
+ for (i = 0; i < 256; i++)
+ {
+ UInt32 r = (UInt32)i;
+ unsigned j;
+ for (j = 0; j < 8; j++)
+ r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
+ p->crc[i] = r;
+ }
+}
+
+#undef kCrcPoly
+
+static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->hash);
+ p->hash = NULL;
+}
+
+void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{
+ MatchFinder_FreeThisClassMemory(p, alloc);
+ LzInWindow_Free(p, alloc);
+}
+
+static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
+{
+ const size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
+ if (sizeInBytes / sizeof(CLzRef) != num)
+ return NULL;
+ return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
+}
+
+#if (kBlockSizeReserveMin < kBlockSizeAlign * 2)
+ #error Stop_Compiling_Bad_Reserve
+#endif
+
+
+
+static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
+{
+ UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter);
+ /*
+ if (historySize > kMaxHistorySize)
+ return 0;
+ */
+ // printf("\nhistorySize == 0x%x\n", historySize);
+
+ if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow
+ return 0;
+
+ {
+ const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign;
+ const UInt32 rem = kBlockSizeMax - blockSize;
+ const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2))
+ + (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here
+ if (blockSize >= kBlockSizeMax
+ || rem < kBlockSizeReserveMin) // we reject settings that will be slow
+ return 0;
+ if (reserve >= rem)
+ blockSize = kBlockSizeMax;
+ else
+ {
+ blockSize += reserve;
+ blockSize &= ~(UInt32)(kBlockSizeAlign - 1);
+ }
+ }
+ // printf("\n LzFind_blockSize = %x\n", blockSize);
+ // printf("\n LzFind_blockSize = %d\n", blockSize >> 20);
+ return blockSize;
+}
+
+
+// input is historySize
+static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs)
+{
+ if (p->numHashBytes == 2)
+ return (1 << 16) - 1;
+ if (hs != 0)
+ hs--;
+ hs |= (hs >> 1);
+ hs |= (hs >> 2);
+ hs |= (hs >> 4);
+ hs |= (hs >> 8);
+ // we propagated 16 bits in (hs). Low 16 bits must be set later
+ if (hs >= (1 << 24))
+ {
+ if (p->numHashBytes == 3)
+ hs = (1 << 24) - 1;
+ /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
+ }
+ // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
+ hs |= (1 << 16) - 1; /* don't change it! */
+ // bt5: we adjust the size with recommended minimum size
+ if (p->numHashBytes >= 5)
+ hs |= (256 << kLzHash_CrcShift_2) - 1;
+ return hs;
+}
+
+// input is historySize
+static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs)
+{
+ if (p->numHashBytes == 2)
+ return (1 << 16) - 1;
+ if (hs != 0)
+ hs--;
+ hs |= (hs >> 1);
+ hs |= (hs >> 2);
+ hs |= (hs >> 4);
+ hs |= (hs >> 8);
+ // we propagated 16 bits in (hs). Low 16 bits must be set later
+ hs >>= 1;
+ if (hs >= (1 << 24))
+ {
+ if (p->numHashBytes == 3)
+ hs = (1 << 24) - 1;
+ else
+ hs >>= 1;
+ /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
+ }
+ // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
+ hs |= (1 << 16) - 1; /* don't change it! */
+ // bt5: we adjust the size with recommended minimum size
+ if (p->numHashBytes >= 5)
+ hs |= (256 << kLzHash_CrcShift_2) - 1;
+ return hs;
+}
+
+
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+ UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+ ISzAllocPtr alloc)
+{
+ /* we need one additional byte in (p->keepSizeBefore),
+ since we use MoveBlock() after (p->pos++) and before dictionary using */
+ // keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug
+ p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
+
+ keepAddBufferAfter += matchMaxLen;
+ /* we need (p->keepSizeAfter >= p->numHashBytes) */
+ if (keepAddBufferAfter < p->numHashBytes)
+ keepAddBufferAfter = p->numHashBytes;
+ // keepAddBufferAfter -= 2; // for debug
+ p->keepSizeAfter = keepAddBufferAfter;
+
+ if (p->directInput)
+ p->blockSize = 0;
+ if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
+ {
+ size_t hashSizeSum;
+ {
+ UInt32 hs;
+ UInt32 hsCur;
+
+ if (p->numHashOutBits != 0)
+ {
+ unsigned numBits = p->numHashOutBits;
+ const unsigned nbMax =
+ (p->numHashBytes == 2 ? 16 :
+ (p->numHashBytes == 3 ? 24 : 32));
+ if (numBits > nbMax)
+ numBits = nbMax;
+ if (numBits >= 32)
+ hs = (UInt32)0 - 1;
+ else
+ hs = ((UInt32)1 << numBits) - 1;
+ // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
+ hs |= (1 << 16) - 1; /* don't change it! */
+ if (p->numHashBytes >= 5)
+ hs |= (256 << kLzHash_CrcShift_2) - 1;
+ {
+ const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize);
+ if (hs > hs2)
+ hs = hs2;
+ }
+ hsCur = hs;
+ if (p->expectedDataSize < historySize)
+ {
+ const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize);
+ if (hsCur > hs2)
+ hsCur = hs2;
+ }
+ }
+ else
+ {
+ hs = MatchFinder_GetHashMask(p, historySize);
+ hsCur = hs;
+ if (p->expectedDataSize < historySize)
+ {
+ hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize);
+ if (hsCur > hs) // is it possible?
+ hsCur = hs;
+ }
+ }
+
+ p->hashMask = hsCur;
+
+ hashSizeSum = hs;
+ hashSizeSum++;
+ if (hashSizeSum < hs)
+ return 0;
+ {
+ UInt32 fixedHashSize = 0;
+ if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size;
+ if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size;
+ // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
+ hashSizeSum += fixedHashSize;
+ p->fixedHashSize = fixedHashSize;
+ }
+ }
+
+ p->matchMaxLen = matchMaxLen;
+
+ {
+ size_t newSize;
+ size_t numSons;
+ const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
+ p->historySize = historySize;
+ p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)
+
+ numSons = newCyclicBufferSize;
+ if (p->btMode)
+ numSons <<= 1;
+ newSize = hashSizeSum + numSons;
+
+ if (numSons < newCyclicBufferSize || newSize < numSons)
+ return 0;
+
+ // aligned size is not required here, but it can be better for some loops
+ #define NUM_REFS_ALIGN_MASK 0xF
+ newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
+
+ // 22.02: we don't reallocate buffer, if old size is enough
+ if (p->hash && p->numRefs >= newSize)
+ return 1;
+
+ MatchFinder_FreeThisClassMemory(p, alloc);
+ p->numRefs = newSize;
+ p->hash = AllocRefs(newSize, alloc);
+
+ if (p->hash)
+ {
+ p->son = p->hash + hashSizeSum;
+ return 1;
+ }
+ }
+ }
+
+ MatchFinder_Free(p, alloc);
+ return 0;
+}
+
+
+static void MatchFinder_SetLimits(CMatchFinder *p)
+{
+ UInt32 k;
+ UInt32 n = kMaxValForNormalize - p->pos;
+ if (n == 0)
+ n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0)
+
+ k = p->cyclicBufferSize - p->cyclicBufferPos;
+ if (k < n)
+ n = k;
+
+ k = GET_AVAIL_BYTES(p);
+ {
+ const UInt32 ksa = p->keepSizeAfter;
+ UInt32 mm = p->matchMaxLen;
+ if (k > ksa)
+ k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock
+ else if (k >= mm)
+ {
+ // the limitation for (p->lenLimit) update
+ k -= mm; // optimization : to reduce the number of checks
+ k++;
+ // k = 1; // non-optimized version : for debug
+ }
+ else
+ {
+ mm = k;
+ if (k != 0)
+ k = 1;
+ }
+ p->lenLimit = mm;
+ }
+ if (k < n)
+ n = k;
+
+ p->posLimit = p->pos + n;
+}
+
+
+void MatchFinder_Init_LowHash(CMatchFinder *p)
+{
+ size_t i;
+ CLzRef *items = p->hash;
+ const size_t numItems = p->fixedHashSize;
+ for (i = 0; i < numItems; i++)
+ items[i] = kEmptyHashValue;
+}
+
+
+void MatchFinder_Init_HighHash(CMatchFinder *p)
+{
+ size_t i;
+ CLzRef *items = p->hash + p->fixedHashSize;
+ const size_t numItems = (size_t)p->hashMask + 1;
+ for (i = 0; i < numItems; i++)
+ items[i] = kEmptyHashValue;
+}
+
+
+void MatchFinder_Init_4(CMatchFinder *p)
+{
+ if (!p->directInput)
+ p->buffer = p->bufBase;
+ {
+ /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.
+ the code in CMatchFinderMt expects (pos = 1) */
+ p->pos =
+ p->streamPos =
+ 1; // it's smallest optimal value. do not change it
+ // 0; // for debug
+ }
+ p->result = SZ_OK;
+ p->streamEndWasReached = 0;
+}
+
+
+// (CYC_TO_POS_OFFSET == 0) is expected by some optimized code
+#define CYC_TO_POS_OFFSET 0
+// #define CYC_TO_POS_OFFSET 1 // for debug
+
+void MatchFinder_Init(CMatchFinder *p)
+{
+ MatchFinder_Init_HighHash(p);
+ MatchFinder_Init_LowHash(p);
+ MatchFinder_Init_4(p);
+ // if (readData)
+ MatchFinder_ReadBlock(p);
+
+ /* if we init (cyclicBufferPos = pos), then we can use one variable
+ instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */
+ p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos)
+ // p->cyclicBufferPos = 0; // smallest value
+ // p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses.
+ MatchFinder_SetLimits(p);
+}
+
+
+
+#ifdef MY_CPU_X86_OR_AMD64
+ #if defined(__clang__) && (__clang_major__ >= 4) \
+ || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701)
+ // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
+
+ #define USE_LZFIND_SATUR_SUB_128
+ #define USE_LZFIND_SATUR_SUB_256
+ #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
+ #define LZFIND_ATTRIB_AVX2 __attribute__((__target__("avx2")))
+ #elif defined(_MSC_VER)
+ #if (_MSC_VER >= 1600)
+ #define USE_LZFIND_SATUR_SUB_128
+ #endif
+ #if (_MSC_VER >= 1900)
+ #define USE_LZFIND_SATUR_SUB_256
+ #endif
+ #endif
+
+// #elif defined(MY_CPU_ARM_OR_ARM64)
+#elif defined(MY_CPU_ARM64)
+
+ #if defined(__clang__) && (__clang_major__ >= 8) \
+ || defined(__GNUC__) && (__GNUC__ >= 8)
+ #define USE_LZFIND_SATUR_SUB_128
+ #ifdef MY_CPU_ARM64
+ // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("")))
+ #else
+ // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
+ #endif
+
+ #elif defined(_MSC_VER)
+ #if (_MSC_VER >= 1910)
+ #define USE_LZFIND_SATUR_SUB_128
+ #endif
+ #endif
+
+ #if defined(_MSC_VER) && defined(MY_CPU_ARM64)
+ #include <arm64_neon.h>
+ #else
+ #include <arm_neon.h>
+ #endif
+
+#endif
+
+
+#ifdef USE_LZFIND_SATUR_SUB_128
+
+// #define Z7_SHOW_HW_STATUS
+
+#ifdef Z7_SHOW_HW_STATUS
+#include <stdio.h>
+#define PRF(x) x
+PRF(;)
+#else
+#define PRF(x)
+#endif
+
+
+#ifdef MY_CPU_ARM_OR_ARM64
+
+#ifdef MY_CPU_ARM64
+// #define FORCE_LZFIND_SATUR_SUB_128
+#endif
+typedef uint32x4_t LzFind_v128;
+#define SASUB_128_V(v, s) \
+ vsubq_u32(vmaxq_u32(v, s), s)
+
+#else // MY_CPU_ARM_OR_ARM64
+
+#include <smmintrin.h> // sse4.1
+
+typedef __m128i LzFind_v128;
+// SSE 4.1
+#define SASUB_128_V(v, s) \
+ _mm_sub_epi32(_mm_max_epu32(v, s), s)
+
+#endif // MY_CPU_ARM_OR_ARM64
+
+
+#define SASUB_128(i) \
+ *( LzFind_v128 *)( void *)(items + (i) * 4) = SASUB_128_V( \
+ *(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2);
+
+
+Z7_NO_INLINE
+static
+#ifdef LZFIND_ATTRIB_SSE41
+LZFIND_ATTRIB_SSE41
+#endif
+void
+Z7_FASTCALL
+LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+{
+ const LzFind_v128 sub2 =
+ #ifdef MY_CPU_ARM_OR_ARM64
+ vdupq_n_u32(subValue);
+ #else
+ _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
+ #endif
+ Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+ do
+ {
+ SASUB_128(0) SASUB_128(1) items += 2 * 4;
+ SASUB_128(0) SASUB_128(1) items += 2 * 4;
+ }
+ while (items != lim);
+}
+
+
+
+#ifdef USE_LZFIND_SATUR_SUB_256
+
+#include <immintrin.h> // avx
+/*
+clang :immintrin.h uses
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
+ defined(__AVX2__)
+#include <avx2intrin.h>
+#endif
+so we need <avxintrin.h> for clang-cl */
+
+#if defined(__clang__)
+#include <avxintrin.h>
+#include <avx2intrin.h>
+#endif
+
+// AVX2:
+#define SASUB_256(i) \
+ *( __m256i *)( void *)(items + (i) * 8) = \
+ _mm256_sub_epi32(_mm256_max_epu32( \
+ *(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2);
+
+Z7_NO_INLINE
+static
+#ifdef LZFIND_ATTRIB_AVX2
+LZFIND_ATTRIB_AVX2
+#endif
+void
+Z7_FASTCALL
+LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+{
+ const __m256i sub2 = _mm256_set_epi32(
+ (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
+ (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
+ Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+ do
+ {
+ SASUB_256(0) SASUB_256(1) items += 2 * 8;
+ SASUB_256(0) SASUB_256(1) items += 2 * 8;
+ }
+ while (items != lim);
+}
+#endif // USE_LZFIND_SATUR_SUB_256
+
+#ifndef FORCE_LZFIND_SATUR_SUB_128
+typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)(
+ UInt32 subValue, CLzRef *items, const CLzRef *lim);
+static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
+#endif // FORCE_LZFIND_SATUR_SUB_128
+
+#endif // USE_LZFIND_SATUR_SUB_128
+
+
+// kEmptyHashValue must be zero
+// #define SASUB_32(i) { UInt32 v = items[i]; UInt32 m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; }
+#define SASUB_32(i) { UInt32 v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; }
+
+#ifdef FORCE_LZFIND_SATUR_SUB_128
+
+#define DEFAULT_SaturSub LzFind_SaturSub_128
+
+#else
+
+#define DEFAULT_SaturSub LzFind_SaturSub_32
+
+Z7_NO_INLINE
+static
+void
+Z7_FASTCALL
+LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+{
+ Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+ do
+ {
+ SASUB_32(0) SASUB_32(1) items += 2;
+ SASUB_32(0) SASUB_32(1) items += 2;
+ SASUB_32(0) SASUB_32(1) items += 2;
+ SASUB_32(0) SASUB_32(1) items += 2;
+ }
+ while (items != lim);
+}
+
+#endif
+
+
+Z7_NO_INLINE
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
+{
+ #define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7)
+ Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+ for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
+ {
+ SASUB_32(0)
+ items++;
+ }
+ {
+ const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1);
+ CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask);
+ numItems &= k_Align_Mask;
+ if (items != lim)
+ {
+ #if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128)
+ if (g_LzFind_SaturSub)
+ g_LzFind_SaturSub(subValue, items, lim);
+ else
+ #endif
+ DEFAULT_SaturSub(subValue, items, lim);
+ }
+ items = lim;
+ }
+ Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+ for (; numItems != 0; numItems--)
+ {
+ SASUB_32(0)
+ items++;
+ }
+}
+
+
+
+// call MatchFinder_CheckLimits() only after (p->pos++) update
+
+Z7_NO_INLINE
+static void MatchFinder_CheckLimits(CMatchFinder *p)
+{
+ if (// !p->streamEndWasReached && p->result == SZ_OK &&
+ p->keepSizeAfter == GET_AVAIL_BYTES(p))
+ {
+ // we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p))
+ if (MatchFinder_NeedMove(p))
+ MatchFinder_MoveBlock(p);
+ MatchFinder_ReadBlock(p);
+ }
+
+ if (p->pos == kMaxValForNormalize)
+ if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data.
+ /*
+ if we disable normalization for last bytes of data, and
+ if (data_size == 4 GiB), we don't call wastfull normalization,
+ but (pos) will be wrapped over Zero (0) in that case.
+ And we cannot resume later to normal operation
+ */
+ {
+ // MatchFinder_Normalize(p);
+ /* after normalization we need (p->pos >= p->historySize + 1); */
+ /* we can reduce subValue to aligned value, if want to keep alignment
+ of (p->pos) and (p->buffer) for speculated accesses. */
+ const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
+ // const UInt32 subValue = (1 << 15); // for debug
+ // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
+ MatchFinder_REDUCE_OFFSETS(p, subValue)
+ MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize);
+ {
+ size_t numSonRefs = p->cyclicBufferSize;
+ if (p->btMode)
+ numSonRefs <<= 1;
+ MatchFinder_Normalize3(subValue, p->son, numSonRefs);
+ }
+ }
+
+ if (p->cyclicBufferPos == p->cyclicBufferSize)
+ p->cyclicBufferPos = 0;
+
+ MatchFinder_SetLimits(p);
+}
+
+
+/*
+ (lenLimit > maxLen)
+*/
+Z7_FORCE_INLINE
+static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 *d, unsigned maxLen)
+{
+ /*
+ son[_cyclicBufferPos] = curMatch;
+ for (;;)
+ {
+ UInt32 delta = pos - curMatch;
+ if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+ return d;
+ {
+ const Byte *pb = cur - delta;
+ curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+ if (pb[maxLen] == cur[maxLen] && *pb == *cur)
+ {
+ UInt32 len = 0;
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ if (maxLen < len)
+ {
+ maxLen = len;
+ *d++ = len;
+ *d++ = delta - 1;
+ if (len == lenLimit)
+ return d;
+ }
+ }
+ }
+ }
+ */
+
+ const Byte *lim = cur + lenLimit;
+ son[_cyclicBufferPos] = curMatch;
+
+ do
+ {
+ UInt32 delta;
+
+ if (curMatch == 0)
+ break;
+ // if (curMatch2 >= curMatch) return NULL;
+ delta = pos - curMatch;
+ if (delta >= _cyclicBufferSize)
+ break;
+ {
+ ptrdiff_t diff;
+ curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+ diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff])
+ {
+ const Byte *c = cur;
+ while (*c == c[diff])
+ {
+ if (++c == lim)
+ {
+ d[0] = (UInt32)(lim - cur);
+ d[1] = delta - 1;
+ return d + 2;
+ }
+ }
+ {
+ const unsigned len = (unsigned)(c - cur);
+ if (maxLen < len)
+ {
+ maxLen = len;
+ d[0] = (UInt32)len;
+ d[1] = delta - 1;
+ d += 2;
+ }
+ }
+ }
+ }
+ }
+ while (--cutValue);
+
+ return d;
+}
+
+
+Z7_FORCE_INLINE
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 *d, UInt32 maxLen)
+{
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ unsigned len0 = 0, len1 = 0;
+
+ UInt32 cmCheck;
+
+ // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
+
+ cmCheck = (UInt32)(pos - _cyclicBufferSize);
+ if ((UInt32)pos <= _cyclicBufferSize)
+ cmCheck = 0;
+
+ if (cmCheck < curMatch)
+ do
+ {
+ const UInt32 delta = pos - curMatch;
+ {
+ CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+ const Byte *pb = cur - delta;
+ unsigned len = (len0 < len1 ? len0 : len1);
+ const UInt32 pair0 = pair[0];
+ if (pb[len] == cur[len])
+ {
+ if (++len != lenLimit && pb[len] == cur[len])
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ if (maxLen < len)
+ {
+ maxLen = (UInt32)len;
+ *d++ = (UInt32)len;
+ *d++ = delta - 1;
+ if (len == lenLimit)
+ {
+ *ptr1 = pair0;
+ *ptr0 = pair[1];
+ return d;
+ }
+ }
+ }
+ if (pb[len] < cur[len])
+ {
+ *ptr1 = curMatch;
+ // const UInt32 curMatch2 = pair[1];
+ // if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
+ // curMatch = curMatch2;
+ curMatch = pair[1];
+ ptr1 = pair + 1;
+ len1 = len;
+ }
+ else
+ {
+ *ptr0 = curMatch;
+ curMatch = pair[0];
+ ptr0 = pair;
+ len0 = len;
+ }
+ }
+ }
+ while(--cutValue && cmCheck < curMatch);
+
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ return d;
+}
+
+
+static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
+{
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ unsigned len0 = 0, len1 = 0;
+
+ UInt32 cmCheck;
+
+ cmCheck = (UInt32)(pos - _cyclicBufferSize);
+ if ((UInt32)pos <= _cyclicBufferSize)
+ cmCheck = 0;
+
+ if (// curMatch >= pos || // failure
+ cmCheck < curMatch)
+ do
+ {
+ const UInt32 delta = pos - curMatch;
+ {
+ CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+ const Byte *pb = cur - delta;
+ unsigned len = (len0 < len1 ? len0 : len1);
+ if (pb[len] == cur[len])
+ {
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ {
+ if (len == lenLimit)
+ {
+ *ptr1 = pair[0];
+ *ptr0 = pair[1];
+ return;
+ }
+ }
+ }
+ if (pb[len] < cur[len])
+ {
+ *ptr1 = curMatch;
+ curMatch = pair[1];
+ ptr1 = pair + 1;
+ len1 = len;
+ }
+ else
+ {
+ *ptr0 = curMatch;
+ curMatch = pair[0];
+ ptr0 = pair;
+ len0 = len;
+ }
+ }
+ }
+ while(--cutValue && cmCheck < curMatch);
+
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ return;
+}
+
+
+#define MOVE_POS \
+ ++p->cyclicBufferPos; \
+ p->buffer++; \
+ { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
+
+#define MOVE_POS_RET MOVE_POS return distances;
+
+Z7_NO_INLINE
+static void MatchFinder_MovePos(CMatchFinder *p)
+{
+ /* we go here at the end of stream data, when (avail < num_hash_bytes)
+ We don't update sons[cyclicBufferPos << btMode].
+ So (sons) record will contain junk. And we cannot resume match searching
+ to normal operation, even if we will provide more input data in buffer.
+ p->sons[p->cyclicBufferPos << p->btMode] = 0; // kEmptyHashValue
+ if (p->btMode)
+ p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue
+ */
+ MOVE_POS
+}
+
+#define GET_MATCHES_HEADER2(minLen, ret_op) \
+ unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
+ lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
+ cur = p->buffer;
+
+#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances)
+#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue)
+
+#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+
+#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS } while (--num);
+
+#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
+ distances = func(MF_PARAMS(p), \
+ distances, (UInt32)_maxLen_); MOVE_POS_RET
+
+#define GET_MATCHES_FOOTER_BT(_maxLen_) \
+ GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
+
+#define GET_MATCHES_FOOTER_HC(_maxLen_) \
+ GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec)
+
+
+
+#define UPDATE_maxLen { \
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \
+ const Byte *c = cur + maxLen; \
+ const Byte *lim = cur + lenLimit; \
+ for (; c != lim; c++) if (*(c + diff) != *c) break; \
+ maxLen = (unsigned)(c - cur); }
+
+static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ GET_MATCHES_HEADER(2)
+ HASH2_CALC
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ GET_MATCHES_FOOTER_BT(1)
+}
+
+UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ GET_MATCHES_HEADER(3)
+ HASH_ZIP_CALC
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ GET_MATCHES_FOOTER_BT(2)
+}
+
+
+#define SET_mmm \
+ mmm = p->cyclicBufferSize; \
+ if (pos < mmm) \
+ mmm = pos;
+
+
+static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 mmm;
+ UInt32 h2, d2, pos;
+ unsigned maxLen;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(3)
+
+ HASH3_CALC
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash[h2];
+
+ curMatch = (hash + kFix3HashSize)[hv];
+
+ hash[h2] = pos;
+ (hash + kFix3HashSize)[hv] = pos;
+
+ SET_mmm
+
+ maxLen = 2;
+
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ UPDATE_maxLen
+ distances[0] = (UInt32)maxLen;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (maxLen == lenLimit)
+ {
+ SkipMatchesSpec(MF_PARAMS(p));
+ MOVE_POS_RET
+ }
+ }
+
+ GET_MATCHES_FOOTER_BT(maxLen)
+}
+
+
+static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 mmm;
+ UInt32 h2, h3, d2, d3, pos;
+ unsigned maxLen;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(4)
+
+ HASH4_CALC
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2];
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ curMatch = (hash + kFix4HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[hv] = pos;
+
+ SET_mmm
+
+ maxLen = 3;
+
+ for (;;)
+ {
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ // distances[-2] = 3;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
+
+ UPDATE_maxLen
+ distances[-2] = (UInt32)maxLen;
+ if (maxLen == lenLimit)
+ {
+ SkipMatchesSpec(MF_PARAMS(p));
+ MOVE_POS_RET
+ }
+ break;
+ }
+
+ GET_MATCHES_FOOTER_BT(maxLen)
+}
+
+
+static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 mmm;
+ UInt32 h2, h3, d2, d3, maxLen, pos;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(5)
+
+ HASH5_CALC
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2];
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ // d4 = pos - (hash + kFix4HashSize)[h4];
+
+ curMatch = (hash + kFix5HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ // (hash + kFix4HashSize)[h4] = pos;
+ (hash + kFix5HashSize)[hv] = pos;
+
+ SET_mmm
+
+ maxLen = 4;
+
+ for (;;)
+ {
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ distances[1] = d3 - 1;
+ distances += 2;
+ d2 = d3;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ distances[1] = d3 - 1;
+ distances += 2;
+ d2 = d3;
+ }
+ else
+ break;
+
+ distances[-2] = 3;
+ if (*(cur - d2 + 3) != cur[3])
+ break;
+ UPDATE_maxLen
+ distances[-2] = (UInt32)maxLen;
+ if (maxLen == lenLimit)
+ {
+ SkipMatchesSpec(MF_PARAMS(p));
+ MOVE_POS_RET
+ }
+ break;
+ }
+
+ GET_MATCHES_FOOTER_BT(maxLen)
+}
+
+
+static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 mmm;
+ UInt32 h2, h3, d2, d3, pos;
+ unsigned maxLen;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(4)
+
+ HASH4_CALC
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2];
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ curMatch = (hash + kFix4HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[hv] = pos;
+
+ SET_mmm
+
+ maxLen = 3;
+
+ for (;;)
+ {
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ // distances[-2] = 3;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
+
+ UPDATE_maxLen
+ distances[-2] = (UInt32)maxLen;
+ if (maxLen == lenLimit)
+ {
+ p->son[p->cyclicBufferPos] = curMatch;
+ MOVE_POS_RET
+ }
+ break;
+ }
+
+ GET_MATCHES_FOOTER_HC(maxLen)
+}
+
+
+static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 mmm;
+ UInt32 h2, h3, d2, d3, maxLen, pos;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(5)
+
+ HASH5_CALC
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash [h2];
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ // d4 = pos - (hash + kFix4HashSize)[h4];
+
+ curMatch = (hash + kFix5HashSize)[hv];
+
+ hash [h2] = pos;
+ (hash + kFix3HashSize)[h3] = pos;
+ // (hash + kFix4HashSize)[h4] = pos;
+ (hash + kFix5HashSize)[hv] = pos;
+
+ SET_mmm
+
+ maxLen = 4;
+
+ for (;;)
+ {
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ distances[1] = d3 - 1;
+ distances += 2;
+ d2 = d3;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ distances[1] = d3 - 1;
+ distances += 2;
+ d2 = d3;
+ }
+ else
+ break;
+
+ distances[-2] = 3;
+ if (*(cur - d2 + 3) != cur[3])
+ break;
+ UPDATE_maxLen
+ distances[-2] = maxLen;
+ if (maxLen == lenLimit)
+ {
+ p->son[p->cyclicBufferPos] = curMatch;
+ MOVE_POS_RET
+ }
+ break;
+ }
+
+ GET_MATCHES_FOOTER_HC(maxLen)
+}
+
+
+UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ GET_MATCHES_HEADER(3)
+ HASH_ZIP_CALC
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ GET_MATCHES_FOOTER_HC(2)
+}
+
+
+static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ SKIP_HEADER(2)
+ {
+ HASH2_CALC
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ }
+ SKIP_FOOTER
+}
+
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ SKIP_HEADER(3)
+ {
+ HASH_ZIP_CALC
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ }
+ SKIP_FOOTER
+}
+
+static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ SKIP_HEADER(3)
+ {
+ UInt32 h2;
+ UInt32 *hash;
+ HASH3_CALC
+ hash = p->hash;
+ curMatch = (hash + kFix3HashSize)[hv];
+ hash[h2] =
+ (hash + kFix3HashSize)[hv] = p->pos;
+ }
+ SKIP_FOOTER
+}
+
+static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ SKIP_HEADER(4)
+ {
+ UInt32 h2, h3;
+ UInt32 *hash;
+ HASH4_CALC
+ hash = p->hash;
+ curMatch = (hash + kFix4HashSize)[hv];
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[hv] = p->pos;
+ }
+ SKIP_FOOTER
+}
+
+static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ SKIP_HEADER(5)
+ {
+ UInt32 h2, h3;
+ UInt32 *hash;
+ HASH5_CALC
+ hash = p->hash;
+ curMatch = (hash + kFix5HashSize)[hv];
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ // (hash + kFix4HashSize)[h4] =
+ (hash + kFix5HashSize)[hv] = p->pos;
+ }
+ SKIP_FOOTER
+}
+
+
+#define HC_SKIP_HEADER(minLen) \
+ do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
+ const Byte *cur; \
+ UInt32 *hash; \
+ UInt32 *son; \
+ UInt32 pos = p->pos; \
+ UInt32 num2 = num; \
+ /* (p->pos == p->posLimit) is not allowed here !!! */ \
+ { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \
+ num -= num2; \
+ { const UInt32 cycPos = p->cyclicBufferPos; \
+ son = p->son + cycPos; \
+ p->cyclicBufferPos = cycPos + num2; } \
+ cur = p->buffer; \
+ hash = p->hash; \
+ do { \
+ UInt32 curMatch; \
+ UInt32 hv;
+
+
+#define HC_SKIP_FOOTER \
+ cur++; pos++; *son++ = curMatch; \
+ } while (--num2); \
+ p->buffer = cur; \
+ p->pos = pos; \
+ if (pos == p->posLimit) MatchFinder_CheckLimits(p); \
+ }} while(num); \
+
+
+static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ HC_SKIP_HEADER(4)
+
+ UInt32 h2, h3;
+ HASH4_CALC
+ curMatch = (hash + kFix4HashSize)[hv];
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[hv] = pos;
+
+ HC_SKIP_FOOTER
+}
+
+
+static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ HC_SKIP_HEADER(5)
+
+ UInt32 h2, h3;
+ HASH5_CALC
+ curMatch = (hash + kFix5HashSize)[hv];
+ hash [h2] =
+ (hash + kFix3HashSize)[h3] =
+ // (hash + kFix4HashSize)[h4] =
+ (hash + kFix5HashSize)[hv] = pos;
+
+ HC_SKIP_FOOTER
+}
+
+
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ HC_SKIP_HEADER(3)
+
+ HASH_ZIP_CALC
+ curMatch = hash[hv];
+ hash[hv] = pos;
+
+ HC_SKIP_FOOTER
+}
+
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
+{
+ vTable->Init = (Mf_Init_Func)MatchFinder_Init;
+ vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
+ vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
+ if (!p->btMode)
+ {
+ if (p->numHashBytes <= 4)
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
+ }
+ else
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
+ }
+ }
+ else if (p->numHashBytes == 2)
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
+ }
+ else if (p->numHashBytes == 3)
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
+ }
+ else if (p->numHashBytes == 4)
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
+ }
+ else
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
+ }
+}
+
+
+
+void LzFindPrepare(void)
+{
+ #ifndef FORCE_LZFIND_SATUR_SUB_128
+ #ifdef USE_LZFIND_SATUR_SUB_128
+ LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
+ #ifdef MY_CPU_ARM_OR_ARM64
+ {
+ if (CPU_IsSupported_NEON())
+ {
+ // #pragma message ("=== LzFind NEON")
+ PRF(printf("\n=== LzFind NEON\n"));
+ f = LzFind_SaturSub_128;
+ }
+ // f = 0; // for debug
+ }
+ #else // MY_CPU_ARM_OR_ARM64
+ if (CPU_IsSupported_SSE41())
+ {
+ // #pragma message ("=== LzFind SSE41")
+ PRF(printf("\n=== LzFind SSE41\n"));
+ f = LzFind_SaturSub_128;
+
+ #ifdef USE_LZFIND_SATUR_SUB_256
+ if (CPU_IsSupported_AVX2())
+ {
+ // #pragma message ("=== LzFind AVX2")
+ PRF(printf("\n=== LzFind AVX2\n"));
+ f = LzFind_SaturSub_256;
+ }
+ #endif
+ }
+ #endif // MY_CPU_ARM_OR_ARM64
+ g_LzFind_SaturSub = f;
+ #endif // USE_LZFIND_SATUR_SUB_128
+ #endif // FORCE_LZFIND_SATUR_SUB_128
+}
+
+
+#undef MOVE_POS
+#undef MOVE_POS_RET
+#undef PRF
diff --git a/src/Common/lzma/LzFind.h b/src/Common/lzma/LzFind.h
new file mode 100644
index 00000000..a3f72c98
--- /dev/null
+++ b/src/Common/lzma/LzFind.h
@@ -0,0 +1,159 @@
+/* LzFind.h -- Match finder for LZ algorithms
+2023-03-04 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_LZ_FIND_H
+#define ZIP7_INC_LZ_FIND_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+typedef UInt32 CLzRef;
+
+typedef struct
+{
+ const Byte *buffer;
+ UInt32 pos;
+ UInt32 posLimit;
+ UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
+ UInt32 lenLimit;
+
+ UInt32 cyclicBufferPos;
+ UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
+
+ Byte streamEndWasReached;
+ Byte btMode;
+ Byte bigHash;
+ Byte directInput;
+
+ UInt32 matchMaxLen;
+ CLzRef *hash;
+ CLzRef *son;
+ UInt32 hashMask;
+ UInt32 cutValue;
+
+ Byte *bufBase;
+ ISeqInStreamPtr stream;
+
+ UInt32 blockSize;
+ UInt32 keepSizeBefore;
+ UInt32 keepSizeAfter;
+
+ UInt32 numHashBytes;
+ size_t directInputRem;
+ UInt32 historySize;
+ UInt32 fixedHashSize;
+ Byte numHashBytes_Min;
+ Byte numHashOutBits;
+ Byte _pad2_[2];
+ SRes result;
+ UInt32 crc[256];
+ size_t numRefs;
+
+ UInt64 expectedDataSize;
+} CMatchFinder;
+
+#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer)
+
+#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos))
+
+/*
+#define Inline_MatchFinder_IsFinishedOK(p) \
+ ((p)->streamEndWasReached \
+ && (p)->streamPos == (p)->pos \
+ && (!(p)->directInput || (p)->directInputRem == 0))
+*/
+
+int MatchFinder_NeedMove(CMatchFinder *p);
+/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */
+void MatchFinder_MoveBlock(CMatchFinder *p);
+void MatchFinder_ReadIfRequired(CMatchFinder *p);
+
+void MatchFinder_Construct(CMatchFinder *p);
+
+/* (directInput = 0) is default value.
+ It's required to provide correct (directInput) value
+ before calling MatchFinder_Create().
+ You can set (directInput) by any of the following calls:
+ - MatchFinder_SET_DIRECT_INPUT_BUF()
+ - MatchFinder_SET_STREAM()
+ - MatchFinder_SET_STREAM_MODE()
+*/
+
+#define MatchFinder_SET_DIRECT_INPUT_BUF(p, _src_, _srcLen_) { \
+ (p)->stream = NULL; \
+ (p)->directInput = 1; \
+ (p)->buffer = (_src_); \
+ (p)->directInputRem = (_srcLen_); }
+
+/*
+#define MatchFinder_SET_STREAM_MODE(p) { \
+ (p)->directInput = 0; }
+*/
+
+#define MatchFinder_SET_STREAM(p, _stream_) { \
+ (p)->stream = _stream_; \
+ (p)->directInput = 0; }
+
+
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+ UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+ ISzAllocPtr alloc);
+void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
+
+/*
+#define MatchFinder_INIT_POS(p, val) \
+ (p)->pos = (val); \
+ (p)->streamPos = (val);
+*/
+
+// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+#define MatchFinder_REDUCE_OFFSETS(p, subValue) \
+ (p)->pos -= (subValue); \
+ (p)->streamPos -= (subValue);
+
+
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
+ UInt32 *distances, UInt32 maxLen);
+
+/*
+Conditions:
+ Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
+ Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
+*/
+
+typedef void (*Mf_Init_Func)(void *object);
+typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
+typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
+typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
+typedef void (*Mf_Skip_Func)(void *object, UInt32);
+
+typedef struct
+{
+ Mf_Init_Func Init;
+ Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
+ Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
+ Mf_GetMatches_Func GetMatches;
+ Mf_Skip_Func Skip;
+} IMatchFinder2;
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
+
+void MatchFinder_Init_LowHash(CMatchFinder *p);
+void MatchFinder_Init_HighHash(CMatchFinder *p);
+void MatchFinder_Init_4(CMatchFinder *p);
+void MatchFinder_Init(CMatchFinder *p);
+
+UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+
+void LzFindPrepare(void);
+
+EXTERN_C_END
+
+#endif
diff --git a/src/Common/lzma/LzFindMt.c b/src/Common/lzma/LzFindMt.c
new file mode 100644
index 00000000..5253e6eb
--- /dev/null
+++ b/src/Common/lzma/LzFindMt.c
@@ -0,0 +1,1406 @@
+/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
+2023-04-02 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+// #include <stdio.h>
+
+#include "CpuArch.h"
+
+#include "LzHash.h"
+#include "LzFindMt.h"
+
+// #define LOG_ITERS
+
+// #define LOG_THREAD
+
+#ifdef LOG_THREAD
+#include <stdio.h>
+#define PRF(x) x
+#else
+#define PRF(x)
+#endif
+
+#ifdef LOG_ITERS
+#include <stdio.h>
+extern UInt64 g_NumIters_Tree;
+extern UInt64 g_NumIters_Loop;
+extern UInt64 g_NumIters_Bytes;
+#define LOG_ITER(x) x
+#else
+#define LOG_ITER(x)
+#endif
+
+#define kMtHashBlockSize ((UInt32)1 << 17)
+#define kMtHashNumBlocks (1 << 1)
+
+#define GET_HASH_BLOCK_OFFSET(i) (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize)
+
+#define kMtBtBlockSize ((UInt32)1 << 16)
+#define kMtBtNumBlocks (1 << 4)
+
+#define GET_BT_BLOCK_OFFSET(i) (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize)
+
+/*
+ HASH functions:
+ We use raw 8/16 bits from a[1] and a[2],
+ xored with crc(a[0]) and crc(a[3]).
+ We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches.
+ our crc() function provides one-to-one correspondence for low 8-bit values:
+ (crc[0...0xFF] & 0xFF) <-> [0...0xFF]
+*/
+
+#define MF(mt) ((mt)->MatchFinder)
+#define MF_CRC (p->crc)
+
+// #define MF(mt) (&(mt)->MatchFinder)
+// #define MF_CRC (p->MatchFinder.crc)
+
+#define MT_HASH2_CALC \
+ h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1);
+
+#define MT_HASH3_CALC { \
+ UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+/*
+#define MT_HASH3_CALC__NO_2 { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+#define MT_HASH4_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }
+ // (kHash4Size - 1);
+*/
+
+
+Z7_NO_INLINE
+static void MtSync_Construct(CMtSync *p)
+{
+ p->affinity = 0;
+ p->wasCreated = False;
+ p->csWasInitialized = False;
+ p->csWasEntered = False;
+ Thread_CONSTRUCT(&p->thread)
+ Event_Construct(&p->canStart);
+ Event_Construct(&p->wasStopped);
+ Semaphore_Construct(&p->freeSemaphore);
+ Semaphore_Construct(&p->filledSemaphore);
+}
+
+
+#define DEBUG_BUFFER_LOCK // define it to debug lock state
+
+#ifdef DEBUG_BUFFER_LOCK
+#include <stdlib.h>
+#define BUFFER_MUST_BE_LOCKED(p) if (!(p)->csWasEntered) exit(1);
+#define BUFFER_MUST_BE_UNLOCKED(p) if ( (p)->csWasEntered) exit(1);
+#else
+#define BUFFER_MUST_BE_LOCKED(p)
+#define BUFFER_MUST_BE_UNLOCKED(p)
+#endif
+
+#define LOCK_BUFFER(p) { \
+ BUFFER_MUST_BE_UNLOCKED(p); \
+ CriticalSection_Enter(&(p)->cs); \
+ (p)->csWasEntered = True; }
+
+#define UNLOCK_BUFFER(p) { \
+ BUFFER_MUST_BE_LOCKED(p); \
+ CriticalSection_Leave(&(p)->cs); \
+ (p)->csWasEntered = False; }
+
+
+Z7_NO_INLINE
+static UInt32 MtSync_GetNextBlock(CMtSync *p)
+{
+ UInt32 numBlocks = 0;
+ if (p->needStart)
+ {
+ BUFFER_MUST_BE_UNLOCKED(p)
+ p->numProcessedBlocks = 1;
+ p->needStart = False;
+ p->stopWriting = False;
+ p->exit = False;
+ Event_Reset(&p->wasStopped);
+ Event_Set(&p->canStart);
+ }
+ else
+ {
+ UNLOCK_BUFFER(p)
+ // we free current block
+ numBlocks = p->numProcessedBlocks++;
+ Semaphore_Release1(&p->freeSemaphore);
+ }
+
+ // buffer is UNLOCKED here
+ Semaphore_Wait(&p->filledSemaphore);
+ LOCK_BUFFER(p)
+ return numBlocks;
+}
+
+
+/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */
+
+Z7_NO_INLINE
+static void MtSync_StopWriting(CMtSync *p)
+{
+ if (!Thread_WasCreated(&p->thread) || p->needStart)
+ return;
+
+ PRF(printf("\nMtSync_StopWriting %p\n", p));
+
+ if (p->csWasEntered)
+ {
+ /* we don't use buffer in this thread after StopWriting().
+ So we UNLOCK buffer.
+ And we restore default UNLOCKED state for stopped thread */
+ UNLOCK_BUFFER(p)
+ }
+
+ /* We send (p->stopWriting) message and release freeSemaphore
+ to free current block.
+ So the thread will see (p->stopWriting) at some
+ iteration after Wait(freeSemaphore).
+ The thread doesn't need to fill all avail free blocks,
+ so we can get fast thread stop.
+ */
+
+ p->stopWriting = True;
+ Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!!
+
+ PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p));
+ Event_Wait(&p->wasStopped);
+ PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finsihed\n", p));
+
+ /* 21.03 : we don't restore samaphore counters here.
+ We will recreate and reinit samaphores in next start */
+
+ p->needStart = True;
+}
+
+
+Z7_NO_INLINE
+static void MtSync_Destruct(CMtSync *p)
+{
+ PRF(printf("\nMtSync_Destruct %p\n", p));
+
+ if (Thread_WasCreated(&p->thread))
+ {
+ /* we want thread to be in Stopped state before sending EXIT command.
+ note: stop(btSync) will stop (htSync) also */
+ MtSync_StopWriting(p);
+ /* thread in Stopped state here : (p->needStart == true) */
+ p->exit = True;
+ // if (p->needStart) // it's (true)
+ Event_Set(&p->canStart); // we send EXIT command to thread
+ Thread_Wait_Close(&p->thread); // we wait thread finishing
+ }
+
+ if (p->csWasInitialized)
+ {
+ CriticalSection_Delete(&p->cs);
+ p->csWasInitialized = False;
+ }
+ p->csWasEntered = False;
+
+ Event_Close(&p->canStart);
+ Event_Close(&p->wasStopped);
+ Semaphore_Close(&p->freeSemaphore);
+ Semaphore_Close(&p->filledSemaphore);
+
+ p->wasCreated = False;
+}
+
+
+// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
+// we want to get real system error codes here instead of SZ_ERROR_THREAD
+#define RINOK_THREAD(x) RINOK_WRes(x)
+
+
+// call it before each new file (when new starting is required):
+Z7_NO_INLINE
+static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
+{
+ WRes wres;
+ // BUFFER_MUST_BE_UNLOCKED(p)
+ if (!p->needStart || p->csWasEntered)
+ return SZ_ERROR_FAIL;
+ wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks);
+ if (wres == 0)
+ wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks);
+ return MY_SRes_HRESULT_FROM_WRes(wres);
+}
+
+
+static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
+{
+ WRes wres;
+
+ if (p->wasCreated)
+ return SZ_OK;
+
+ RINOK_THREAD(CriticalSection_Init(&p->cs))
+ p->csWasInitialized = True;
+ p->csWasEntered = False;
+
+ RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart))
+ RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped))
+
+ p->needStart = True;
+ p->exit = True; /* p->exit is unused before (canStart) Event.
+ But in case of some unexpected code failure we will get fast exit from thread */
+
+ // return ERROR_TOO_MANY_POSTS; // for debug
+ // return EINVAL; // for debug
+
+ if (p->affinity != 0)
+ wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
+ else
+ wres = Thread_Create(&p->thread, startAddress, obj);
+
+ RINOK_THREAD(wres)
+ p->wasCreated = True;
+ return SZ_OK;
+}
+
+
+Z7_NO_INLINE
+static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
+{
+ const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
+ if (wres == 0)
+ return 0;
+ MtSync_Destruct(p);
+ return MY_SRes_HRESULT_FROM_WRes(wres);
+}
+
+
+// ---------- HASH THREAD ----------
+
+#define kMtMaxValForNormalize 0xFFFFFFFF
+// #define kMtMaxValForNormalize ((1 << 21)) // for debug
+// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
+
+#ifdef MY_CPU_LE_UNALIGN
+ #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)
+#else
+ #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))
+#endif
+
+#define GetHeads_DECL(name) \
+ static void GetHeads ## name(const Byte *p, UInt32 pos, \
+ UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)
+
+#define GetHeads_LOOP(v) \
+ for (; numHeads != 0; numHeads--) { \
+ const UInt32 value = (v); \
+ p++; \
+ *heads++ = pos - hash[value]; \
+ hash[value] = pos++; }
+
+#define DEF_GetHeads2(name, v, action) \
+ GetHeads_DECL(name) { action \
+ GetHeads_LOOP(v) }
+
+#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)
+
+DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
+DEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)
+DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
+// BT3 is not good for crc collisions for big hashMask values.
+
+/*
+GetHeads_DECL(3b)
+{
+ UNUSED_VAR(hashMask);
+ UNUSED_VAR(crc);
+ {
+ const Byte *pLim = p + numHeads;
+ if (numHeads == 0)
+ return;
+ pLim--;
+ while (p < pLim)
+ {
+ UInt32 v1 = GetUi32(p);
+ UInt32 v0 = v1 & 0xFFFFFF;
+ UInt32 h0, h1;
+ p += 2;
+ v1 >>= 8;
+ h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++;
+ h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++;
+ heads += 2;
+ }
+ if (p == pLim)
+ {
+ UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16);
+ *heads = pos - hash[v0];
+ hash[v0] = pos;
+ }
+ }
+}
+*/
+
+/*
+GetHeads_DECL(4)
+{
+ unsigned sh = 0;
+ UNUSED_VAR(crc)
+ while ((hashMask & 0x80000000) == 0)
+ {
+ hashMask <<= 1;
+ sh++;
+ }
+ GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh)
+}
+#define GetHeads4b GetHeads4
+*/
+
+#define USE_GetHeads_LOCAL_CRC
+
+#ifdef USE_GetHeads_LOCAL_CRC
+
+GetHeads_DECL(4)
+{
+ UInt32 crc0[256];
+ UInt32 crc1[256];
+ {
+ unsigned i;
+ for (i = 0; i < 256; i++)
+ {
+ UInt32 v = crc[i];
+ crc0[i] = v & hashMask;
+ crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
+ // crc1[i] = rotlFixed(v, 8) & hashMask;
+ }
+ }
+ GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1))
+}
+
+GetHeads_DECL(4b)
+{
+ UInt32 crc0[256];
+ {
+ unsigned i;
+ for (i = 0; i < 256; i++)
+ crc0[i] = crc[i] & hashMask;
+ }
+ GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p))
+}
+
+GetHeads_DECL(5)
+{
+ UInt32 crc0[256];
+ UInt32 crc1[256];
+ UInt32 crc2[256];
+ {
+ unsigned i;
+ for (i = 0; i < 256; i++)
+ {
+ UInt32 v = crc[i];
+ crc0[i] = v & hashMask;
+ crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
+ crc2[i] = (v << kLzHash_CrcShift_2) & hashMask;
+ }
+ }
+ GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1))
+}
+
+GetHeads_DECL(5b)
+{
+ UInt32 crc0[256];
+ UInt32 crc1[256];
+ {
+ unsigned i;
+ for (i = 0; i < 256; i++)
+ {
+ UInt32 v = crc[i];
+ crc0[i] = v & hashMask;
+ crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
+ }
+ }
+ GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p))
+}
+
+#else
+
+DEF_GetHeads(4, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask)
+DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask)
+DEF_GetHeads(5, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask)
+DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask)
+
+#endif
+
+
+static void HashThreadFunc(CMatchFinderMt *mt)
+{
+ CMtSync *p = &mt->hashSync;
+ PRF(printf("\nHashThreadFunc\n"));
+
+ for (;;)
+ {
+ UInt32 blockIndex = 0;
+ PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n"));
+ Event_Wait(&p->canStart);
+ PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n"));
+ if (p->exit)
+ {
+ PRF(printf("\nHashThreadFunc : exit \n"));
+ return;
+ }
+
+ MatchFinder_Init_HighHash(MF(mt));
+
+ for (;;)
+ {
+ PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos));
+
+ {
+ CMatchFinder *mf = MF(mt);
+ if (MatchFinder_NeedMove(mf))
+ {
+ CriticalSection_Enter(&mt->btSync.cs);
+ CriticalSection_Enter(&mt->hashSync.cs);
+ {
+ const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf);
+ ptrdiff_t offset;
+ MatchFinder_MoveBlock(mf);
+ offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf);
+ mt->pointerToCurPos -= offset;
+ mt->buffer -= offset;
+ }
+ CriticalSection_Leave(&mt->hashSync.cs);
+ CriticalSection_Leave(&mt->btSync.cs);
+ continue;
+ }
+
+ Semaphore_Wait(&p->freeSemaphore);
+
+ if (p->exit) // exit is unexpected here. But we check it here for some failure case
+ return;
+
+ // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
+ if (p->stopWriting)
+ break;
+
+ MatchFinder_ReadIfRequired(mf);
+ {
+ UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++);
+ UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf);
+ heads[0] = 2;
+ heads[1] = num;
+
+ /* heads[1] contains the number of avail bytes:
+ if (avail < mf->numHashBytes) :
+ {
+ it means that stream was finished
+ HASH_THREAD and BT_TREAD must move position for heads[1] (avail) bytes.
+ HASH_THREAD doesn't stop,
+ HASH_THREAD fills only the header (2 numbers) for all next blocks:
+ {2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0}
+ }
+ else
+ {
+ HASH_THREAD and BT_TREAD must move position for (heads[0] - 2) bytes;
+ }
+ */
+
+ if (num >= mf->numHashBytes)
+ {
+ num = num - mf->numHashBytes + 1;
+ if (num > kMtHashBlockSize - 2)
+ num = kMtHashBlockSize - 2;
+
+ if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
+ {
+ const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
+ MatchFinder_REDUCE_OFFSETS(mf, subValue)
+ MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
+ }
+
+ heads[0] = 2 + num;
+ mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
+ }
+
+ mf->pos += num; // wrap over zero is allowed at the end of stream
+ mf->buffer += num;
+ }
+ }
+
+ Semaphore_Release1(&p->filledSemaphore);
+ } // for() processing end
+
+ // p->numBlocks_Sent = blockIndex;
+ Event_Set(&p->wasStopped);
+ } // for() thread end
+}
+
+
+
+
+// ---------- BT THREAD ----------
+
+/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap.
+ here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */
+#define CYC_TO_POS_OFFSET 0
+// #define CYC_TO_POS_OFFSET 1 // for debug
+
+#define MFMT_GM_INLINE
+
+#ifdef MFMT_GM_INLINE
+
+/*
+ we use size_t for (pos) instead of UInt32
+ to eliminate "movsx" BUG in old MSVC x64 compiler.
+*/
+
+
+UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes);
+
+#endif
+
+
+static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
+{
+ UInt32 numProcessed = 0;
+ UInt32 curPos = 2;
+
+ /* GetMatchesSpec() functions don't create (len = 1)
+ in [len, dist] match pairs, if (p->numHashBytes >= 2)
+ Also we suppose here that (matchMaxLen >= 2).
+ So the following code for (reserve) is not required
+ UInt32 reserve = (p->matchMaxLen * 2);
+ const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX
+ if (reserve < kNumHashBytes_Max - 1)
+ reserve = kNumHashBytes_Max - 1;
+ const UInt32 limit = kMtBtBlockSize - (reserve);
+ */
+
+ const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);
+
+ d[1] = p->hashNumAvail;
+
+ if (p->failure_BT)
+ {
+ // printf("\n == 1 BtGetMatches() p->failure_BT\n");
+ d[0] = 0;
+ // d[1] = 0;
+ return;
+ }
+
+ while (curPos < limit)
+ {
+ if (p->hashBufPos == p->hashBufPosLimit)
+ {
+ // MatchFinderMt_GetNextBlock_Hash(p);
+ UInt32 avail;
+ {
+ const UInt32 bi = MtSync_GetNextBlock(&p->hashSync);
+ const UInt32 k = GET_HASH_BLOCK_OFFSET(bi);
+ const UInt32 *h = p->hashBuf + k;
+ avail = h[1];
+ p->hashBufPosLimit = k + h[0];
+ p->hashNumAvail = avail;
+ p->hashBufPos = k + 2;
+ }
+
+ {
+ /* we must prevent UInt32 overflow for avail total value,
+ if avail was increased with new hash block */
+ UInt32 availSum = numProcessed + avail;
+ if (availSum < numProcessed)
+ availSum = (UInt32)(Int32)-1;
+ d[1] = availSum;
+ }
+
+ if (avail >= p->numHashBytes)
+ continue;
+
+ // if (p->hashBufPos != p->hashBufPosLimit) exit(1);
+
+ /* (avail < p->numHashBytes)
+ It means that stream was finished.
+ And (avail) - is a number of remaining bytes,
+ we fill (d) for (avail) bytes for LZ_THREAD (receiver).
+ but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */
+
+ /* here we suppose that we have space enough:
+ (kMtBtBlockSize - curPos >= p->hashNumAvail) */
+ p->hashNumAvail = 0;
+ d[0] = curPos + avail;
+ d += curPos;
+ for (; avail != 0; avail--)
+ *d++ = 0;
+ return;
+ }
+ {
+ UInt32 size = p->hashBufPosLimit - p->hashBufPos;
+ UInt32 pos = p->pos;
+ UInt32 cyclicBufferPos = p->cyclicBufferPos;
+ UInt32 lenLimit = p->matchMaxLen;
+ if (lenLimit >= p->hashNumAvail)
+ lenLimit = p->hashNumAvail;
+ {
+ UInt32 size2 = p->hashNumAvail - lenLimit + 1;
+ if (size2 < size)
+ size = size2;
+ size2 = p->cyclicBufferSize - cyclicBufferPos;
+ if (size2 < size)
+ size = size2;
+ }
+
+ if (pos > (UInt32)kMtMaxValForNormalize - size)
+ {
+ const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1);
+ pos -= subValue;
+ p->pos = pos;
+ MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
+ }
+
+ #ifndef MFMT_GM_INLINE
+ while (curPos < limit && size-- != 0)
+ {
+ UInt32 *startDistances = d + curPos;
+ UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
+ pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
+ startDistances + 1, p->numHashBytes - 1) - startDistances);
+ *startDistances = num - 1;
+ curPos += num;
+ cyclicBufferPos++;
+ pos++;
+ p->buffer++;
+ }
+ #else
+ {
+ UInt32 posRes = pos;
+ const UInt32 *d_end;
+ {
+ d_end = GetMatchesSpecN_2(
+ p->buffer + lenLimit - 1,
+ pos, p->buffer, p->son, p->cutValue, d + curPos,
+ p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
+ d + limit, p->hashBuf + p->hashBufPos + size,
+ cyclicBufferPos, p->cyclicBufferSize,
+ &posRes);
+ }
+ {
+ if (!d_end)
+ {
+ // printf("\n == 2 BtGetMatches() p->failure_BT\n");
+ // internal data failure
+ p->failure_BT = True;
+ d[0] = 0;
+ // d[1] = 0;
+ return;
+ }
+ }
+ curPos = (UInt32)(d_end - d);
+ {
+ const UInt32 processed = posRes - pos;
+ pos = posRes;
+ p->hashBufPos += processed;
+ cyclicBufferPos += processed;
+ p->buffer += processed;
+ }
+ }
+ #endif
+
+ {
+ const UInt32 processed = pos - p->pos;
+ numProcessed += processed;
+ p->hashNumAvail -= processed;
+ p->pos = pos;
+ }
+ if (cyclicBufferPos == p->cyclicBufferSize)
+ cyclicBufferPos = 0;
+ p->cyclicBufferPos = cyclicBufferPos;
+ }
+ }
+
+ d[0] = curPos;
+}
+
+
+static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
+{
+ CMtSync *sync = &p->hashSync;
+
+ BUFFER_MUST_BE_UNLOCKED(sync)
+
+ if (!sync->needStart)
+ {
+ LOCK_BUFFER(sync)
+ }
+
+ BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex));
+
+ /* We suppose that we have called GetNextBlock() from start.
+ So buffer is LOCKED */
+
+ UNLOCK_BUFFER(sync)
+}
+
+
+Z7_NO_INLINE
+static void BtThreadFunc(CMatchFinderMt *mt)
+{
+ CMtSync *p = &mt->btSync;
+ for (;;)
+ {
+ UInt32 blockIndex = 0;
+ Event_Wait(&p->canStart);
+
+ for (;;)
+ {
+ PRF(printf(" BT thread block = %d pos = %d\n", (unsigned)blockIndex, mt->pos));
+ /* (p->exit == true) is possible after (p->canStart) at first loop iteration
+ and is unexpected after more Wait(freeSemaphore) iterations */
+ if (p->exit)
+ return;
+
+ Semaphore_Wait(&p->freeSemaphore);
+
+ // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
+ if (p->stopWriting)
+ break;
+
+ BtFillBlock(mt, blockIndex++);
+
+ Semaphore_Release1(&p->filledSemaphore);
+ }
+
+ // we stop HASH_THREAD here
+ MtSync_StopWriting(&mt->hashSync);
+
+ // p->numBlocks_Sent = blockIndex;
+ Event_Set(&p->wasStopped);
+ }
+}
+
+
+void MatchFinderMt_Construct(CMatchFinderMt *p)
+{
+ p->hashBuf = NULL;
+ MtSync_Construct(&p->hashSync);
+ MtSync_Construct(&p->btSync);
+}
+
+static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->hashBuf);
+ p->hashBuf = NULL;
+}
+
+void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
+{
+ /*
+ HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs.
+ So we must be sure that HASH_THREAD will not use CriticalSection(s)
+ after deleting CriticalSection here.
+
+ we call ReleaseStream(p)
+ that calls StopWriting(btSync)
+ that calls StopWriting(hashSync), if it's required to stop HASH_THREAD.
+ after StopWriting() it's safe to destruct MtSync(s) in any order */
+
+ MatchFinderMt_ReleaseStream(p);
+
+ MtSync_Destruct(&p->btSync);
+ MtSync_Destruct(&p->hashSync);
+
+ LOG_ITER(
+ printf("\nTree %9d * %7d iter = %9d = sum : bytes = %9d\n",
+ (UInt32)(g_NumIters_Tree / 1000),
+ (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),
+ (UInt32)(g_NumIters_Loop / 1000),
+ (UInt32)(g_NumIters_Bytes / 1000)
+ ));
+
+ MatchFinderMt_FreeMem(p, alloc);
+}
+
+
+#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)
+#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)
+
+
+static THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; }
+static THREAD_FUNC_DECL BtThreadFunc2(void *p)
+{
+ Byte allocaDummy[0x180];
+ unsigned i = 0;
+ for (i = 0; i < 16; i++)
+ allocaDummy[i] = (Byte)0;
+ if (allocaDummy[0] == 0)
+ BtThreadFunc((CMatchFinderMt *)p);
+ return 0;
+}
+
+
+SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
+ UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
+{
+ CMatchFinder *mf = MF(p);
+ p->historySize = historySize;
+ if (kMtBtBlockSize <= matchMaxLen * 4)
+ return SZ_ERROR_PARAM;
+ if (!p->hashBuf)
+ {
+ p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32));
+ if (!p->hashBuf)
+ return SZ_ERROR_MEM;
+ p->btBuf = p->hashBuf + kHashBufferSize;
+ }
+ keepAddBufferBefore += (kHashBufferSize + kBtBufferSize);
+ keepAddBufferAfter += kMtHashBlockSize;
+ if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
+ return SZ_ERROR_MEM;
+
+ RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p))
+ RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p))
+ return SZ_OK;
+}
+
+
+SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
+{
+ RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks))
+ return MtSync_Init(&p->btSync, kMtBtNumBlocks);
+}
+
+
+static void MatchFinderMt_Init(CMatchFinderMt *p)
+{
+ CMatchFinder *mf = MF(p);
+
+ p->btBufPos =
+ p->btBufPosLimit = NULL;
+ p->hashBufPos =
+ p->hashBufPosLimit = 0;
+ p->hashNumAvail = 0; // 21.03
+
+ p->failure_BT = False;
+
+ /* Init without data reading. We don't want to read data in this thread */
+ MatchFinder_Init_4(mf);
+
+ MatchFinder_Init_LowHash(mf);
+
+ p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);
+ p->btNumAvailBytes = 0;
+ p->failure_LZ_BT = False;
+ // p->failure_LZ_LZ = False;
+
+ p->lzPos =
+ 1; // optimal smallest value
+ // 0; // for debug: ignores match to start
+ // kNormalizeAlign; // for debug
+
+ p->hash = mf->hash;
+ p->fixedHashSize = mf->fixedHashSize;
+ // p->hash4Mask = mf->hash4Mask;
+ p->crc = mf->crc;
+ // memcpy(p->crc, mf->crc, sizeof(mf->crc));
+
+ p->son = mf->son;
+ p->matchMaxLen = mf->matchMaxLen;
+ p->numHashBytes = mf->numHashBytes;
+
+ /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */
+ // mf->streamPos = mf->pos = 1; // optimal smallest value
+ // 0; // for debug: ignores match to start
+ // kNormalizeAlign; // for debug
+
+ /* we must init (p->pos = mf->pos) for BT, because
+ BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */
+ p->pos = mf->pos; // do not change it
+
+ p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET);
+ p->cyclicBufferSize = mf->cyclicBufferSize;
+ p->buffer = mf->buffer;
+ p->cutValue = mf->cutValue;
+ // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses.
+}
+
+
+/* ReleaseStream is required to finish multithreading */
+void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
+{
+ // Sleep(1); // for debug
+ MtSync_StopWriting(&p->btSync);
+ // Sleep(200); // for debug
+ /* p->MatchFinder->ReleaseStream(); */
+}
+
+
+Z7_NO_INLINE
+static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
+{
+ if (p->failure_LZ_BT)
+ p->btBufPos = p->failureBuf;
+ else
+ {
+ const UInt32 bi = MtSync_GetNextBlock(&p->btSync);
+ const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi);
+ {
+ const UInt32 numItems = bt[0];
+ p->btBufPosLimit = bt + numItems;
+ p->btNumAvailBytes = bt[1];
+ p->btBufPos = bt + 2;
+ if (numItems < 2 || numItems > kMtBtBlockSize)
+ {
+ p->failureBuf[0] = 0;
+ p->btBufPos = p->failureBuf;
+ p->btBufPosLimit = p->failureBuf + 1;
+ p->failure_LZ_BT = True;
+ // p->btNumAvailBytes = 0;
+ /* we don't want to decrease AvailBytes, that was load before.
+ that can be unxepected for the code that have loaded anopther value before */
+ }
+ }
+
+ if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize)
+ {
+ /* we don't check (lzPos) over exact avail bytes in (btBuf).
+ (fixedHashSize) is small, so normalization is fast */
+ const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
+ p->lzPos -= subValue;
+ MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize);
+ }
+ }
+ return p->btNumAvailBytes;
+}
+
+
+
+static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
+{
+ return p->pointerToCurPos;
+}
+
+
+#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);
+
+
+static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
+{
+ if (p->btBufPos != p->btBufPosLimit)
+ return p->btNumAvailBytes;
+ return MatchFinderMt_GetNextBlock_Bt(p);
+}
+
+
+// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True; return d; }
+#define CHECK_FAILURE_LZ(_match_, _pos_)
+
+static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
+{
+ UInt32 h2, c2;
+ UInt32 *hash = p->hash;
+ const Byte *cur = p->pointerToCurPos;
+ const UInt32 m = p->lzPos;
+ MT_HASH2_CALC
+
+ c2 = hash[h2];
+ hash[h2] = m;
+
+ if (c2 >= matchMinPos)
+ {
+ CHECK_FAILURE_LZ(c2, m)
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
+ {
+ *d++ = 2;
+ *d++ = m - c2 - 1;
+ }
+ }
+
+ return d;
+}
+
+static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
+{
+ UInt32 h2, h3, c2, c3;
+ UInt32 *hash = p->hash;
+ const Byte *cur = p->pointerToCurPos;
+ const UInt32 m = p->lzPos;
+ MT_HASH3_CALC
+
+ c2 = hash[h2];
+ c3 = (hash + kFix3HashSize)[h3];
+
+ hash[h2] = m;
+ (hash + kFix3HashSize)[h3] = m;
+
+ if (c2 >= matchMinPos)
+ {
+ CHECK_FAILURE_LZ(c2, m)
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
+ {
+ d[1] = m - c2 - 1;
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
+ {
+ d[0] = 3;
+ return d + 2;
+ }
+ d[0] = 2;
+ d += 2;
+ }
+ }
+
+ if (c3 >= matchMinPos)
+ {
+ CHECK_FAILURE_LZ(c3, m)
+ if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
+ {
+ *d++ = 3;
+ *d++ = m - c3 - 1;
+ }
+ }
+
+ return d;
+}
+
+
+#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;
+
+/*
+static
+UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
+{
+ const UInt32 *bt = p->btBufPos;
+ const UInt32 len = *bt++;
+ const UInt32 *btLim = bt + len;
+ UInt32 matchMinPos;
+ UInt32 avail = p->btNumAvailBytes - 1;
+ p->btBufPos = btLim;
+
+ {
+ p->btNumAvailBytes = avail;
+
+ #define BT_HASH_BYTES_MAX 5
+
+ matchMinPos = p->lzPos;
+
+ if (len != 0)
+ matchMinPos -= bt[1];
+ else if (avail < (BT_HASH_BYTES_MAX - 1) - 1)
+ {
+ INCREASE_LZ_POS
+ return d;
+ }
+ else
+ {
+ const UInt32 hs = p->historySize;
+ if (matchMinPos > hs)
+ matchMinPos -= hs;
+ else
+ matchMinPos = 1;
+ }
+ }
+
+ for (;;)
+ {
+
+ UInt32 h2, h3, c2, c3;
+ UInt32 *hash = p->hash;
+ const Byte *cur = p->pointerToCurPos;
+ UInt32 m = p->lzPos;
+ MT_HASH3_CALC
+
+ c2 = hash[h2];
+ c3 = (hash + kFix3HashSize)[h3];
+
+ hash[h2] = m;
+ (hash + kFix3HashSize)[h3] = m;
+
+ if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
+ {
+ d[1] = m - c2 - 1;
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
+ {
+ d[0] = 3;
+ d += 2;
+ break;
+ }
+ // else
+ {
+ d[0] = 2;
+ d += 2;
+ }
+ }
+ if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
+ {
+ *d++ = 3;
+ *d++ = m - c3 - 1;
+ }
+ break;
+ }
+
+ if (len != 0)
+ {
+ do
+ {
+ const UInt32 v0 = bt[0];
+ const UInt32 v1 = bt[1];
+ bt += 2;
+ d[0] = v0;
+ d[1] = v1;
+ d += 2;
+ }
+ while (bt != btLim);
+ }
+ INCREASE_LZ_POS
+ return d;
+}
+*/
+
+
+static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
+{
+ UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
+ UInt32 *hash = p->hash;
+ const Byte *cur = p->pointerToCurPos;
+ const UInt32 m = p->lzPos;
+ MT_HASH3_CALC
+ // MT_HASH4_CALC
+ c2 = hash[h2];
+ c3 = (hash + kFix3HashSize)[h3];
+ // c4 = (hash + kFix4HashSize)[h4];
+
+ hash[h2] = m;
+ (hash + kFix3HashSize)[h3] = m;
+ // (hash + kFix4HashSize)[h4] = m;
+
+ // #define BT5_USE_H2
+ // #ifdef BT5_USE_H2
+ if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
+ {
+ d[1] = m - c2 - 1;
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
+ {
+ // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3;
+ // return d + 2;
+
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3])
+ {
+ d[0] = 4;
+ return d + 2;
+ }
+ d[0] = 3;
+ d += 2;
+
+ #ifdef BT5_USE_H4
+ if (c4 >= matchMinPos)
+ if (
+ cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
+ cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
+ )
+ {
+ *d++ = 4;
+ *d++ = m - c4 - 1;
+ }
+ #endif
+ return d;
+ }
+ d[0] = 2;
+ d += 2;
+ }
+ // #endif
+
+ if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
+ {
+ d[1] = m - c3 - 1;
+ if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3])
+ {
+ d[0] = 4;
+ return d + 2;
+ }
+ d[0] = 3;
+ d += 2;
+ }
+
+ #ifdef BT5_USE_H4
+ if (c4 >= matchMinPos)
+ if (
+ cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
+ cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
+ )
+ {
+ *d++ = 4;
+ *d++ = m - c4 - 1;
+ }
+ #endif
+
+ return d;
+}
+
+
+static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
+{
+ const UInt32 *bt = p->btBufPos;
+ const UInt32 len = *bt++;
+ const UInt32 *btLim = bt + len;
+ p->btBufPos = btLim;
+ p->btNumAvailBytes--;
+ INCREASE_LZ_POS
+ {
+ while (bt != btLim)
+ {
+ const UInt32 v0 = bt[0];
+ const UInt32 v1 = bt[1];
+ bt += 2;
+ d[0] = v0;
+ d[1] = v1;
+ d += 2;
+ }
+ }
+ return d;
+}
+
+
+
+static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
+{
+ const UInt32 *bt = p->btBufPos;
+ UInt32 len = *bt++;
+ const UInt32 avail = p->btNumAvailBytes - 1;
+ p->btNumAvailBytes = avail;
+ p->btBufPos = bt + len;
+ if (len == 0)
+ {
+ #define BT_HASH_BYTES_MAX 5
+ if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)
+ {
+ UInt32 m = p->lzPos;
+ if (m > p->historySize)
+ m -= p->historySize;
+ else
+ m = 1;
+ d = p->MixMatchesFunc(p, m, d);
+ }
+ }
+ else
+ {
+ /*
+ first match pair from BinTree: (match_len, match_dist),
+ (match_len >= numHashBytes).
+ MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)
+ */
+ d = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
+ // if (d) // check for failure
+ do
+ {
+ const UInt32 v0 = bt[0];
+ const UInt32 v1 = bt[1];
+ bt += 2;
+ d[0] = v0;
+ d[1] = v1;
+ d += 2;
+ }
+ while (len -= 2);
+ }
+ INCREASE_LZ_POS
+ return d;
+}
+
+#define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED
+#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
+#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);
+
+static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
+{
+ SKIP_HEADER2_MT { p->btNumAvailBytes--;
+ SKIP_FOOTER_MT
+}
+
+static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
+{
+ SKIP_HEADER_MT(2)
+ UInt32 h2;
+ MT_HASH2_CALC
+ hash[h2] = p->lzPos;
+ SKIP_FOOTER_MT
+}
+
+static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
+{
+ SKIP_HEADER_MT(3)
+ UInt32 h2, h3;
+ MT_HASH3_CALC
+ (hash + kFix3HashSize)[h3] =
+ hash[ h2] =
+ p->lzPos;
+ SKIP_FOOTER_MT
+}
+
+/*
+// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip().
+// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream.
+
+static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
+{
+ SKIP_HEADER_MT(4)
+ UInt32 h2, h3; // h4
+ MT_HASH3_CALC
+ // MT_HASH4_CALC
+ // (hash + kFix4HashSize)[h4] =
+ (hash + kFix3HashSize)[h3] =
+ hash[ h2] =
+ p->lzPos;
+ SKIP_FOOTER_MT
+}
+*/
+
+void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
+{
+ vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
+ vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
+ vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
+ vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;
+
+ switch (MF(p)->numHashBytes)
+ {
+ case 2:
+ p->GetHeadsFunc = GetHeads2;
+ p->MixMatchesFunc = (Mf_Mix_Matches)NULL;
+ vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
+ vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
+ break;
+ case 3:
+ p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
+ p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
+ vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
+ break;
+ case 4:
+ p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;
+
+ // it's fast inline version of GetMatches()
+ // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;
+
+ p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
+ vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
+ break;
+ default:
+ p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
+ p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
+ vTable->Skip =
+ (Mf_Skip_Func)MatchFinderMt3_Skip;
+ // (Mf_Skip_Func)MatchFinderMt4_Skip;
+ break;
+ }
+}
+
+#undef RINOK_THREAD
+#undef PRF
+#undef MF
+#undef GetUi24hi_from32
+#undef LOCK_BUFFER
+#undef UNLOCK_BUFFER
diff --git a/src/Common/lzma/LzFindMt.h b/src/Common/lzma/LzFindMt.h
new file mode 100644
index 00000000..db5923ea
--- /dev/null
+++ b/src/Common/lzma/LzFindMt.h
@@ -0,0 +1,109 @@
+/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
+2023-03-05 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_LZ_FIND_MT_H
+#define ZIP7_INC_LZ_FIND_MT_H
+
+#include "LzFind.h"
+#include "Threads.h"
+
+EXTERN_C_BEGIN
+
+typedef struct
+{
+ UInt32 numProcessedBlocks;
+ CThread thread;
+ UInt64 affinity;
+
+ BoolInt wasCreated;
+ BoolInt needStart;
+ BoolInt csWasInitialized;
+ BoolInt csWasEntered;
+
+ BoolInt exit;
+ BoolInt stopWriting;
+
+ CAutoResetEvent canStart;
+ CAutoResetEvent wasStopped;
+ CSemaphore freeSemaphore;
+ CSemaphore filledSemaphore;
+ CCriticalSection cs;
+ // UInt32 numBlocks_Sent;
+} CMtSync;
+
+typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
+
+/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */
+#define kMtCacheLineDummy 128
+
+typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos,
+ UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc);
+
+typedef struct
+{
+ /* LZ */
+ const Byte *pointerToCurPos;
+ UInt32 *btBuf;
+ const UInt32 *btBufPos;
+ const UInt32 *btBufPosLimit;
+ UInt32 lzPos;
+ UInt32 btNumAvailBytes;
+
+ UInt32 *hash;
+ UInt32 fixedHashSize;
+ // UInt32 hash4Mask;
+ UInt32 historySize;
+ const UInt32 *crc;
+
+ Mf_Mix_Matches MixMatchesFunc;
+ UInt32 failure_LZ_BT; // failure in BT transfered to LZ
+ // UInt32 failure_LZ_LZ; // failure in LZ tables
+ UInt32 failureBuf[1];
+ // UInt32 crc[256];
+
+ /* LZ + BT */
+ CMtSync btSync;
+ Byte btDummy[kMtCacheLineDummy];
+
+ /* BT */
+ UInt32 *hashBuf;
+ UInt32 hashBufPos;
+ UInt32 hashBufPosLimit;
+ UInt32 hashNumAvail;
+ UInt32 failure_BT;
+
+
+ CLzRef *son;
+ UInt32 matchMaxLen;
+ UInt32 numHashBytes;
+ UInt32 pos;
+ const Byte *buffer;
+ UInt32 cyclicBufferPos;
+ UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
+ UInt32 cutValue;
+
+ /* BT + Hash */
+ CMtSync hashSync;
+ /* Byte hashDummy[kMtCacheLineDummy]; */
+
+ /* Hash */
+ Mf_GetHeads GetHeadsFunc;
+ CMatchFinder *MatchFinder;
+ // CMatchFinder MatchFinder;
+} CMatchFinderMt;
+
+// only for Mt part
+void MatchFinderMt_Construct(CMatchFinderMt *p);
+void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc);
+
+SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
+ UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc);
+void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable);
+
+/* call MatchFinderMt_InitMt() before IMatchFinder::Init() */
+SRes MatchFinderMt_InitMt(CMatchFinderMt *p);
+void MatchFinderMt_ReleaseStream(CMatchFinderMt *p);
+
+EXTERN_C_END
+
+#endif
diff --git a/src/Common/lzma/LzFindOpt.c b/src/Common/lzma/LzFindOpt.c
new file mode 100644
index 00000000..85bdc136
--- /dev/null
+++ b/src/Common/lzma/LzFindOpt.c
@@ -0,0 +1,578 @@
+/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
+2023-04-02 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "CpuArch.h"
+#include "LzFind.h"
+
+// #include "LzFindMt.h"
+
+// #define LOG_ITERS
+
+// #define LOG_THREAD
+
+#ifdef LOG_THREAD
+#include <stdio.h>
+#define PRF(x) x
+#else
+// #define PRF(x)
+#endif
+
+#ifdef LOG_ITERS
+#include <stdio.h>
+UInt64 g_NumIters_Tree;
+UInt64 g_NumIters_Loop;
+UInt64 g_NumIters_Bytes;
+#define LOG_ITER(x) x
+#else
+#define LOG_ITER(x)
+#endif
+
+// ---------- BT THREAD ----------
+
+#define USE_SON_PREFETCH
+#define USE_LONG_MATCH_OPT
+
+#define kEmptyHashValue 0
+
+// #define CYC_TO_POS_OFFSET 0
+
+// #define CYC_TO_POS_OFFSET 1 // for debug
+
+/*
+Z7_NO_INLINE
+UInt32 * Z7_FASTCALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
+{
+ do
+ {
+ UInt32 delta;
+ if (hash == size)
+ break;
+ delta = *hash++;
+
+ if (delta == 0 || delta > (UInt32)pos)
+ return NULL;
+
+ lenLimit++;
+
+ if (delta == (UInt32)pos)
+ {
+ CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
+ *d++ = 0;
+ ptr1[0] = kEmptyHashValue;
+ ptr1[1] = kEmptyHashValue;
+ }
+else
+{
+ UInt32 *_distances = ++d;
+
+ CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
+ CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
+
+ const Byte *len0 = cur, *len1 = cur;
+ UInt32 cutValue = _cutValue;
+ const Byte *maxLen = cur + _maxLen;
+
+ for (LOG_ITER(g_NumIters_Tree++);;)
+ {
+ LOG_ITER(g_NumIters_Loop++);
+ {
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
+ const Byte *len = (len0 < len1 ? len0 : len1);
+
+ #ifdef USE_SON_PREFETCH
+ const UInt32 pair0 = *pair;
+ #endif
+
+ if (len[diff] == len[0])
+ {
+ if (++len != lenLimit && len[diff] == len[0])
+ while (++len != lenLimit)
+ {
+ LOG_ITER(g_NumIters_Bytes++);
+ if (len[diff] != len[0])
+ break;
+ }
+ if (maxLen < len)
+ {
+ maxLen = len;
+ *d++ = (UInt32)(len - cur);
+ *d++ = delta - 1;
+
+ if (len == lenLimit)
+ {
+ const UInt32 pair1 = pair[1];
+ *ptr1 =
+ #ifdef USE_SON_PREFETCH
+ pair0;
+ #else
+ pair[0];
+ #endif
+ *ptr0 = pair1;
+
+ _distances[-1] = (UInt32)(d - _distances);
+
+ #ifdef USE_LONG_MATCH_OPT
+
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+
+ {
+ for (;;)
+ {
+ hash++;
+ pos++;
+ cur++;
+ lenLimit++;
+ {
+ CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
+ #if 0
+ *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
+ #else
+ const UInt32 p0 = ptr[0 + (diff * 2)];
+ const UInt32 p1 = ptr[1 + (diff * 2)];
+ ptr[0] = p0;
+ ptr[1] = p1;
+ // ptr[0] = ptr[0 + (diff * 2)];
+ // ptr[1] = ptr[1 + (diff * 2)];
+ #endif
+ }
+ // PrintSon(son + 2, pos - 1);
+ // printf("\npos = %x delta = %x\n", pos, delta);
+ len++;
+ *d++ = 2;
+ *d++ = (UInt32)(len - cur);
+ *d++ = delta - 1;
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+ }
+ }
+ #endif
+
+ break;
+ }
+ }
+ }
+
+ {
+ const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
+ if (len[diff] < len[0])
+ {
+ delta = pair[1];
+ if (delta >= curMatch)
+ return NULL;
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ len1 = len;
+ }
+ else
+ {
+ delta = *pair;
+ if (delta >= curMatch)
+ return NULL;
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ len0 = len;
+ }
+
+ delta = (UInt32)pos - delta;
+
+ if (--cutValue == 0 || delta >= pos)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ _distances[-1] = (UInt32)(d - _distances);
+ break;
+ }
+ }
+ }
+ } // for (tree iterations)
+}
+ pos++;
+ cur++;
+ }
+ while (d < limit);
+ *posRes = (UInt32)pos;
+ return d;
+}
+*/
+
+/* define cbs if you use 2 functions.
+ GetMatchesSpecN_1() : (pos < _cyclicBufferSize)
+ GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)
+
+ do not define cbs if you use 1 function:
+ GetMatchesSpecN_2()
+*/
+
+// #define cbs _cyclicBufferSize
+
+/*
+ we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32
+ to eliminate "movsx" BUG in old MSVC x64 compiler.
+*/
+
+UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes);
+
+Z7_NO_INLINE
+UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes)
+{
+ do // while (hash != size)
+ {
+ UInt32 delta;
+
+ #ifndef cbs
+ UInt32 cbs;
+ #endif
+
+ if (hash == size)
+ break;
+
+ delta = *hash++;
+
+ if (delta == 0)
+ return NULL;
+
+ lenLimit++;
+
+ #ifndef cbs
+ cbs = _cyclicBufferSize;
+ if ((UInt32)pos < cbs)
+ {
+ if (delta > (UInt32)pos)
+ return NULL;
+ cbs = (UInt32)pos;
+ }
+ #endif
+
+ if (delta >= cbs)
+ {
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ *d++ = 0;
+ ptr1[0] = kEmptyHashValue;
+ ptr1[1] = kEmptyHashValue;
+ }
+else
+{
+ UInt32 *_distances = ++d;
+
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+
+ UInt32 cutValue = _cutValue;
+ const Byte *len0 = cur, *len1 = cur;
+ const Byte *maxLen = cur + _maxLen;
+
+ // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else
+ for (LOG_ITER(g_NumIters_Tree++);;)
+ {
+ LOG_ITER(g_NumIters_Loop++);
+ {
+ // SPEC code
+ CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta
+ + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
+ ) << 1);
+
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ const Byte *len = (len0 < len1 ? len0 : len1);
+
+ #ifdef USE_SON_PREFETCH
+ const UInt32 pair0 = *pair;
+ #endif
+
+ if (len[diff] == len[0])
+ {
+ if (++len != lenLimit && len[diff] == len[0])
+ while (++len != lenLimit)
+ {
+ LOG_ITER(g_NumIters_Bytes++);
+ if (len[diff] != len[0])
+ break;
+ }
+ if (maxLen < len)
+ {
+ maxLen = len;
+ *d++ = (UInt32)(len - cur);
+ *d++ = delta - 1;
+
+ if (len == lenLimit)
+ {
+ const UInt32 pair1 = pair[1];
+ *ptr1 =
+ #ifdef USE_SON_PREFETCH
+ pair0;
+ #else
+ pair[0];
+ #endif
+ *ptr0 = pair1;
+
+ _distances[-1] = (UInt32)(d - _distances);
+
+ #ifdef USE_LONG_MATCH_OPT
+
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+
+ {
+ for (;;)
+ {
+ *d++ = 2;
+ *d++ = (UInt32)(lenLimit - cur);
+ *d++ = delta - 1;
+ cur++;
+ lenLimit++;
+ // SPEC
+ _cyclicBufferPos++;
+ {
+ // SPEC code
+ CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);
+ const CLzRef *src = dest + ((diff
+ + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);
+ // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
+ #if 0
+ *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
+ #else
+ const UInt32 p0 = src[0];
+ const UInt32 p1 = src[1];
+ dest[0] = p0;
+ dest[1] = p1;
+ #endif
+ }
+ pos++;
+ hash++;
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+ } // for() end for long matches
+ }
+ #endif
+
+ break; // break from TREE iterations
+ }
+ }
+ }
+ {
+ const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
+ if (len[diff] < len[0])
+ {
+ delta = pair[1];
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ len1 = len;
+ if (delta >= curMatch)
+ return NULL;
+ }
+ else
+ {
+ delta = *pair;
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ len0 = len;
+ if (delta >= curMatch)
+ return NULL;
+ }
+ delta = (UInt32)pos - delta;
+
+ if (--cutValue == 0 || delta >= cbs)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ _distances[-1] = (UInt32)(d - _distances);
+ break;
+ }
+ }
+ }
+ } // for (tree iterations)
+}
+ pos++;
+ _cyclicBufferPos++;
+ cur++;
+ }
+ while (d < limit);
+ *posRes = (UInt32)pos;
+ return d;
+}
+
+
+
+/*
+typedef UInt32 uint32plus; // size_t
+
+UInt32 * Z7_FASTCALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes)
+{
+ do // while (hash != size)
+ {
+ UInt32 delta;
+
+ #ifndef cbs
+ UInt32 cbs;
+ #endif
+
+ if (hash == size)
+ break;
+
+ delta = *hash++;
+
+ if (delta == 0)
+ return NULL;
+
+ #ifndef cbs
+ cbs = _cyclicBufferSize;
+ if ((UInt32)pos < cbs)
+ {
+ if (delta > (UInt32)pos)
+ return NULL;
+ cbs = (UInt32)pos;
+ }
+ #endif
+
+ if (delta >= cbs)
+ {
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ *d++ = 0;
+ ptr1[0] = kEmptyHashValue;
+ ptr1[1] = kEmptyHashValue;
+ }
+else
+{
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ UInt32 *_distances = ++d;
+ uint32plus len0 = 0, len1 = 0;
+ UInt32 cutValue = _cutValue;
+ uint32plus maxLen = _maxLen;
+ // lenLimit++; // const Byte *lenLimit = cur + _lenLimit;
+
+ for (LOG_ITER(g_NumIters_Tree++);;)
+ {
+ LOG_ITER(g_NumIters_Loop++);
+ {
+ // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta
+ + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
+ ) << 1);
+ const Byte *pb = cur - delta;
+ uint32plus len = (len0 < len1 ? len0 : len1);
+
+ #ifdef USE_SON_PREFETCH
+ const UInt32 pair0 = *pair;
+ #endif
+
+ if (pb[len] == cur[len])
+ {
+ if (++len != lenLimit && pb[len] == cur[len])
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ if (maxLen < len)
+ {
+ maxLen = len;
+ *d++ = (UInt32)len;
+ *d++ = delta - 1;
+ if (len == lenLimit)
+ {
+ {
+ const UInt32 pair1 = pair[1];
+ *ptr0 = pair1;
+ *ptr1 =
+ #ifdef USE_SON_PREFETCH
+ pair0;
+ #else
+ pair[0];
+ #endif
+ }
+
+ _distances[-1] = (UInt32)(d - _distances);
+
+ #ifdef USE_LONG_MATCH_OPT
+
+ if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
+ break;
+
+ {
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ for (;;)
+ {
+ *d++ = 2;
+ *d++ = (UInt32)lenLimit;
+ *d++ = delta - 1;
+ _cyclicBufferPos++;
+ {
+ CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);
+ const CLzRef *src = dest + ((diff +
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);
+ #if 0
+ *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
+ #else
+ const UInt32 p0 = src[0];
+ const UInt32 p1 = src[1];
+ dest[0] = p0;
+ dest[1] = p1;
+ #endif
+ }
+ hash++;
+ pos++;
+ cur++;
+ pb++;
+ if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
+ break;
+ }
+ }
+ #endif
+
+ break;
+ }
+ }
+ }
+ {
+ const UInt32 curMatch = (UInt32)pos - delta;
+ if (pb[len] < cur[len])
+ {
+ delta = pair[1];
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ len1 = len;
+ }
+ else
+ {
+ delta = *pair;
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ len0 = len;
+ }
+
+ {
+ if (delta >= curMatch)
+ return NULL;
+ delta = (UInt32)pos - delta;
+ if (delta >= cbs
+ // delta >= _cyclicBufferSize || delta >= pos
+ || --cutValue == 0)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ _distances[-1] = (UInt32)(d - _distances);
+ break;
+ }
+ }
+ }
+ }
+ } // for (tree iterations)
+}
+ pos++;
+ _cyclicBufferPos++;
+ cur++;
+ }
+ while (d < limit);
+ *posRes = (UInt32)pos;
+ return d;
+}
+*/
diff --git a/src/Common/lzma/LzHash.h b/src/Common/lzma/LzHash.h
new file mode 100644
index 00000000..2b6290b6
--- /dev/null
+++ b/src/Common/lzma/LzHash.h
@@ -0,0 +1,34 @@
+/* LzHash.h -- HASH constants for LZ algorithms
+2023-03-05 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_LZ_HASH_H
+#define ZIP7_INC_LZ_HASH_H
+
+/*
+ (kHash2Size >= (1 << 8)) : Required
+ (kHash3Size >= (1 << 16)) : Required
+*/
+
+#define kHash2Size (1 << 10)
+#define kHash3Size (1 << 16)
+// #define kHash4Size (1 << 20)
+
+#define kFix3HashSize (kHash2Size)
+#define kFix4HashSize (kHash2Size + kHash3Size)
+// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+
+/*
+ We use up to 3 crc values for hash:
+ crc0
+ crc1 << Shift_1
+ crc2 << Shift_2
+ (Shift_1 = 5) and (Shift_2 = 10) is good tradeoff.
+ Small values for Shift are not good for collision rate.
+ Big value for Shift_2 increases the minimum size
+ of hash table, that will be slow for small files.
+*/
+
+#define kLzHash_CrcShift_1 5
+#define kLzHash_CrcShift_2 10
+
+#endif
diff --git a/src/Common/lzma/LzmaDec.c b/src/Common/lzma/LzmaDec.c
new file mode 100644
index 00000000..69bb8bba
--- /dev/null
+++ b/src/Common/lzma/LzmaDec.c
@@ -0,0 +1,1363 @@
+/* LzmaDec.c -- LZMA Decoder
+2023-04-07 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+/* #include "CpuArch.h" */
+#include "LzmaDec.h"
+
+// #define kNumTopBits 24
+#define kTopValue ((UInt32)1 << 24)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+
+#define RC_INIT_SIZE 5
+
+#ifndef Z7_LZMA_DEC_OPT
+
+#define kNumMoveBits 5
+#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
+
+#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
+#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
+#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
+ { UPDATE_0(p) i = (i + i); A0; } else \
+ { UPDATE_1(p) i = (i + i) + 1; A1; }
+
+#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
+
+#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
+ { UPDATE_0(p + i) A0; } else \
+ { UPDATE_1(p + i) A1; }
+#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
+#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; )
+#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; )
+
+#define TREE_DECODE(probs, limit, i) \
+ { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
+
+/* #define Z7_LZMA_SIZE_OPT */
+
+#ifdef Z7_LZMA_SIZE_OPT
+#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
+#else
+#define TREE_6_DECODE(probs, i) \
+ { i = 1; \
+ TREE_GET_BIT(probs, i) \
+ TREE_GET_BIT(probs, i) \
+ TREE_GET_BIT(probs, i) \
+ TREE_GET_BIT(probs, i) \
+ TREE_GET_BIT(probs, i) \
+ TREE_GET_BIT(probs, i) \
+ i -= 0x40; }
+#endif
+
+#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol)
+#define MATCHED_LITER_DEC \
+ matchByte += matchByte; \
+ bit = offs; \
+ offs &= matchByte; \
+ probLit = prob + (offs + bit + symbol); \
+ GET_BIT2(probLit, symbol, offs ^= bit; , ;)
+
+#endif // Z7_LZMA_DEC_OPT
+
+
+#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
+
+#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
+#define UPDATE_0_CHECK range = bound;
+#define UPDATE_1_CHECK range -= bound; code -= bound;
+#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
+ { UPDATE_0_CHECK i = (i + i); A0; } else \
+ { UPDATE_1_CHECK i = (i + i) + 1; A1; }
+#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
+#define TREE_DECODE_CHECK(probs, limit, i) \
+ { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
+
+
+#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
+ { UPDATE_0_CHECK i += m; m += m; } else \
+ { UPDATE_1_CHECK m += m; i += m; }
+
+
+#define kNumPosBitsMax 4
+#define kNumPosStatesMax (1 << kNumPosBitsMax)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
+#define LenLow 0
+#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits))
+#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
+
+#define LenChoice LenLow
+#define LenChoice2 (LenLow + (1 << kLenNumLowBits))
+
+#define kNumStates 12
+#define kNumStates2 16
+#define kNumLitStates 7
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+#define kNumPosSlotBits 6
+#define kNumLenToPosStates 4
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+
+#define kMatchMinLen 2
+#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
+
+#define kMatchSpecLen_Error_Data (1 << 9)
+#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1)
+
+/* External ASM code needs same CLzmaProb array layout. So don't change it. */
+
+/* (probs_1664) is faster and better for code size at some platforms */
+/*
+#ifdef MY_CPU_X86_OR_AMD64
+*/
+#define kStartOffset 1664
+#define GET_PROBS p->probs_1664
+/*
+#define GET_PROBS p->probs + kStartOffset
+#else
+#define kStartOffset 0
+#define GET_PROBS p->probs
+#endif
+*/
+
+#define SpecPos (-kStartOffset)
+#define IsRep0Long (SpecPos + kNumFullDistances)
+#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax))
+#define LenCoder (RepLenCoder + kNumLenProbs)
+#define IsMatch (LenCoder + kNumLenProbs)
+#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax))
+#define IsRep (Align + kAlignTableSize)
+#define IsRepG0 (IsRep + kNumStates)
+#define IsRepG1 (IsRepG0 + kNumStates)
+#define IsRepG2 (IsRepG1 + kNumStates)
+#define PosSlot (IsRepG2 + kNumStates)
+#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
+#define NUM_BASE_PROBS (Literal + kStartOffset)
+
+#if Align != 0 && kStartOffset != 0
+ #error Stop_Compiling_Bad_LZMA_kAlign
+#endif
+
+#if NUM_BASE_PROBS != 1984
+ #error Stop_Compiling_Bad_LZMA_PROBS
+#endif
+
+
+#define LZMA_LIT_SIZE 0x300
+
+#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
+
+
+#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4)
+#define COMBINED_PS_STATE (posState + state)
+#define GET_LEN_STATE (posState)
+
+#define LZMA_DIC_MIN (1 << 12)
+
+/*
+p->remainLen : shows status of LZMA decoder:
+ < kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset
+ = kMatchSpecLenStart : the LZMA stream was finished with end mark
+ = kMatchSpecLenStart + 1 : need init range coder
+ = kMatchSpecLenStart + 2 : need init range coder and state
+ = kMatchSpecLen_Error_Fail : Internal Code Failure
+ = kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error
+*/
+
+/* ---------- LZMA_DECODE_REAL ---------- */
+/*
+LzmaDec_DecodeReal_3() can be implemented in external ASM file.
+3 - is the code compatibility version of that function for check at link time.
+*/
+
+#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3
+
+/*
+LZMA_DECODE_REAL()
+In:
+ RangeCoder is normalized
+ if (p->dicPos == limit)
+ {
+ LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases.
+ So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol
+ is not END_OF_PAYALOAD_MARKER, then the function doesn't write any byte to dictionary,
+ the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later.
+ }
+
+Processing:
+ The first LZMA symbol will be decoded in any case.
+ All main checks for limits are at the end of main loop,
+ It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
+ RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked.
+ But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for
+ next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX),
+ that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit.
+ So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte.
+
+Out:
+ RangeCoder is normalized
+ Result:
+ SZ_OK - OK
+ p->remainLen:
+ < kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset
+ = kMatchSpecLenStart : the LZMA stream was finished with end mark
+
+ SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary
+ p->remainLen : undefined
+ p->reps[*] : undefined
+*/
+
+
+#ifdef Z7_LZMA_DEC_OPT
+
+int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
+
+#else
+
+static
+int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{
+ CLzmaProb *probs = GET_PROBS;
+ unsigned state = (unsigned)p->state;
+ UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
+ unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
+ unsigned lc = p->prop.lc;
+ unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);
+
+ Byte *dic = p->dic;
+ SizeT dicBufSize = p->dicBufSize;
+ SizeT dicPos = p->dicPos;
+
+ UInt32 processedPos = p->processedPos;
+ UInt32 checkDicSize = p->checkDicSize;
+ unsigned len = 0;
+
+ const Byte *buf = p->buf;
+ UInt32 range = p->range;
+ UInt32 code = p->code;
+
+ do
+ {
+ CLzmaProb *prob;
+ UInt32 bound;
+ unsigned ttt;
+ unsigned posState = CALC_POS_STATE(processedPos, pbMask);
+
+ prob = probs + IsMatch + COMBINED_PS_STATE;
+ IF_BIT_0(prob)
+ {
+ unsigned symbol;
+ UPDATE_0(prob)
+ prob = probs + Literal;
+ if (processedPos != 0 || checkDicSize != 0)
+ prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
+ processedPos++;
+
+ if (state < kNumLitStates)
+ {
+ state -= (state < 4) ? state : 3;
+ symbol = 1;
+ #ifdef Z7_LZMA_SIZE_OPT
+ do { NORMAL_LITER_DEC } while (symbol < 0x100);
+ #else
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ #endif
+ }
+ else
+ {
+ unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ unsigned offs = 0x100;
+ state -= (state < 10) ? 3 : 6;
+ symbol = 1;
+ #ifdef Z7_LZMA_SIZE_OPT
+ do
+ {
+ unsigned bit;
+ CLzmaProb *probLit;
+ MATCHED_LITER_DEC
+ }
+ while (symbol < 0x100);
+ #else
+ {
+ unsigned bit;
+ CLzmaProb *probLit;
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ }
+ #endif
+ }
+
+ dic[dicPos++] = (Byte)symbol;
+ continue;
+ }
+
+ {
+ UPDATE_1(prob)
+ prob = probs + IsRep + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob)
+ state += kNumStates;
+ prob = probs + LenCoder;
+ }
+ else
+ {
+ UPDATE_1(prob)
+ prob = probs + IsRepG0 + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob)
+ prob = probs + IsRep0Long + COMBINED_PS_STATE;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob)
+
+ // that case was checked before with kBadRepCode
+ // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
+ // The caller doesn't allow (dicPos == limit) case here
+ // so we don't need the following check:
+ // if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; }
+
+ dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ dicPos++;
+ processedPos++;
+ state = state < kNumLitStates ? 9 : 11;
+ continue;
+ }
+ UPDATE_1(prob)
+ }
+ else
+ {
+ UInt32 distance;
+ UPDATE_1(prob)
+ prob = probs + IsRepG1 + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob)
+ distance = rep1;
+ }
+ else
+ {
+ UPDATE_1(prob)
+ prob = probs + IsRepG2 + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob)
+ distance = rep2;
+ }
+ else
+ {
+ UPDATE_1(prob)
+ distance = rep3;
+ rep3 = rep2;
+ }
+ rep2 = rep1;
+ }
+ rep1 = rep0;
+ rep0 = distance;
+ }
+ state = state < kNumLitStates ? 8 : 11;
+ prob = probs + RepLenCoder;
+ }
+
+ #ifdef Z7_LZMA_SIZE_OPT
+ {
+ unsigned lim, offset;
+ CLzmaProb *probLen = prob + LenChoice;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen)
+ probLen = prob + LenLow + GET_LEN_STATE;
+ offset = 0;
+ lim = (1 << kLenNumLowBits);
+ }
+ else
+ {
+ UPDATE_1(probLen)
+ probLen = prob + LenChoice2;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen)
+ probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+ offset = kLenNumLowSymbols;
+ lim = (1 << kLenNumLowBits);
+ }
+ else
+ {
+ UPDATE_1(probLen)
+ probLen = prob + LenHigh;
+ offset = kLenNumLowSymbols * 2;
+ lim = (1 << kLenNumHighBits);
+ }
+ }
+ TREE_DECODE(probLen, lim, len)
+ len += offset;
+ }
+ #else
+ {
+ CLzmaProb *probLen = prob + LenChoice;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen)
+ probLen = prob + LenLow + GET_LEN_STATE;
+ len = 1;
+ TREE_GET_BIT(probLen, len)
+ TREE_GET_BIT(probLen, len)
+ TREE_GET_BIT(probLen, len)
+ len -= 8;
+ }
+ else
+ {
+ UPDATE_1(probLen)
+ probLen = prob + LenChoice2;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen)
+ probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+ len = 1;
+ TREE_GET_BIT(probLen, len)
+ TREE_GET_BIT(probLen, len)
+ TREE_GET_BIT(probLen, len)
+ }
+ else
+ {
+ UPDATE_1(probLen)
+ probLen = prob + LenHigh;
+ TREE_DECODE(probLen, (1 << kLenNumHighBits), len)
+ len += kLenNumLowSymbols * 2;
+ }
+ }
+ }
+ #endif
+
+ if (state >= kNumStates)
+ {
+ UInt32 distance;
+ prob = probs + PosSlot +
+ ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
+ TREE_6_DECODE(prob, distance)
+ if (distance >= kStartPosModelIndex)
+ {
+ unsigned posSlot = (unsigned)distance;
+ unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
+ distance = (2 | (distance & 1));
+ if (posSlot < kEndPosModelIndex)
+ {
+ distance <<= numDirectBits;
+ prob = probs + SpecPos;
+ {
+ UInt32 m = 1;
+ distance++;
+ do
+ {
+ REV_BIT_VAR(prob, distance, m)
+ }
+ while (--numDirectBits);
+ distance -= m;
+ }
+ }
+ else
+ {
+ numDirectBits -= kNumAlignBits;
+ do
+ {
+ NORMALIZE
+ range >>= 1;
+
+ {
+ UInt32 t;
+ code -= range;
+ t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */
+ distance = (distance << 1) + (t + 1);
+ code += range & t;
+ }
+ /*
+ distance <<= 1;
+ if (code >= range)
+ {
+ code -= range;
+ distance |= 1;
+ }
+ */
+ }
+ while (--numDirectBits);
+ prob = probs + Align;
+ distance <<= kNumAlignBits;
+ {
+ unsigned i = 1;
+ REV_BIT_CONST(prob, i, 1)
+ REV_BIT_CONST(prob, i, 2)
+ REV_BIT_CONST(prob, i, 4)
+ REV_BIT_LAST (prob, i, 8)
+ distance |= i;
+ }
+ if (distance == (UInt32)0xFFFFFFFF)
+ {
+ len = kMatchSpecLenStart;
+ state -= kNumStates;
+ break;
+ }
+ }
+ }
+
+ rep3 = rep2;
+ rep2 = rep1;
+ rep1 = rep0;
+ rep0 = distance + 1;
+ state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
+ if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
+ {
+ len += kMatchSpecLen_Error_Data + kMatchMinLen;
+ // len = kMatchSpecLen_Error_Data;
+ // len += kMatchMinLen;
+ break;
+ }
+ }
+
+ len += kMatchMinLen;
+
+ {
+ SizeT rem;
+ unsigned curLen;
+ SizeT pos;
+
+ if ((rem = limit - dicPos) == 0)
+ {
+ /*
+ We stop decoding and return SZ_OK, and we can resume decoding later.
+ Any error conditions can be tested later in caller code.
+ For more strict mode we can stop decoding with error
+ // len += kMatchSpecLen_Error_Data;
+ */
+ break;
+ }
+
+ curLen = ((rem < len) ? (unsigned)rem : len);
+ pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
+
+ processedPos += (UInt32)curLen;
+
+ len -= curLen;
+ if (curLen <= dicBufSize - pos)
+ {
+ Byte *dest = dic + dicPos;
+ ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
+ const Byte *lim = dest + curLen;
+ dicPos += (SizeT)curLen;
+ do
+ *(dest) = (Byte)*(dest + src);
+ while (++dest != lim);
+ }
+ else
+ {
+ do
+ {
+ dic[dicPos++] = dic[pos];
+ if (++pos == dicBufSize)
+ pos = 0;
+ }
+ while (--curLen != 0);
+ }
+ }
+ }
+ }
+ while (dicPos < limit && buf < bufLimit);
+
+ NORMALIZE
+
+ p->buf = buf;
+ p->range = range;
+ p->code = code;
+ p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too.
+ p->dicPos = dicPos;
+ p->processedPos = processedPos;
+ p->reps[0] = rep0;
+ p->reps[1] = rep1;
+ p->reps[2] = rep2;
+ p->reps[3] = rep3;
+ p->state = (UInt32)state;
+ if (len >= kMatchSpecLen_Error_Data)
+ return SZ_ERROR_DATA;
+ return SZ_OK;
+}
+#endif
+
+
+
+static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
+{
+ unsigned len = (unsigned)p->remainLen;
+ if (len == 0 /* || len >= kMatchSpecLenStart */)
+ return;
+ {
+ SizeT dicPos = p->dicPos;
+ Byte *dic;
+ SizeT dicBufSize;
+ SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
+ {
+ SizeT rem = limit - dicPos;
+ if (rem < len)
+ {
+ len = (unsigned)(rem);
+ if (len == 0)
+ return;
+ }
+ }
+
+ if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
+ p->checkDicSize = p->prop.dicSize;
+
+ p->processedPos += (UInt32)len;
+ p->remainLen -= (UInt32)len;
+ dic = p->dic;
+ rep0 = p->reps[0];
+ dicBufSize = p->dicBufSize;
+ do
+ {
+ dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ dicPos++;
+ }
+ while (--len);
+ p->dicPos = dicPos;
+ }
+}
+
+
+/*
+At staring of new stream we have one of the following symbols:
+ - Literal - is allowed
+ - Non-Rep-Match - is allowed only if it's end marker symbol
+ - Rep-Match - is not allowed
+We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code
+*/
+
+#define kRange0 0xFFFFFFFF
+#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))
+#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)))
+#if kBadRepCode != (0xC0000000 - 0x400)
+ #error Stop_Compiling_Bad_LZMA_Check
+#endif
+
+
+/*
+LzmaDec_DecodeReal2():
+ It calls LZMA_DECODE_REAL() and it adjusts limit according (p->checkDicSize).
+
+We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(),
+and we support the following state of (p->checkDicSize):
+ if (total_processed < p->prop.dicSize) then
+ {
+ (total_processed == p->processedPos)
+ (p->checkDicSize == 0)
+ }
+ else
+ (p->checkDicSize == p->prop.dicSize)
+*/
+
+static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{
+ if (p->checkDicSize == 0)
+ {
+ UInt32 rem = p->prop.dicSize - p->processedPos;
+ if (limit - p->dicPos > rem)
+ limit = p->dicPos + rem;
+ }
+ {
+ int res = LZMA_DECODE_REAL(p, limit, bufLimit);
+ if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
+ p->checkDicSize = p->prop.dicSize;
+ return res;
+ }
+}
+
+
+
+typedef enum
+{
+ DUMMY_INPUT_EOF, /* need more input data */
+ DUMMY_LIT,
+ DUMMY_MATCH,
+ DUMMY_REP
+} ELzmaDummy;
+
+
+#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH)
+
+static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut)
+{
+ UInt32 range = p->range;
+ UInt32 code = p->code;
+ const Byte *bufLimit = *bufOut;
+ const CLzmaProb *probs = GET_PROBS;
+ unsigned state = (unsigned)p->state;
+ ELzmaDummy res;
+
+ for (;;)
+ {
+ const CLzmaProb *prob;
+ UInt32 bound;
+ unsigned ttt;
+ unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1);
+
+ prob = probs + IsMatch + COMBINED_PS_STATE;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK
+
+ prob = probs + Literal;
+ if (p->checkDicSize != 0 || p->processedPos != 0)
+ prob += ((UInt32)LZMA_LIT_SIZE *
+ ((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) +
+ ((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
+
+ if (state < kNumLitStates)
+ {
+ unsigned symbol = 1;
+ do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
+ }
+ else
+ {
+ unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
+ (p->dicPos < p->reps[0] ? p->dicBufSize : 0)];
+ unsigned offs = 0x100;
+ unsigned symbol = 1;
+ do
+ {
+ unsigned bit;
+ const CLzmaProb *probLit;
+ matchByte += matchByte;
+ bit = offs;
+ offs &= matchByte;
+ probLit = prob + (offs + bit + symbol);
+ GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; )
+ }
+ while (symbol < 0x100);
+ }
+ res = DUMMY_LIT;
+ }
+ else
+ {
+ unsigned len;
+ UPDATE_1_CHECK
+
+ prob = probs + IsRep + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK
+ state = 0;
+ prob = probs + LenCoder;
+ res = DUMMY_MATCH;
+ }
+ else
+ {
+ UPDATE_1_CHECK
+ res = DUMMY_REP;
+ prob = probs + IsRepG0 + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK
+ prob = probs + IsRep0Long + COMBINED_PS_STATE;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK
+ break;
+ }
+ else
+ {
+ UPDATE_1_CHECK
+ }
+ }
+ else
+ {
+ UPDATE_1_CHECK
+ prob = probs + IsRepG1 + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK
+ }
+ else
+ {
+ UPDATE_1_CHECK
+ prob = probs + IsRepG2 + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK
+ }
+ else
+ {
+ UPDATE_1_CHECK
+ }
+ }
+ }
+ state = kNumStates;
+ prob = probs + RepLenCoder;
+ }
+ {
+ unsigned limit, offset;
+ const CLzmaProb *probLen = prob + LenChoice;
+ IF_BIT_0_CHECK(probLen)
+ {
+ UPDATE_0_CHECK
+ probLen = prob + LenLow + GET_LEN_STATE;
+ offset = 0;
+ limit = 1 << kLenNumLowBits;
+ }
+ else
+ {
+ UPDATE_1_CHECK
+ probLen = prob + LenChoice2;
+ IF_BIT_0_CHECK(probLen)
+ {
+ UPDATE_0_CHECK
+ probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+ offset = kLenNumLowSymbols;
+ limit = 1 << kLenNumLowBits;
+ }
+ else
+ {
+ UPDATE_1_CHECK
+ probLen = prob + LenHigh;
+ offset = kLenNumLowSymbols * 2;
+ limit = 1 << kLenNumHighBits;
+ }
+ }
+ TREE_DECODE_CHECK(probLen, limit, len)
+ len += offset;
+ }
+
+ if (state < 4)
+ {
+ unsigned posSlot;
+ prob = probs + PosSlot +
+ ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
+ kNumPosSlotBits);
+ TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot)
+ if (posSlot >= kStartPosModelIndex)
+ {
+ unsigned numDirectBits = ((posSlot >> 1) - 1);
+
+ if (posSlot < kEndPosModelIndex)
+ {
+ prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);
+ }
+ else
+ {
+ numDirectBits -= kNumAlignBits;
+ do
+ {
+ NORMALIZE_CHECK
+ range >>= 1;
+ code -= range & (((code - range) >> 31) - 1);
+ /* if (code >= range) code -= range; */
+ }
+ while (--numDirectBits);
+ prob = probs + Align;
+ numDirectBits = kNumAlignBits;
+ }
+ {
+ unsigned i = 1;
+ unsigned m = 1;
+ do
+ {
+ REV_BIT_CHECK(prob, i, m)
+ }
+ while (--numDirectBits);
+ }
+ }
+ }
+ }
+ break;
+ }
+ NORMALIZE_CHECK
+
+ *bufOut = buf;
+ return res;
+}
+
+void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);
+void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
+{
+ p->remainLen = kMatchSpecLenStart + 1;
+ p->tempBufSize = 0;
+
+ if (initDic)
+ {
+ p->processedPos = 0;
+ p->checkDicSize = 0;
+ p->remainLen = kMatchSpecLenStart + 2;
+ }
+ if (initState)
+ p->remainLen = kMatchSpecLenStart + 2;
+}
+
+void LzmaDec_Init(CLzmaDec *p)
+{
+ p->dicPos = 0;
+ LzmaDec_InitDicAndState(p, True, True);
+}
+
+
+/*
+LZMA supports optional end_marker.
+So the decoder can lookahead for one additional LZMA-Symbol to check end_marker.
+That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream.
+When the decoder reaches dicLimit, it looks (finishMode) parameter:
+ if (finishMode == LZMA_FINISH_ANY), the decoder doesn't lookahead
+ if (finishMode != LZMA_FINISH_ANY), the decoder lookahead, if end_marker is possible for current position
+
+When the decoder lookahead, and the lookahead symbol is not end_marker, we have two ways:
+ 1) Strict mode (default) : the decoder returns SZ_ERROR_DATA.
+ 2) The relaxed mode (alternative mode) : we could return SZ_OK, and the caller
+ must check (status) value. The caller can show the error,
+ if the end of stream is expected, and the (status) is noit
+ LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK.
+*/
+
+
+#define RETURN_NOT_FINISHED_FOR_FINISH \
+ *status = LZMA_STATUS_NOT_FINISHED; \
+ return SZ_ERROR_DATA; // for strict mode
+ // return SZ_OK; // for relaxed mode
+
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
+ ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+ SizeT inSize = *srcLen;
+ (*srcLen) = 0;
+ *status = LZMA_STATUS_NOT_SPECIFIED;
+
+ if (p->remainLen > kMatchSpecLenStart)
+ {
+ if (p->remainLen > kMatchSpecLenStart + 2)
+ return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA;
+
+ for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
+ p->tempBuf[p->tempBufSize++] = *src++;
+ if (p->tempBufSize != 0 && p->tempBuf[0] != 0)
+ return SZ_ERROR_DATA;
+ if (p->tempBufSize < RC_INIT_SIZE)
+ {
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+ p->code =
+ ((UInt32)p->tempBuf[1] << 24)
+ | ((UInt32)p->tempBuf[2] << 16)
+ | ((UInt32)p->tempBuf[3] << 8)
+ | ((UInt32)p->tempBuf[4]);
+
+ if (p->checkDicSize == 0
+ && p->processedPos == 0
+ && p->code >= kBadRepCode)
+ return SZ_ERROR_DATA;
+
+ p->range = 0xFFFFFFFF;
+ p->tempBufSize = 0;
+
+ if (p->remainLen > kMatchSpecLenStart + 1)
+ {
+ SizeT numProbs = LzmaProps_GetNumProbs(&p->prop);
+ SizeT i;
+ CLzmaProb *probs = p->probs;
+ for (i = 0; i < numProbs; i++)
+ probs[i] = kBitModelTotal >> 1;
+ p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
+ p->state = 0;
+ }
+
+ p->remainLen = 0;
+ }
+
+ for (;;)
+ {
+ if (p->remainLen == kMatchSpecLenStart)
+ {
+ if (p->code != 0)
+ return SZ_ERROR_DATA;
+ *status = LZMA_STATUS_FINISHED_WITH_MARK;
+ return SZ_OK;
+ }
+
+ LzmaDec_WriteRem(p, dicLimit);
+
+ {
+ // (p->remainLen == 0 || p->dicPos == dicLimit)
+
+ int checkEndMarkNow = 0;
+
+ if (p->dicPos >= dicLimit)
+ {
+ if (p->remainLen == 0 && p->code == 0)
+ {
+ *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
+ return SZ_OK;
+ }
+ if (finishMode == LZMA_FINISH_ANY)
+ {
+ *status = LZMA_STATUS_NOT_FINISHED;
+ return SZ_OK;
+ }
+ if (p->remainLen != 0)
+ {
+ RETURN_NOT_FINISHED_FOR_FINISH
+ }
+ checkEndMarkNow = 1;
+ }
+
+ // (p->remainLen == 0)
+
+ if (p->tempBufSize == 0)
+ {
+ const Byte *bufLimit;
+ int dummyProcessed = -1;
+
+ if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+ {
+ const Byte *bufOut = src + inSize;
+
+ ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut);
+
+ if (dummyRes == DUMMY_INPUT_EOF)
+ {
+ size_t i;
+ if (inSize >= LZMA_REQUIRED_INPUT_MAX)
+ break;
+ (*srcLen) += inSize;
+ p->tempBufSize = (unsigned)inSize;
+ for (i = 0; i < inSize; i++)
+ p->tempBuf[i] = src[i];
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+
+ dummyProcessed = (int)(bufOut - src);
+ if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX)
+ break;
+
+ if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
+ {
+ unsigned i;
+ (*srcLen) += (unsigned)dummyProcessed;
+ p->tempBufSize = (unsigned)dummyProcessed;
+ for (i = 0; i < (unsigned)dummyProcessed; i++)
+ p->tempBuf[i] = src[i];
+ // p->remainLen = kMatchSpecLen_Error_Data;
+ RETURN_NOT_FINISHED_FOR_FINISH
+ }
+
+ bufLimit = src;
+ // we will decode only one iteration
+ }
+ else
+ bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
+
+ p->buf = src;
+
+ {
+ int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit);
+
+ SizeT processed = (SizeT)(p->buf - src);
+
+ if (dummyProcessed < 0)
+ {
+ if (processed > inSize)
+ break;
+ }
+ else if ((unsigned)dummyProcessed != processed)
+ break;
+
+ src += processed;
+ inSize -= processed;
+ (*srcLen) += processed;
+
+ if (res != SZ_OK)
+ {
+ p->remainLen = kMatchSpecLen_Error_Data;
+ return SZ_ERROR_DATA;
+ }
+ }
+ continue;
+ }
+
+ {
+ // we have some data in (p->tempBuf)
+ // in strict mode: tempBufSize is not enough for one Symbol decoding.
+ // in relaxed mode: tempBufSize not larger than required for one Symbol decoding.
+
+ unsigned rem = p->tempBufSize;
+ unsigned ahead = 0;
+ int dummyProcessed = -1;
+
+ while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize)
+ p->tempBuf[rem++] = src[ahead++];
+
+ // ahead - the size of new data copied from (src) to (p->tempBuf)
+ // rem - the size of temp buffer including new data from (src)
+
+ if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+ {
+ const Byte *bufOut = p->tempBuf + rem;
+
+ ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut);
+
+ if (dummyRes == DUMMY_INPUT_EOF)
+ {
+ if (rem >= LZMA_REQUIRED_INPUT_MAX)
+ break;
+ p->tempBufSize = rem;
+ (*srcLen) += (SizeT)ahead;
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+
+ dummyProcessed = (int)(bufOut - p->tempBuf);
+
+ if ((unsigned)dummyProcessed < p->tempBufSize)
+ break;
+
+ if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
+ {
+ (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
+ p->tempBufSize = (unsigned)dummyProcessed;
+ // p->remainLen = kMatchSpecLen_Error_Data;
+ RETURN_NOT_FINISHED_FOR_FINISH
+ }
+ }
+
+ p->buf = p->tempBuf;
+
+ {
+ // we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf)
+ int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf);
+
+ SizeT processed = (SizeT)(p->buf - p->tempBuf);
+ rem = p->tempBufSize;
+
+ if (dummyProcessed < 0)
+ {
+ if (processed > LZMA_REQUIRED_INPUT_MAX)
+ break;
+ if (processed < rem)
+ break;
+ }
+ else if ((unsigned)dummyProcessed != processed)
+ break;
+
+ processed -= rem;
+
+ src += processed;
+ inSize -= processed;
+ (*srcLen) += processed;
+ p->tempBufSize = 0;
+
+ if (res != SZ_OK)
+ {
+ p->remainLen = kMatchSpecLen_Error_Data;
+ return SZ_ERROR_DATA;
+ }
+ }
+ }
+ }
+ }
+
+ /* Some unexpected error: internal error of code, memory corruption or hardware failure */
+ p->remainLen = kMatchSpecLen_Error_Fail;
+ return SZ_ERROR_FAIL;
+}
+
+
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+ SizeT outSize = *destLen;
+ SizeT inSize = *srcLen;
+ *srcLen = *destLen = 0;
+ for (;;)
+ {
+ SizeT inSizeCur = inSize, outSizeCur, dicPos;
+ ELzmaFinishMode curFinishMode;
+ SRes res;
+ if (p->dicPos == p->dicBufSize)
+ p->dicPos = 0;
+ dicPos = p->dicPos;
+ if (outSize > p->dicBufSize - dicPos)
+ {
+ outSizeCur = p->dicBufSize;
+ curFinishMode = LZMA_FINISH_ANY;
+ }
+ else
+ {
+ outSizeCur = dicPos + outSize;
+ curFinishMode = finishMode;
+ }
+
+ res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status);
+ src += inSizeCur;
+ inSize -= inSizeCur;
+ *srcLen += inSizeCur;
+ outSizeCur = p->dicPos - dicPos;
+ memcpy(dest, p->dic + dicPos, outSizeCur);
+ dest += outSizeCur;
+ outSize -= outSizeCur;
+ *destLen += outSizeCur;
+ if (res != 0)
+ return res;
+ if (outSizeCur == 0 || outSize == 0)
+ return SZ_OK;
+ }
+}
+
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->probs);
+ p->probs = NULL;
+}
+
+static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->dic);
+ p->dic = NULL;
+}
+
+void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc)
+{
+ LzmaDec_FreeProbs(p, alloc);
+ LzmaDec_FreeDict(p, alloc);
+}
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
+{
+ UInt32 dicSize;
+ Byte d;
+
+ if (size < LZMA_PROPS_SIZE)
+ return SZ_ERROR_UNSUPPORTED;
+ else
+ dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24);
+
+ if (dicSize < LZMA_DIC_MIN)
+ dicSize = LZMA_DIC_MIN;
+ p->dicSize = dicSize;
+
+ d = data[0];
+ if (d >= (9 * 5 * 5))
+ return SZ_ERROR_UNSUPPORTED;
+
+ p->lc = (Byte)(d % 9);
+ d /= 9;
+ p->pb = (Byte)(d / 5);
+ p->lp = (Byte)(d % 5);
+
+ return SZ_OK;
+}
+
+static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc)
+{
+ UInt32 numProbs = LzmaProps_GetNumProbs(propNew);
+ if (!p->probs || numProbs != p->numProbs)
+ {
+ LzmaDec_FreeProbs(p, alloc);
+ p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb));
+ if (!p->probs)
+ return SZ_ERROR_MEM;
+ p->probs_1664 = p->probs + 1664;
+ p->numProbs = numProbs;
+ }
+ return SZ_OK;
+}
+
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
+{
+ CLzmaProps propNew;
+ RINOK(LzmaProps_Decode(&propNew, props, propsSize))
+ RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
+ p->prop = propNew;
+ return SZ_OK;
+}
+
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
+{
+ CLzmaProps propNew;
+ SizeT dicBufSize;
+ RINOK(LzmaProps_Decode(&propNew, props, propsSize))
+ RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
+
+ {
+ UInt32 dictSize = propNew.dicSize;
+ SizeT mask = ((UInt32)1 << 12) - 1;
+ if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
+ else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;
+ dicBufSize = ((SizeT)dictSize + mask) & ~mask;
+ if (dicBufSize < dictSize)
+ dicBufSize = dictSize;
+ }
+
+ if (!p->dic || dicBufSize != p->dicBufSize)
+ {
+ LzmaDec_FreeDict(p, alloc);
+ p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize);
+ if (!p->dic)
+ {
+ LzmaDec_FreeProbs(p, alloc);
+ return SZ_ERROR_MEM;
+ }
+ }
+ p->dicBufSize = dicBufSize;
+ p->prop = propNew;
+ return SZ_OK;
+}
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+ const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+ ELzmaStatus *status, ISzAllocPtr alloc)
+{
+ CLzmaDec p;
+ SRes res;
+ SizeT outSize = *destLen, inSize = *srcLen;
+ *destLen = *srcLen = 0;
+ *status = LZMA_STATUS_NOT_SPECIFIED;
+ if (inSize < RC_INIT_SIZE)
+ return SZ_ERROR_INPUT_EOF;
+ LzmaDec_CONSTRUCT(&p)
+ RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc))
+ p.dic = dest;
+ p.dicBufSize = outSize;
+ LzmaDec_Init(&p);
+ *srcLen = inSize;
+ res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
+ *destLen = p.dicPos;
+ if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
+ res = SZ_ERROR_INPUT_EOF;
+ LzmaDec_FreeProbs(&p, alloc);
+ return res;
+}
diff --git a/src/Common/lzma/LzmaDec.h b/src/Common/lzma/LzmaDec.h
new file mode 100644
index 00000000..b0ce28fa
--- /dev/null
+++ b/src/Common/lzma/LzmaDec.h
@@ -0,0 +1,237 @@
+/* LzmaDec.h -- LZMA Decoder
+2023-04-02 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_LZMA_DEC_H
+#define ZIP7_INC_LZMA_DEC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/* #define Z7_LZMA_PROB32 */
+/* Z7_LZMA_PROB32 can increase the speed on some CPUs,
+ but memory usage for CLzmaDec::probs will be doubled in that case */
+
+typedef
+#ifdef Z7_LZMA_PROB32
+ UInt32
+#else
+ UInt16
+#endif
+ CLzmaProb;
+
+
+/* ---------- LZMA Properties ---------- */
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct
+{
+ Byte lc;
+ Byte lp;
+ Byte pb;
+ Byte _pad_;
+ UInt32 dicSize;
+} CLzmaProps;
+
+/* LzmaProps_Decode - decodes properties
+Returns:
+ SZ_OK
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
+
+
+/* ---------- LZMA Decoder state ---------- */
+
+/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
+ Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
+
+#define LZMA_REQUIRED_INPUT_MAX 20
+
+typedef struct
+{
+ /* Don't change this structure. ASM code can use it. */
+ CLzmaProps prop;
+ CLzmaProb *probs;
+ CLzmaProb *probs_1664;
+ Byte *dic;
+ SizeT dicBufSize;
+ SizeT dicPos;
+ const Byte *buf;
+ UInt32 range;
+ UInt32 code;
+ UInt32 processedPos;
+ UInt32 checkDicSize;
+ UInt32 reps[4];
+ UInt32 state;
+ UInt32 remainLen;
+
+ UInt32 numProbs;
+ unsigned tempBufSize;
+ Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
+} CLzmaDec;
+
+#define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; }
+#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p)
+
+void LzmaDec_Init(CLzmaDec *p);
+
+/* There are two types of LZMA streams:
+ - Stream with end mark. That end mark adds about 6 bytes to compressed size.
+ - Stream without end mark. You must know exact uncompressed size to decompress such stream. */
+
+typedef enum
+{
+ LZMA_FINISH_ANY, /* finish at any point */
+ LZMA_FINISH_END /* block must be finished at the end */
+} ELzmaFinishMode;
+
+/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
+
+ You must use LZMA_FINISH_END, when you know that current output buffer
+ covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
+
+ If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
+ and output value of destLen will be less than output buffer size limit.
+ You can check status result also.
+
+ You can use multiple checks to test data integrity after full decompression:
+ 1) Check Result and "status" variable.
+ 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
+ 3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
+ You must use correct finish mode in that case. */
+
+typedef enum
+{
+ LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */
+ LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */
+ LZMA_STATUS_NOT_FINISHED, /* stream was not finished */
+ LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */
+ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */
+} ELzmaStatus;
+
+/* ELzmaStatus is used only as output value for function call */
+
+
+/* ---------- Interfaces ---------- */
+
+/* There are 3 levels of interfaces:
+ 1) Dictionary Interface
+ 2) Buffer Interface
+ 3) One Call Interface
+ You can select any of these interfaces, but don't mix functions from different
+ groups for same object. */
+
+
+/* There are two variants to allocate state for Dictionary Interface:
+ 1) LzmaDec_Allocate / LzmaDec_Free
+ 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
+ You can use variant 2, if you set dictionary buffer manually.
+ For Buffer Interface you must always use variant 1.
+
+LzmaDec_Allocate* can return:
+ SZ_OK
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc);
+
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
+void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc);
+
+/* ---------- Dictionary Interface ---------- */
+
+/* You can use it, if you want to eliminate the overhead for data copying from
+ dictionary to some other external buffer.
+ You must work with CLzmaDec variables directly in this interface.
+
+ STEPS:
+ LzmaDec_Construct()
+ LzmaDec_Allocate()
+ for (each new stream)
+ {
+ LzmaDec_Init()
+ while (it needs more decompression)
+ {
+ LzmaDec_DecodeToDic()
+ use data from CLzmaDec::dic and update CLzmaDec::dicPos
+ }
+ }
+ LzmaDec_Free()
+*/
+
+/* LzmaDec_DecodeToDic
+
+ The decoding to internal dictionary buffer (CLzmaDec::dic).
+ You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
+
+finishMode:
+ It has meaning only if the decoding reaches output limit (dicLimit).
+ LZMA_FINISH_ANY - Decode just dicLimit bytes.
+ LZMA_FINISH_END - Stream must be finished after dicLimit.
+
+Returns:
+ SZ_OK
+ status:
+ LZMA_STATUS_FINISHED_WITH_MARK
+ LZMA_STATUS_NOT_FINISHED
+ LZMA_STATUS_NEEDS_MORE_INPUT
+ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+ SZ_ERROR_DATA - Data error
+ SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
+*/
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
+ const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- Buffer Interface ---------- */
+
+/* It's zlib-like interface.
+ See LzmaDec_DecodeToDic description for information about STEPS and return results,
+ but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
+ to work with CLzmaDec variables manually.
+
+finishMode:
+ It has meaning only if the decoding reaches output limit (*destLen).
+ LZMA_FINISH_ANY - Decode just destLen bytes.
+ LZMA_FINISH_END - Stream must be finished after (*destLen).
+*/
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
+ const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- One Call Interface ---------- */
+
+/* LzmaDecode
+
+finishMode:
+ It has meaning only if the decoding reaches output limit (*destLen).
+ LZMA_FINISH_ANY - Decode just destLen bytes.
+ LZMA_FINISH_END - Stream must be finished after (*destLen).
+
+Returns:
+ SZ_OK
+ status:
+ LZMA_STATUS_FINISHED_WITH_MARK
+ LZMA_STATUS_NOT_FINISHED
+ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+ SZ_ERROR_DATA - Data error
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+ SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+ SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
+*/
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+ const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+ ELzmaStatus *status, ISzAllocPtr alloc);
+
+EXTERN_C_END
+
+#endif
diff --git a/src/Common/lzma/LzmaEnc.c b/src/Common/lzma/LzmaEnc.c
new file mode 100644
index 00000000..6d13cac8
--- /dev/null
+++ b/src/Common/lzma/LzmaEnc.c
@@ -0,0 +1,3144 @@
+/* LzmaEnc.c -- LZMA Encoder
+2023-04-13: Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+/* #define SHOW_STAT */
+/* #define SHOW_STAT2 */
+
+#if defined(SHOW_STAT) || defined(SHOW_STAT2)
+#include <stdio.h>
+#endif
+
+#include "CpuArch.h"
+#include "LzmaEnc.h"
+
+#include "LzFind.h"
+#ifndef Z7_ST
+#include "LzFindMt.h"
+#endif
+
+/* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */
+
+SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize,
+ ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen,
+ UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
+ Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
+const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p);
+void LzmaEnc_Finish(CLzmaEncHandle p);
+void LzmaEnc_SaveState(CLzmaEncHandle p);
+void LzmaEnc_RestoreState(CLzmaEncHandle p);
+
+#ifdef SHOW_STAT
+static unsigned g_STAT_OFFSET = 0;
+#endif
+
+/* for good normalization speed we still reserve 256 MB before 4 GB range */
+#define kLzmaMaxHistorySize ((UInt32)15 << 28)
+
+// #define kNumTopBits 24
+#define kTopValue ((UInt32)1 << 24)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+#define kProbInitValue (kBitModelTotal >> 1)
+
+#define kNumMoveReducingBits 4
+#define kNumBitPriceShiftBits 4
+// #define kBitPrice (1 << kNumBitPriceShiftBits)
+
+#define REP_LEN_COUNT 64
+
+void LzmaEncProps_Init(CLzmaEncProps *p)
+{
+ p->level = 5;
+ p->dictSize = p->mc = 0;
+ p->reduceSize = (UInt64)(Int64)-1;
+ p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
+ p->numHashOutBits = 0;
+ p->writeEndMark = 0;
+ p->affinity = 0;
+}
+
+void LzmaEncProps_Normalize(CLzmaEncProps *p)
+{
+ int level = p->level;
+ if (level < 0) level = 5;
+ p->level = level;
+
+ if (p->dictSize == 0)
+ p->dictSize =
+ ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) :
+ ( level <= 6 ? ((UInt32)1 << (level + 19)) :
+ ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26)
+ )));
+
+ if (p->dictSize > p->reduceSize)
+ {
+ UInt32 v = (UInt32)p->reduceSize;
+ const UInt32 kReduceMin = ((UInt32)1 << 12);
+ if (v < kReduceMin)
+ v = kReduceMin;
+ if (p->dictSize > v)
+ p->dictSize = v;
+ }
+
+ if (p->lc < 0) p->lc = 3;
+ if (p->lp < 0) p->lp = 0;
+ if (p->pb < 0) p->pb = 2;
+
+ if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
+ if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
+ if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
+ if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5);
+ if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1);
+
+ if (p->numThreads < 0)
+ p->numThreads =
+ #ifndef Z7_ST
+ ((p->btMode && p->algo) ? 2 : 1);
+ #else
+ 1;
+ #endif
+}
+
+UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
+{
+ CLzmaEncProps props = *props2;
+ LzmaEncProps_Normalize(&props);
+ return props.dictSize;
+}
+
+
+/*
+x86/x64:
+
+BSR:
+ IF (SRC == 0) ZF = 1, DEST is undefined;
+ AMD : DEST is unchanged;
+ IF (SRC != 0) ZF = 0; DEST is index of top non-zero bit
+ BSR is slow in some processors
+
+LZCNT:
+ IF (SRC == 0) CF = 1, DEST is size_in_bits_of_register(src) (32 or 64)
+ IF (SRC != 0) CF = 0, DEST = num_lead_zero_bits
+ IF (DEST == 0) ZF = 1;
+
+LZCNT works only in new processors starting from Haswell.
+if LZCNT is not supported by processor, then it's executed as BSR.
+LZCNT can be faster than BSR, if supported.
+*/
+
+// #define LZMA_LOG_BSR
+
+#if defined(MY_CPU_ARM_OR_ARM64) /* || defined(MY_CPU_X86_OR_AMD64) */
+
+ #if (defined(__clang__) && (__clang_major__ >= 6)) \
+ || (defined(__GNUC__) && (__GNUC__ >= 6))
+ #define LZMA_LOG_BSR
+ #elif defined(_MSC_VER) && (_MSC_VER >= 1300)
+ // #if defined(MY_CPU_ARM_OR_ARM64)
+ #define LZMA_LOG_BSR
+ // #endif
+ #endif
+#endif
+
+// #include <intrin.h>
+
+#ifdef LZMA_LOG_BSR
+
+#if defined(__clang__) \
+ || defined(__GNUC__)
+
+/*
+ C code: : (30 - __builtin_clz(x))
+ gcc9/gcc10 for x64 /x86 : 30 - (bsr(x) xor 31)
+ clang10 for x64 : 31 + (bsr(x) xor -32)
+*/
+
+ #define MY_clz(x) ((unsigned)__builtin_clz(x))
+ // __lzcnt32
+ // __builtin_ia32_lzcnt_u32
+
+#else // #if defined(_MSC_VER)
+
+ #ifdef MY_CPU_ARM_OR_ARM64
+
+ #define MY_clz _CountLeadingZeros
+
+ #else // if defined(MY_CPU_X86_OR_AMD64)
+
+ // #define MY_clz __lzcnt // we can use lzcnt (unsupported by old CPU)
+ // _BitScanReverse code is not optimal for some MSVC compilers
+ #define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); zz--; \
+ res = (zz + zz) + (pos >> zz); }
+
+ #endif // MY_CPU_X86_OR_AMD64
+
+#endif // _MSC_VER
+
+
+#ifndef BSR2_RET
+
+ #define BSR2_RET(pos, res) { unsigned zz = 30 - MY_clz(pos); \
+ res = (zz + zz) + (pos >> zz); }
+
+#endif
+
+
+unsigned GetPosSlot1(UInt32 pos);
+unsigned GetPosSlot1(UInt32 pos)
+{
+ unsigned res;
+ BSR2_RET(pos, res);
+ return res;
+}
+#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
+#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
+
+
+#else // ! LZMA_LOG_BSR
+
+#define kNumLogBits (11 + sizeof(size_t) / 8 * 3)
+
+#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
+
+static void LzmaEnc_FastPosInit(Byte *g_FastPos)
+{
+ unsigned slot;
+ g_FastPos[0] = 0;
+ g_FastPos[1] = 1;
+ g_FastPos += 2;
+
+ for (slot = 2; slot < kNumLogBits * 2; slot++)
+ {
+ size_t k = ((size_t)1 << ((slot >> 1) - 1));
+ size_t j;
+ for (j = 0; j < k; j++)
+ g_FastPos[j] = (Byte)slot;
+ g_FastPos += k;
+ }
+}
+
+/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */
+/*
+#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
+ (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \
+ res = p->g_FastPos[pos >> zz] + (zz * 2); }
+*/
+
+/*
+#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
+ (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \
+ res = p->g_FastPos[pos >> zz] + (zz * 2); }
+*/
+
+#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \
+ res = p->g_FastPos[pos >> zz] + (zz * 2); }
+
+/*
+#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \
+ p->g_FastPos[pos >> 6] + 12 : \
+ p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; }
+*/
+
+#define GetPosSlot1(pos) p->g_FastPos[pos]
+#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
+#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); }
+
+#endif // LZMA_LOG_BSR
+
+
+#define LZMA_NUM_REPS 4
+
+typedef UInt16 CState;
+typedef UInt16 CExtra;
+
+typedef struct
+{
+ UInt32 price;
+ CState state;
+ CExtra extra;
+ // 0 : normal
+ // 1 : LIT : MATCH
+ // > 1 : MATCH (extra-1) : LIT : REP0 (len)
+ UInt32 len;
+ UInt32 dist;
+ UInt32 reps[LZMA_NUM_REPS];
+} COptimal;
+
+
+// 18.06
+#define kNumOpts (1 << 11)
+#define kPackReserve (kNumOpts * 8)
+// #define kNumOpts (1 << 12)
+// #define kPackReserve (1 + kNumOpts * 2)
+
+#define kNumLenToPosStates 4
+#define kNumPosSlotBits 6
+// #define kDicLogSizeMin 0
+#define kDicLogSizeMax 32
+#define kDistTableSizeMax (kDicLogSizeMax * 2)
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+#define kAlignMask (kAlignTableSize - 1)
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+typedef
+#ifdef Z7_LZMA_PROB32
+ UInt32
+#else
+ UInt16
+#endif
+ CLzmaProb;
+
+#define LZMA_PB_MAX 4
+#define LZMA_LC_MAX 8
+#define LZMA_LP_MAX 4
+
+#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols)
+
+#define LZMA_MATCH_LEN_MIN 2
+#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1)
+
+#define kNumStates 12
+
+
+typedef struct
+{
+ CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)];
+ CLzmaProb high[kLenNumHighSymbols];
+} CLenEnc;
+
+
+typedef struct
+{
+ unsigned tableSize;
+ UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];
+ // UInt32 prices1[LZMA_NUM_PB_STATES_MAX][kLenNumLowSymbols * 2];
+ // UInt32 prices2[kLenNumSymbolsTotal];
+} CLenPriceEnc;
+
+#define GET_PRICE_LEN(p, posState, len) \
+ ((p)->prices[posState][(size_t)(len) - LZMA_MATCH_LEN_MIN])
+
+/*
+#define GET_PRICE_LEN(p, posState, len) \
+ ((p)->prices2[(size_t)(len) - 2] + ((p)->prices1[posState][((len) - 2) & (kLenNumLowSymbols * 2 - 1)] & (((len) - 2 - kLenNumLowSymbols * 2) >> 9)))
+*/
+
+typedef struct
+{
+ UInt32 range;
+ unsigned cache;
+ UInt64 low;
+ UInt64 cacheSize;
+ Byte *buf;
+ Byte *bufLim;
+ Byte *bufBase;
+ ISeqOutStreamPtr outStream;
+ UInt64 processed;
+ SRes res;
+} CRangeEnc;
+
+
+typedef struct
+{
+ CLzmaProb *litProbs;
+
+ unsigned state;
+ UInt32 reps[LZMA_NUM_REPS];
+
+ CLzmaProb posAlignEncoder[1 << kNumAlignBits];
+ CLzmaProb isRep[kNumStates];
+ CLzmaProb isRepG0[kNumStates];
+ CLzmaProb isRepG1[kNumStates];
+ CLzmaProb isRepG2[kNumStates];
+ CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
+ CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+
+ CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+ CLzmaProb posEncoders[kNumFullDistances];
+
+ CLenEnc lenProbs;
+ CLenEnc repLenProbs;
+
+} CSaveState;
+
+
+typedef UInt32 CProbPrice;
+
+
+struct CLzmaEnc
+{
+ void *matchFinderObj;
+ IMatchFinder2 matchFinder;
+
+ unsigned optCur;
+ unsigned optEnd;
+
+ unsigned longestMatchLen;
+ unsigned numPairs;
+ UInt32 numAvail;
+
+ unsigned state;
+ unsigned numFastBytes;
+ unsigned additionalOffset;
+ UInt32 reps[LZMA_NUM_REPS];
+ unsigned lpMask, pbMask;
+ CLzmaProb *litProbs;
+ CRangeEnc rc;
+
+ UInt32 backRes;
+
+ unsigned lc, lp, pb;
+ unsigned lclp;
+
+ BoolInt fastMode;
+ BoolInt writeEndMark;
+ BoolInt finished;
+ BoolInt multiThread;
+ BoolInt needInit;
+ // BoolInt _maxMode;
+
+ UInt64 nowPos64;
+
+ unsigned matchPriceCount;
+ // unsigned alignPriceCount;
+ int repLenEncCounter;
+
+ unsigned distTableSize;
+
+ UInt32 dictSize;
+ SRes result;
+
+ #ifndef Z7_ST
+ BoolInt mtMode;
+ // begin of CMatchFinderMt is used in LZ thread
+ CMatchFinderMt matchFinderMt;
+ // end of CMatchFinderMt is used in BT and HASH threads
+ // #else
+ // CMatchFinder matchFinderBase;
+ #endif
+ CMatchFinder matchFinderBase;
+
+
+ // we suppose that we have 8-bytes alignment after CMatchFinder
+
+ #ifndef Z7_ST
+ Byte pad[128];
+ #endif
+
+ // LZ thread
+ CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
+
+ // we want {len , dist} pairs to be 8-bytes aligned in matches array
+ UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2];
+
+ // we want 8-bytes alignment here
+ UInt32 alignPrices[kAlignTableSize];
+ UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
+ UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
+
+ CLzmaProb posAlignEncoder[1 << kNumAlignBits];
+ CLzmaProb isRep[kNumStates];
+ CLzmaProb isRepG0[kNumStates];
+ CLzmaProb isRepG1[kNumStates];
+ CLzmaProb isRepG2[kNumStates];
+ CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
+ CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+ CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+ CLzmaProb posEncoders[kNumFullDistances];
+
+ CLenEnc lenProbs;
+ CLenEnc repLenProbs;
+
+ #ifndef LZMA_LOG_BSR
+ Byte g_FastPos[1 << kNumLogBits];
+ #endif
+
+ CLenPriceEnc lenEnc;
+ CLenPriceEnc repLenEnc;
+
+ COptimal opt[kNumOpts];
+
+ CSaveState saveState;
+
+ // BoolInt mf_Failure;
+ #ifndef Z7_ST
+ Byte pad2[128];
+ #endif
+};
+
+
+#define MFB (p->matchFinderBase)
+/*
+#ifndef Z7_ST
+#define MFB (p->matchFinderMt.MatchFinder)
+#endif
+*/
+
+// #define GET_CLzmaEnc_p CLzmaEnc *p = (CLzmaEnc*)(void *)p;
+// #define GET_const_CLzmaEnc_p const CLzmaEnc *p = (const CLzmaEnc*)(const void *)p;
+
+#define COPY_ARR(dest, src, arr) memcpy((dest)->arr, (src)->arr, sizeof((src)->arr));
+
+#define COPY_LZMA_ENC_STATE(d, s, p) \
+ (d)->state = (s)->state; \
+ COPY_ARR(d, s, reps) \
+ COPY_ARR(d, s, posAlignEncoder) \
+ COPY_ARR(d, s, isRep) \
+ COPY_ARR(d, s, isRepG0) \
+ COPY_ARR(d, s, isRepG1) \
+ COPY_ARR(d, s, isRepG2) \
+ COPY_ARR(d, s, isMatch) \
+ COPY_ARR(d, s, isRep0Long) \
+ COPY_ARR(d, s, posSlotEncoder) \
+ COPY_ARR(d, s, posEncoders) \
+ (d)->lenProbs = (s)->lenProbs; \
+ (d)->repLenProbs = (s)->repLenProbs; \
+ memcpy((d)->litProbs, (s)->litProbs, ((UInt32)0x300 << (p)->lclp) * sizeof(CLzmaProb));
+
+void LzmaEnc_SaveState(CLzmaEncHandle p)
+{
+ // GET_CLzmaEnc_p
+ CSaveState *v = &p->saveState;
+ COPY_LZMA_ENC_STATE(v, p, p)
+}
+
+void LzmaEnc_RestoreState(CLzmaEncHandle p)
+{
+ // GET_CLzmaEnc_p
+ const CSaveState *v = &p->saveState;
+ COPY_LZMA_ENC_STATE(p, v, p)
+}
+
+
+Z7_NO_INLINE
+SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2)
+{
+ // GET_CLzmaEnc_p
+ CLzmaEncProps props = *props2;
+ LzmaEncProps_Normalize(&props);
+
+ if (props.lc > LZMA_LC_MAX
+ || props.lp > LZMA_LP_MAX
+ || props.pb > LZMA_PB_MAX)
+ return SZ_ERROR_PARAM;
+
+
+ if (props.dictSize > kLzmaMaxHistorySize)
+ props.dictSize = kLzmaMaxHistorySize;
+
+ #ifndef LZMA_LOG_BSR
+ {
+ const UInt64 dict64 = props.dictSize;
+ if (dict64 > ((UInt64)1 << kDicLogSizeMaxCompress))
+ return SZ_ERROR_PARAM;
+ }
+ #endif
+
+ p->dictSize = props.dictSize;
+ {
+ unsigned fb = (unsigned)props.fb;
+ if (fb < 5)
+ fb = 5;
+ if (fb > LZMA_MATCH_LEN_MAX)
+ fb = LZMA_MATCH_LEN_MAX;
+ p->numFastBytes = fb;
+ }
+ p->lc = (unsigned)props.lc;
+ p->lp = (unsigned)props.lp;
+ p->pb = (unsigned)props.pb;
+ p->fastMode = (props.algo == 0);
+ // p->_maxMode = True;
+ MFB.btMode = (Byte)(props.btMode ? 1 : 0);
+ // MFB.btMode = (Byte)(props.btMode);
+ {
+ unsigned numHashBytes = 4;
+ if (props.btMode)
+ {
+ if (props.numHashBytes < 2) numHashBytes = 2;
+ else if (props.numHashBytes < 4) numHashBytes = (unsigned)props.numHashBytes;
+ }
+ if (props.numHashBytes >= 5) numHashBytes = 5;
+
+ MFB.numHashBytes = numHashBytes;
+ // MFB.numHashBytes_Min = 2;
+ MFB.numHashOutBits = (Byte)props.numHashOutBits;
+ }
+
+ MFB.cutValue = props.mc;
+
+ p->writeEndMark = (BoolInt)props.writeEndMark;
+
+ #ifndef Z7_ST
+ /*
+ if (newMultiThread != _multiThread)
+ {
+ ReleaseMatchFinder();
+ _multiThread = newMultiThread;
+ }
+ */
+ p->multiThread = (props.numThreads > 1);
+ p->matchFinderMt.btSync.affinity =
+ p->matchFinderMt.hashSync.affinity = props.affinity;
+ #endif
+
+ return SZ_OK;
+}
+
+
+void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize)
+{
+ // GET_CLzmaEnc_p
+ MFB.expectedDataSize = expectedDataSiize;
+}
+
+
+#define kState_Start 0
+#define kState_LitAfterMatch 4
+#define kState_LitAfterRep 5
+#define kState_MatchAfterLit 7
+#define kState_RepAfterLit 8
+
+static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5};
+static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
+static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
+static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
+
+#define IsLitState(s) ((s) < 7)
+#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1)
+#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1)
+
+#define kInfinityPrice (1 << 30)
+
+static void RangeEnc_Construct(CRangeEnc *p)
+{
+ p->outStream = NULL;
+ p->bufBase = NULL;
+}
+
+#define RangeEnc_GetProcessed(p) ( (p)->processed + (size_t)((p)->buf - (p)->bufBase) + (p)->cacheSize)
+#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
+
+#define RC_BUF_SIZE (1 << 16)
+
+static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc)
+{
+ if (!p->bufBase)
+ {
+ p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE);
+ if (!p->bufBase)
+ return 0;
+ p->bufLim = p->bufBase + RC_BUF_SIZE;
+ }
+ return 1;
+}
+
+static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->bufBase);
+ p->bufBase = NULL;
+}
+
+static void RangeEnc_Init(CRangeEnc *p)
+{
+ p->range = 0xFFFFFFFF;
+ p->cache = 0;
+ p->low = 0;
+ p->cacheSize = 0;
+
+ p->buf = p->bufBase;
+
+ p->processed = 0;
+ p->res = SZ_OK;
+}
+
+Z7_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
+{
+ const size_t num = (size_t)(p->buf - p->bufBase);
+ if (p->res == SZ_OK)
+ {
+ if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
+ p->res = SZ_ERROR_WRITE;
+ }
+ p->processed += num;
+ p->buf = p->bufBase;
+}
+
+Z7_NO_INLINE static void Z7_FASTCALL RangeEnc_ShiftLow(CRangeEnc *p)
+{
+ UInt32 low = (UInt32)p->low;
+ unsigned high = (unsigned)(p->low >> 32);
+ p->low = (UInt32)(low << 8);
+ if (low < (UInt32)0xFF000000 || high != 0)
+ {
+ {
+ Byte *buf = p->buf;
+ *buf++ = (Byte)(p->cache + high);
+ p->cache = (unsigned)(low >> 24);
+ p->buf = buf;
+ if (buf == p->bufLim)
+ RangeEnc_FlushStream(p);
+ if (p->cacheSize == 0)
+ return;
+ }
+ high += 0xFF;
+ for (;;)
+ {
+ Byte *buf = p->buf;
+ *buf++ = (Byte)(high);
+ p->buf = buf;
+ if (buf == p->bufLim)
+ RangeEnc_FlushStream(p);
+ if (--p->cacheSize == 0)
+ return;
+ }
+ }
+ p->cacheSize++;
+}
+
+static void RangeEnc_FlushData(CRangeEnc *p)
+{
+ int i;
+ for (i = 0; i < 5; i++)
+ RangeEnc_ShiftLow(p);
+}
+
+#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); }
+
+#define RC_BIT_PRE(p, prob) \
+ ttt = *(prob); \
+ newBound = (range >> kNumBitModelTotalBits) * ttt;
+
+// #define Z7_LZMA_ENC_USE_BRANCH
+
+#ifdef Z7_LZMA_ENC_USE_BRANCH
+
+#define RC_BIT(p, prob, bit) { \
+ RC_BIT_PRE(p, prob) \
+ if (bit == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \
+ else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \
+ *(prob) = (CLzmaProb)ttt; \
+ RC_NORM(p) \
+ }
+
+#else
+
+#define RC_BIT(p, prob, bit) { \
+ UInt32 mask; \
+ RC_BIT_PRE(p, prob) \
+ mask = 0 - (UInt32)bit; \
+ range &= mask; \
+ mask &= newBound; \
+ range -= mask; \
+ (p)->low += mask; \
+ mask = (UInt32)bit - 1; \
+ range += newBound & mask; \
+ mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \
+ mask += ((1 << kNumMoveBits) - 1); \
+ ttt += (UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \
+ *(prob) = (CLzmaProb)ttt; \
+ RC_NORM(p) \
+ }
+
+#endif
+
+
+
+
+#define RC_BIT_0_BASE(p, prob) \
+ range = newBound; *(prob) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+
+#define RC_BIT_1_BASE(p, prob) \
+ range -= newBound; (p)->low += newBound; *(prob) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); \
+
+#define RC_BIT_0(p, prob) \
+ RC_BIT_0_BASE(p, prob) \
+ RC_NORM(p)
+
+#define RC_BIT_1(p, prob) \
+ RC_BIT_1_BASE(p, prob) \
+ RC_NORM(p)
+
+static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob)
+{
+ UInt32 range, ttt, newBound;
+ range = p->range;
+ RC_BIT_PRE(p, prob)
+ RC_BIT_0(p, prob)
+ p->range = range;
+}
+
+static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym)
+{
+ UInt32 range = p->range;
+ sym |= 0x100;
+ do
+ {
+ UInt32 ttt, newBound;
+ // RangeEnc_EncodeBit(p, probs + (sym >> 8), (sym >> 7) & 1);
+ CLzmaProb *prob = probs + (sym >> 8);
+ UInt32 bit = (sym >> 7) & 1;
+ sym <<= 1;
+ RC_BIT(p, prob, bit)
+ }
+ while (sym < 0x10000);
+ p->range = range;
+}
+
+static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UInt32 matchByte)
+{
+ UInt32 range = p->range;
+ UInt32 offs = 0x100;
+ sym |= 0x100;
+ do
+ {
+ UInt32 ttt, newBound;
+ CLzmaProb *prob;
+ UInt32 bit;
+ matchByte <<= 1;
+ // RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (sym >> 8)), (sym >> 7) & 1);
+ prob = probs + (offs + (matchByte & offs) + (sym >> 8));
+ bit = (sym >> 7) & 1;
+ sym <<= 1;
+ offs &= ~(matchByte ^ sym);
+ RC_BIT(p, prob, bit)
+ }
+ while (sym < 0x10000);
+ p->range = range;
+}
+
+
+
+static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
+{
+ UInt32 i;
+ for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++)
+ {
+ const unsigned kCyclesBits = kNumBitPriceShiftBits;
+ UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1));
+ unsigned bitCount = 0;
+ unsigned j;
+ for (j = 0; j < kCyclesBits; j++)
+ {
+ w = w * w;
+ bitCount <<= 1;
+ while (w >= ((UInt32)1 << 16))
+ {
+ w >>= 1;
+ bitCount++;
+ }
+ }
+ ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
+ // printf("\n%3d: %5d", i, ProbPrices[i]);
+ }
+}
+
+
+#define GET_PRICE(prob, bit) \
+ p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
+
+#define GET_PRICEa(prob, bit) \
+ ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
+
+#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
+#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+
+#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits]
+#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+
+
+static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 sym, const CProbPrice *ProbPrices)
+{
+ UInt32 price = 0;
+ sym |= 0x100;
+ do
+ {
+ unsigned bit = sym & 1;
+ sym >>= 1;
+ price += GET_PRICEa(probs[sym], bit);
+ }
+ while (sym >= 2);
+ return price;
+}
+
+
+static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 sym, UInt32 matchByte, const CProbPrice *ProbPrices)
+{
+ UInt32 price = 0;
+ UInt32 offs = 0x100;
+ sym |= 0x100;
+ do
+ {
+ matchByte <<= 1;
+ price += GET_PRICEa(probs[offs + (matchByte & offs) + (sym >> 8)], (sym >> 7) & 1);
+ sym <<= 1;
+ offs &= ~(matchByte ^ sym);
+ }
+ while (sym < 0x10000);
+ return price;
+}
+
+
+static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBits, unsigned sym)
+{
+ UInt32 range = rc->range;
+ unsigned m = 1;
+ do
+ {
+ UInt32 ttt, newBound;
+ unsigned bit = sym & 1;
+ // RangeEnc_EncodeBit(rc, probs + m, bit);
+ sym >>= 1;
+ RC_BIT(rc, probs + m, bit)
+ m = (m << 1) | bit;
+ }
+ while (--numBits);
+ rc->range = range;
+}
+
+
+
+static void LenEnc_Init(CLenEnc *p)
+{
+ unsigned i;
+ for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++)
+ p->low[i] = kProbInitValue;
+ for (i = 0; i < kLenNumHighSymbols; i++)
+ p->high[i] = kProbInitValue;
+}
+
+static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posState)
+{
+ UInt32 range, ttt, newBound;
+ CLzmaProb *probs = p->low;
+ range = rc->range;
+ RC_BIT_PRE(rc, probs)
+ if (sym >= kLenNumLowSymbols)
+ {
+ RC_BIT_1(rc, probs)
+ probs += kLenNumLowSymbols;
+ RC_BIT_PRE(rc, probs)
+ if (sym >= kLenNumLowSymbols * 2)
+ {
+ RC_BIT_1(rc, probs)
+ rc->range = range;
+ // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2);
+ LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2);
+ return;
+ }
+ sym -= kLenNumLowSymbols;
+ }
+
+ // RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, sym);
+ {
+ unsigned m;
+ unsigned bit;
+ RC_BIT_0(rc, probs)
+ probs += (posState << (1 + kLenNumLowBits));
+ bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit) m = (1 << 1) + bit;
+ bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit) m = (m << 1) + bit;
+ bit = sym & 1; RC_BIT(rc, probs + m, bit)
+ rc->range = range;
+ }
+}
+
+static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices)
+{
+ unsigned i;
+ for (i = 0; i < 8; i += 2)
+ {
+ UInt32 price = startPrice;
+ UInt32 prob;
+ price += GET_PRICEa(probs[1 ], (i >> 2));
+ price += GET_PRICEa(probs[2 + (i >> 2)], (i >> 1) & 1);
+ prob = probs[4 + (i >> 1)];
+ prices[i ] = price + GET_PRICEa_0(prob);
+ prices[i + 1] = price + GET_PRICEa_1(prob);
+ }
+}
+
+
+Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables(
+ CLenPriceEnc *p,
+ unsigned numPosStates,
+ const CLenEnc *enc,
+ const CProbPrice *ProbPrices)
+{
+ UInt32 b;
+
+ {
+ unsigned prob = enc->low[0];
+ UInt32 a, c;
+ unsigned posState;
+ b = GET_PRICEa_1(prob);
+ a = GET_PRICEa_0(prob);
+ c = b + GET_PRICEa_0(enc->low[kLenNumLowSymbols]);
+ for (posState = 0; posState < numPosStates; posState++)
+ {
+ UInt32 *prices = p->prices[posState];
+ const CLzmaProb *probs = enc->low + (posState << (1 + kLenNumLowBits));
+ SetPrices_3(probs, a, prices, ProbPrices);
+ SetPrices_3(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols, ProbPrices);
+ }
+ }
+
+ /*
+ {
+ unsigned i;
+ UInt32 b;
+ a = GET_PRICEa_0(enc->low[0]);
+ for (i = 0; i < kLenNumLowSymbols; i++)
+ p->prices2[i] = a;
+ a = GET_PRICEa_1(enc->low[0]);
+ b = a + GET_PRICEa_0(enc->low[kLenNumLowSymbols]);
+ for (i = kLenNumLowSymbols; i < kLenNumLowSymbols * 2; i++)
+ p->prices2[i] = b;
+ a += GET_PRICEa_1(enc->low[kLenNumLowSymbols]);
+ }
+ */
+
+ // p->counter = numSymbols;
+ // p->counter = 64;
+
+ {
+ unsigned i = p->tableSize;
+
+ if (i > kLenNumLowSymbols * 2)
+ {
+ const CLzmaProb *probs = enc->high;
+ UInt32 *prices = p->prices[0] + kLenNumLowSymbols * 2;
+ i -= kLenNumLowSymbols * 2 - 1;
+ i >>= 1;
+ b += GET_PRICEa_1(enc->low[kLenNumLowSymbols]);
+ do
+ {
+ /*
+ p->prices2[i] = a +
+ // RcTree_GetPrice(enc->high, kLenNumHighBits, i - kLenNumLowSymbols * 2, ProbPrices);
+ LitEnc_GetPrice(probs, i - kLenNumLowSymbols * 2, ProbPrices);
+ */
+ // UInt32 price = a + RcTree_GetPrice(probs, kLenNumHighBits - 1, sym, ProbPrices);
+ unsigned sym = --i + (1 << (kLenNumHighBits - 1));
+ UInt32 price = b;
+ do
+ {
+ unsigned bit = sym & 1;
+ sym >>= 1;
+ price += GET_PRICEa(probs[sym], bit);
+ }
+ while (sym >= 2);
+
+ {
+ unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];
+ prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob);
+ prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob);
+ }
+ }
+ while (i);
+
+ {
+ unsigned posState;
+ size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);
+ for (posState = 1; posState < numPosStates; posState++)
+ memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num);
+ }
+ }
+ }
+}
+
+/*
+ #ifdef SHOW_STAT
+ g_STAT_OFFSET += num;
+ printf("\n MovePos %u", num);
+ #endif
+*/
+
+#define MOVE_POS(p, num) { \
+ p->additionalOffset += (num); \
+ p->matchFinder.Skip(p->matchFinderObj, (UInt32)(num)); }
+
+
+static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
+{
+ unsigned numPairs;
+
+ p->additionalOffset++;
+ p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+ {
+ const UInt32 *d = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
+ // if (!d) { p->mf_Failure = True; *numPairsRes = 0; return 0; }
+ numPairs = (unsigned)(d - p->matches);
+ }
+ *numPairsRes = numPairs;
+
+ #ifdef SHOW_STAT
+ printf("\n i = %u numPairs = %u ", g_STAT_OFFSET, numPairs / 2);
+ g_STAT_OFFSET++;
+ {
+ unsigned i;
+ for (i = 0; i < numPairs; i += 2)
+ printf("%2u %6u | ", p->matches[i], p->matches[i + 1]);
+ }
+ #endif
+
+ if (numPairs == 0)
+ return 0;
+ {
+ const unsigned len = p->matches[(size_t)numPairs - 2];
+ if (len != p->numFastBytes)
+ return len;
+ {
+ UInt32 numAvail = p->numAvail;
+ if (numAvail > LZMA_MATCH_LEN_MAX)
+ numAvail = LZMA_MATCH_LEN_MAX;
+ {
+ const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+ const Byte *p2 = p1 + len;
+ const ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1];
+ const Byte *lim = p1 + numAvail;
+ for (; p2 != lim && *p2 == p2[dif]; p2++)
+ {}
+ return (unsigned)(p2 - p1);
+ }
+ }
+ }
+}
+
+#define MARK_LIT ((UInt32)(Int32)-1)
+
+#define MakeAs_Lit(p) { (p)->dist = MARK_LIT; (p)->extra = 0; }
+#define MakeAs_ShortRep(p) { (p)->dist = 0; (p)->extra = 0; }
+#define IsShortRep(p) ((p)->dist == 0)
+
+
+#define GetPrice_ShortRep(p, state, posState) \
+ ( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState]))
+
+#define GetPrice_Rep_0(p, state, posState) ( \
+ GET_PRICE_1(p->isMatch[state][posState]) \
+ + GET_PRICE_1(p->isRep0Long[state][posState])) \
+ + GET_PRICE_1(p->isRep[state]) \
+ + GET_PRICE_0(p->isRepG0[state])
+
+Z7_FORCE_INLINE
+static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState)
+{
+ UInt32 price;
+ UInt32 prob = p->isRepG0[state];
+ if (repIndex == 0)
+ {
+ price = GET_PRICE_0(prob);
+ price += GET_PRICE_1(p->isRep0Long[state][posState]);
+ }
+ else
+ {
+ price = GET_PRICE_1(prob);
+ prob = p->isRepG1[state];
+ if (repIndex == 1)
+ price += GET_PRICE_0(prob);
+ else
+ {
+ price += GET_PRICE_1(prob);
+ price += GET_PRICE(p->isRepG2[state], repIndex - 2);
+ }
+ }
+ return price;
+}
+
+
+static unsigned Backward(CLzmaEnc *p, unsigned cur)
+{
+ unsigned wr = cur + 1;
+ p->optEnd = wr;
+
+ for (;;)
+ {
+ UInt32 dist = p->opt[cur].dist;
+ unsigned len = (unsigned)p->opt[cur].len;
+ unsigned extra = (unsigned)p->opt[cur].extra;
+ cur -= len;
+
+ if (extra)
+ {
+ wr--;
+ p->opt[wr].len = (UInt32)len;
+ cur -= extra;
+ len = extra;
+ if (extra == 1)
+ {
+ p->opt[wr].dist = dist;
+ dist = MARK_LIT;
+ }
+ else
+ {
+ p->opt[wr].dist = 0;
+ len--;
+ wr--;
+ p->opt[wr].dist = MARK_LIT;
+ p->opt[wr].len = 1;
+ }
+ }
+
+ if (cur == 0)
+ {
+ p->backRes = dist;
+ p->optCur = wr;
+ return len;
+ }
+
+ wr--;
+ p->opt[wr].dist = dist;
+ p->opt[wr].len = (UInt32)len;
+ }
+}
+
+
+
+#define LIT_PROBS(pos, prevByte) \
+ (p->litProbs + (UInt32)3 * (((((pos) << 8) + (prevByte)) & p->lpMask) << p->lc))
+
+
+static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
+{
+ unsigned last, cur;
+ UInt32 reps[LZMA_NUM_REPS];
+ unsigned repLens[LZMA_NUM_REPS];
+ UInt32 *matches;
+
+ {
+ UInt32 numAvail;
+ unsigned numPairs, mainLen, repMaxIndex, i, posState;
+ UInt32 matchPrice, repMatchPrice;
+ const Byte *data;
+ Byte curByte, matchByte;
+
+ p->optCur = p->optEnd = 0;
+
+ if (p->additionalOffset == 0)
+ mainLen = ReadMatchDistances(p, &numPairs);
+ else
+ {
+ mainLen = p->longestMatchLen;
+ numPairs = p->numPairs;
+ }
+
+ numAvail = p->numAvail;
+ if (numAvail < 2)
+ {
+ p->backRes = MARK_LIT;
+ return 1;
+ }
+ if (numAvail > LZMA_MATCH_LEN_MAX)
+ numAvail = LZMA_MATCH_LEN_MAX;
+
+ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+ repMaxIndex = 0;
+
+ for (i = 0; i < LZMA_NUM_REPS; i++)
+ {
+ unsigned len;
+ const Byte *data2;
+ reps[i] = p->reps[i];
+ data2 = data - reps[i];
+ if (data[0] != data2[0] || data[1] != data2[1])
+ {
+ repLens[i] = 0;
+ continue;
+ }
+ for (len = 2; len < numAvail && data[len] == data2[len]; len++)
+ {}
+ repLens[i] = len;
+ if (len > repLens[repMaxIndex])
+ repMaxIndex = i;
+ if (len == LZMA_MATCH_LEN_MAX) // 21.03 : optimization
+ break;
+ }
+
+ if (repLens[repMaxIndex] >= p->numFastBytes)
+ {
+ unsigned len;
+ p->backRes = (UInt32)repMaxIndex;
+ len = repLens[repMaxIndex];
+ MOVE_POS(p, len - 1)
+ return len;
+ }
+
+ matches = p->matches;
+ #define MATCHES matches
+ // #define MATCHES p->matches
+
+ if (mainLen >= p->numFastBytes)
+ {
+ p->backRes = MATCHES[(size_t)numPairs - 1] + LZMA_NUM_REPS;
+ MOVE_POS(p, mainLen - 1)
+ return mainLen;
+ }
+
+ curByte = *data;
+ matchByte = *(data - reps[0]);
+
+ last = repLens[repMaxIndex];
+ if (last <= mainLen)
+ last = mainLen;
+
+ if (last < 2 && curByte != matchByte)
+ {
+ p->backRes = MARK_LIT;
+ return 1;
+ }
+
+ p->opt[0].state = (CState)p->state;
+
+ posState = (position & p->pbMask);
+
+ {
+ const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
+ p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) +
+ (!IsLitState(p->state) ?
+ LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) :
+ LitEnc_GetPrice(probs, curByte, p->ProbPrices));
+ }
+
+ MakeAs_Lit(&p->opt[1])
+
+ matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
+ repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);
+
+ // 18.06
+ if (matchByte == curByte && repLens[0] == 0)
+ {
+ UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, p->state, posState);
+ if (shortRepPrice < p->opt[1].price)
+ {
+ p->opt[1].price = shortRepPrice;
+ MakeAs_ShortRep(&p->opt[1])
+ }
+ if (last < 2)
+ {
+ p->backRes = p->opt[1].dist;
+ return 1;
+ }
+ }
+
+ p->opt[1].len = 1;
+
+ p->opt[0].reps[0] = reps[0];
+ p->opt[0].reps[1] = reps[1];
+ p->opt[0].reps[2] = reps[2];
+ p->opt[0].reps[3] = reps[3];
+
+ // ---------- REP ----------
+
+ for (i = 0; i < LZMA_NUM_REPS; i++)
+ {
+ unsigned repLen = repLens[i];
+ UInt32 price;
+ if (repLen < 2)
+ continue;
+ price = repMatchPrice + GetPrice_PureRep(p, i, p->state, posState);
+ do
+ {
+ UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, repLen);
+ COptimal *opt = &p->opt[repLen];
+ if (price2 < opt->price)
+ {
+ opt->price = price2;
+ opt->len = (UInt32)repLen;
+ opt->dist = (UInt32)i;
+ opt->extra = 0;
+ }
+ }
+ while (--repLen >= 2);
+ }
+
+
+ // ---------- MATCH ----------
+ {
+ unsigned len = repLens[0] + 1;
+ if (len <= mainLen)
+ {
+ unsigned offs = 0;
+ UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]);
+
+ if (len < 2)
+ len = 2;
+ else
+ while (len > MATCHES[offs])
+ offs += 2;
+
+ for (; ; len++)
+ {
+ COptimal *opt;
+ UInt32 dist = MATCHES[(size_t)offs + 1];
+ UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
+ unsigned lenToPosState = GetLenToPosState(len);
+
+ if (dist < kNumFullDistances)
+ price += p->distancesPrices[lenToPosState][dist & (kNumFullDistances - 1)];
+ else
+ {
+ unsigned slot;
+ GetPosSlot2(dist, slot)
+ price += p->alignPrices[dist & kAlignMask];
+ price += p->posSlotPrices[lenToPosState][slot];
+ }
+
+ opt = &p->opt[len];
+
+ if (price < opt->price)
+ {
+ opt->price = price;
+ opt->len = (UInt32)len;
+ opt->dist = dist + LZMA_NUM_REPS;
+ opt->extra = 0;
+ }
+
+ if (len == MATCHES[offs])
+ {
+ offs += 2;
+ if (offs == numPairs)
+ break;
+ }
+ }
+ }
+ }
+
+
+ cur = 0;
+
+ #ifdef SHOW_STAT2
+ /* if (position >= 0) */
+ {
+ unsigned i;
+ printf("\n pos = %4X", position);
+ for (i = cur; i <= last; i++)
+ printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price);
+ }
+ #endif
+ }
+
+
+
+ // ---------- Optimal Parsing ----------
+
+ for (;;)
+ {
+ unsigned numAvail;
+ UInt32 numAvailFull;
+ unsigned newLen, numPairs, prev, state, posState, startLen;
+ UInt32 litPrice, matchPrice, repMatchPrice;
+ BoolInt nextIsLit;
+ Byte curByte, matchByte;
+ const Byte *data;
+ COptimal *curOpt, *nextOpt;
+
+ if (++cur == last)
+ break;
+
+ // 18.06
+ if (cur >= kNumOpts - 64)
+ {
+ unsigned j, best;
+ UInt32 price = p->opt[cur].price;
+ best = cur;
+ for (j = cur + 1; j <= last; j++)
+ {
+ UInt32 price2 = p->opt[j].price;
+ if (price >= price2)
+ {
+ price = price2;
+ best = j;
+ }
+ }
+ {
+ unsigned delta = best - cur;
+ if (delta != 0)
+ {
+ MOVE_POS(p, delta)
+ }
+ }
+ cur = best;
+ break;
+ }
+
+ newLen = ReadMatchDistances(p, &numPairs);
+
+ if (newLen >= p->numFastBytes)
+ {
+ p->numPairs = numPairs;
+ p->longestMatchLen = newLen;
+ break;
+ }
+
+ curOpt = &p->opt[cur];
+
+ position++;
+
+ // we need that check here, if skip_items in p->opt are possible
+ /*
+ if (curOpt->price >= kInfinityPrice)
+ continue;
+ */
+
+ prev = cur - curOpt->len;
+
+ if (curOpt->len == 1)
+ {
+ state = (unsigned)p->opt[prev].state;
+ if (IsShortRep(curOpt))
+ state = kShortRepNextStates[state];
+ else
+ state = kLiteralNextStates[state];
+ }
+ else
+ {
+ const COptimal *prevOpt;
+ UInt32 b0;
+ UInt32 dist = curOpt->dist;
+
+ if (curOpt->extra)
+ {
+ prev -= (unsigned)curOpt->extra;
+ state = kState_RepAfterLit;
+ if (curOpt->extra == 1)
+ state = (dist < LZMA_NUM_REPS ? kState_RepAfterLit : kState_MatchAfterLit);
+ }
+ else
+ {
+ state = (unsigned)p->opt[prev].state;
+ if (dist < LZMA_NUM_REPS)
+ state = kRepNextStates[state];
+ else
+ state = kMatchNextStates[state];
+ }
+
+ prevOpt = &p->opt[prev];
+ b0 = prevOpt->reps[0];
+
+ if (dist < LZMA_NUM_REPS)
+ {
+ if (dist == 0)
+ {
+ reps[0] = b0;
+ reps[1] = prevOpt->reps[1];
+ reps[2] = prevOpt->reps[2];
+ reps[3] = prevOpt->reps[3];
+ }
+ else
+ {
+ reps[1] = b0;
+ b0 = prevOpt->reps[1];
+ if (dist == 1)
+ {
+ reps[0] = b0;
+ reps[2] = prevOpt->reps[2];
+ reps[3] = prevOpt->reps[3];
+ }
+ else
+ {
+ reps[2] = b0;
+ reps[0] = prevOpt->reps[dist];
+ reps[3] = prevOpt->reps[dist ^ 1];
+ }
+ }
+ }
+ else
+ {
+ reps[0] = (dist - LZMA_NUM_REPS + 1);
+ reps[1] = b0;
+ reps[2] = prevOpt->reps[1];
+ reps[3] = prevOpt->reps[2];
+ }
+ }
+
+ curOpt->state = (CState)state;
+ curOpt->reps[0] = reps[0];
+ curOpt->reps[1] = reps[1];
+ curOpt->reps[2] = reps[2];
+ curOpt->reps[3] = reps[3];
+
+ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+ curByte = *data;
+ matchByte = *(data - reps[0]);
+
+ posState = (position & p->pbMask);
+
+ /*
+ The order of Price checks:
+ < LIT
+ <= SHORT_REP
+ < LIT : REP_0
+ < REP [ : LIT : REP_0 ]
+ < MATCH [ : LIT : REP_0 ]
+ */
+
+ {
+ UInt32 curPrice = curOpt->price;
+ unsigned prob = p->isMatch[state][posState];
+ matchPrice = curPrice + GET_PRICE_1(prob);
+ litPrice = curPrice + GET_PRICE_0(prob);
+ }
+
+ nextOpt = &p->opt[(size_t)cur + 1];
+ nextIsLit = False;
+
+ // here we can allow skip_items in p->opt, if we don't check (nextOpt->price < kInfinityPrice)
+ // 18.new.06
+ if ((nextOpt->price < kInfinityPrice
+ // && !IsLitState(state)
+ && matchByte == curByte)
+ || litPrice > nextOpt->price
+ )
+ litPrice = 0;
+ else
+ {
+ const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
+ litPrice += (!IsLitState(state) ?
+ LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) :
+ LitEnc_GetPrice(probs, curByte, p->ProbPrices));
+
+ if (litPrice < nextOpt->price)
+ {
+ nextOpt->price = litPrice;
+ nextOpt->len = 1;
+ MakeAs_Lit(nextOpt)
+ nextIsLit = True;
+ }
+ }
+
+ repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]);
+
+ numAvailFull = p->numAvail;
+ {
+ unsigned temp = kNumOpts - 1 - cur;
+ if (numAvailFull > temp)
+ numAvailFull = (UInt32)temp;
+ }
+
+ // 18.06
+ // ---------- SHORT_REP ----------
+ if (IsLitState(state)) // 18.new
+ if (matchByte == curByte)
+ if (repMatchPrice < nextOpt->price) // 18.new
+ // if (numAvailFull < 2 || data[1] != *(data - reps[0] + 1))
+ if (
+ // nextOpt->price >= kInfinityPrice ||
+ nextOpt->len < 2 // we can check nextOpt->len, if skip items are not allowed in p->opt
+ || (nextOpt->dist != 0
+ // && nextOpt->extra <= 1 // 17.old
+ )
+ )
+ {
+ UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, state, posState);
+ // if (shortRepPrice <= nextOpt->price) // 17.old
+ if (shortRepPrice < nextOpt->price) // 18.new
+ {
+ nextOpt->price = shortRepPrice;
+ nextOpt->len = 1;
+ MakeAs_ShortRep(nextOpt)
+ nextIsLit = False;
+ }
+ }
+
+ if (numAvailFull < 2)
+ continue;
+ numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes);
+
+ // numAvail <= p->numFastBytes
+
+ // ---------- LIT : REP_0 ----------
+
+ if (!nextIsLit
+ && litPrice != 0 // 18.new
+ && matchByte != curByte
+ && numAvailFull > 2)
+ {
+ const Byte *data2 = data - reps[0];
+ if (data[1] == data2[1] && data[2] == data2[2])
+ {
+ unsigned len;
+ unsigned limit = p->numFastBytes + 1;
+ if (limit > numAvailFull)
+ limit = numAvailFull;
+ for (len = 3; len < limit && data[len] == data2[len]; len++)
+ {}
+
+ {
+ unsigned state2 = kLiteralNextStates[state];
+ unsigned posState2 = (position + 1) & p->pbMask;
+ UInt32 price = litPrice + GetPrice_Rep_0(p, state2, posState2);
+ {
+ unsigned offset = cur + len;
+
+ if (last < offset)
+ last = offset;
+
+ // do
+ {
+ UInt32 price2;
+ COptimal *opt;
+ len--;
+ // price2 = price + GetPrice_Len_Rep_0(p, len, state2, posState2);
+ price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len);
+
+ opt = &p->opt[offset];
+ // offset--;
+ if (price2 < opt->price)
+ {
+ opt->price = price2;
+ opt->len = (UInt32)len;
+ opt->dist = 0;
+ opt->extra = 1;
+ }
+ }
+ // while (len >= 3);
+ }
+ }
+ }
+ }
+
+ startLen = 2; /* speed optimization */
+
+ {
+ // ---------- REP ----------
+ unsigned repIndex = 0; // 17.old
+ // unsigned repIndex = IsLitState(state) ? 0 : 1; // 18.notused
+ for (; repIndex < LZMA_NUM_REPS; repIndex++)
+ {
+ unsigned len;
+ UInt32 price;
+ const Byte *data2 = data - reps[repIndex];
+ if (data[0] != data2[0] || data[1] != data2[1])
+ continue;
+
+ for (len = 2; len < numAvail && data[len] == data2[len]; len++)
+ {}
+
+ // if (len < startLen) continue; // 18.new: speed optimization
+
+ {
+ unsigned offset = cur + len;
+ if (last < offset)
+ last = offset;
+ }
+ {
+ unsigned len2 = len;
+ price = repMatchPrice + GetPrice_PureRep(p, repIndex, state, posState);
+ do
+ {
+ UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, len2);
+ COptimal *opt = &p->opt[cur + len2];
+ if (price2 < opt->price)
+ {
+ opt->price = price2;
+ opt->len = (UInt32)len2;
+ opt->dist = (UInt32)repIndex;
+ opt->extra = 0;
+ }
+ }
+ while (--len2 >= 2);
+ }
+
+ if (repIndex == 0) startLen = len + 1; // 17.old
+ // startLen = len + 1; // 18.new
+
+ /* if (_maxMode) */
+ {
+ // ---------- REP : LIT : REP_0 ----------
+ // numFastBytes + 1 + numFastBytes
+
+ unsigned len2 = len + 1;
+ unsigned limit = len2 + p->numFastBytes;
+ if (limit > numAvailFull)
+ limit = numAvailFull;
+
+ len2 += 2;
+ if (len2 <= limit)
+ if (data[len2 - 2] == data2[len2 - 2])
+ if (data[len2 - 1] == data2[len2 - 1])
+ {
+ unsigned state2 = kRepNextStates[state];
+ unsigned posState2 = (position + len) & p->pbMask;
+ price += GET_PRICE_LEN(&p->repLenEnc, posState, len)
+ + GET_PRICE_0(p->isMatch[state2][posState2])
+ + LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]),
+ data[len], data2[len], p->ProbPrices);
+
+ // state2 = kLiteralNextStates[state2];
+ state2 = kState_LitAfterRep;
+ posState2 = (posState2 + 1) & p->pbMask;
+
+
+ price += GetPrice_Rep_0(p, state2, posState2);
+
+ for (; len2 < limit && data[len2] == data2[len2]; len2++)
+ {}
+
+ len2 -= len;
+ // if (len2 >= 3)
+ {
+ {
+ unsigned offset = cur + len + len2;
+
+ if (last < offset)
+ last = offset;
+ // do
+ {
+ UInt32 price2;
+ COptimal *opt;
+ len2--;
+ // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2);
+ price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2);
+
+ opt = &p->opt[offset];
+ // offset--;
+ if (price2 < opt->price)
+ {
+ opt->price = price2;
+ opt->len = (UInt32)len2;
+ opt->extra = (CExtra)(len + 1);
+ opt->dist = (UInt32)repIndex;
+ }
+ }
+ // while (len2 >= 3);
+ }
+ }
+ }
+ }
+ }
+ }
+
+
+ // ---------- MATCH ----------
+ /* for (unsigned len = 2; len <= newLen; len++) */
+ if (newLen > numAvail)
+ {
+ newLen = numAvail;
+ for (numPairs = 0; newLen > MATCHES[numPairs]; numPairs += 2);
+ MATCHES[numPairs] = (UInt32)newLen;
+ numPairs += 2;
+ }
+
+ // startLen = 2; /* speed optimization */
+
+ if (newLen >= startLen)
+ {
+ UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]);
+ UInt32 dist;
+ unsigned offs, posSlot, len;
+
+ {
+ unsigned offset = cur + newLen;
+ if (last < offset)
+ last = offset;
+ }
+
+ offs = 0;
+ while (startLen > MATCHES[offs])
+ offs += 2;
+ dist = MATCHES[(size_t)offs + 1];
+
+ // if (dist >= kNumFullDistances)
+ GetPosSlot2(dist, posSlot)
+
+ for (len = /*2*/ startLen; ; len++)
+ {
+ UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
+ {
+ COptimal *opt;
+ unsigned lenNorm = len - 2;
+ lenNorm = GetLenToPosState2(lenNorm);
+ if (dist < kNumFullDistances)
+ price += p->distancesPrices[lenNorm][dist & (kNumFullDistances - 1)];
+ else
+ price += p->posSlotPrices[lenNorm][posSlot] + p->alignPrices[dist & kAlignMask];
+
+ opt = &p->opt[cur + len];
+ if (price < opt->price)
+ {
+ opt->price = price;
+ opt->len = (UInt32)len;
+ opt->dist = dist + LZMA_NUM_REPS;
+ opt->extra = 0;
+ }
+ }
+
+ if (len == MATCHES[offs])
+ {
+ // if (p->_maxMode) {
+ // MATCH : LIT : REP_0
+
+ const Byte *data2 = data - dist - 1;
+ unsigned len2 = len + 1;
+ unsigned limit = len2 + p->numFastBytes;
+ if (limit > numAvailFull)
+ limit = numAvailFull;
+
+ len2 += 2;
+ if (len2 <= limit)
+ if (data[len2 - 2] == data2[len2 - 2])
+ if (data[len2 - 1] == data2[len2 - 1])
+ {
+ for (; len2 < limit && data[len2] == data2[len2]; len2++)
+ {}
+
+ len2 -= len;
+
+ // if (len2 >= 3)
+ {
+ unsigned state2 = kMatchNextStates[state];
+ unsigned posState2 = (position + len) & p->pbMask;
+ unsigned offset;
+ price += GET_PRICE_0(p->isMatch[state2][posState2]);
+ price += LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]),
+ data[len], data2[len], p->ProbPrices);
+
+ // state2 = kLiteralNextStates[state2];
+ state2 = kState_LitAfterMatch;
+
+ posState2 = (posState2 + 1) & p->pbMask;
+ price += GetPrice_Rep_0(p, state2, posState2);
+
+ offset = cur + len + len2;
+
+ if (last < offset)
+ last = offset;
+ // do
+ {
+ UInt32 price2;
+ COptimal *opt;
+ len2--;
+ // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2);
+ price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2);
+ opt = &p->opt[offset];
+ // offset--;
+ if (price2 < opt->price)
+ {
+ opt->price = price2;
+ opt->len = (UInt32)len2;
+ opt->extra = (CExtra)(len + 1);
+ opt->dist = dist + LZMA_NUM_REPS;
+ }
+ }
+ // while (len2 >= 3);
+ }
+
+ }
+
+ offs += 2;
+ if (offs == numPairs)
+ break;
+ dist = MATCHES[(size_t)offs + 1];
+ // if (dist >= kNumFullDistances)
+ GetPosSlot2(dist, posSlot)
+ }
+ }
+ }
+ }
+
+ do
+ p->opt[last].price = kInfinityPrice;
+ while (--last);
+
+ return Backward(p, cur);
+}
+
+
+
+#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist))
+
+
+
+static unsigned GetOptimumFast(CLzmaEnc *p)
+{
+ UInt32 numAvail, mainDist;
+ unsigned mainLen, numPairs, repIndex, repLen, i;
+ const Byte *data;
+
+ if (p->additionalOffset == 0)
+ mainLen = ReadMatchDistances(p, &numPairs);
+ else
+ {
+ mainLen = p->longestMatchLen;
+ numPairs = p->numPairs;
+ }
+
+ numAvail = p->numAvail;
+ p->backRes = MARK_LIT;
+ if (numAvail < 2)
+ return 1;
+ // if (mainLen < 2 && p->state == 0) return 1; // 18.06.notused
+ if (numAvail > LZMA_MATCH_LEN_MAX)
+ numAvail = LZMA_MATCH_LEN_MAX;
+ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+ repLen = repIndex = 0;
+
+ for (i = 0; i < LZMA_NUM_REPS; i++)
+ {
+ unsigned len;
+ const Byte *data2 = data - p->reps[i];
+ if (data[0] != data2[0] || data[1] != data2[1])
+ continue;
+ for (len = 2; len < numAvail && data[len] == data2[len]; len++)
+ {}
+ if (len >= p->numFastBytes)
+ {
+ p->backRes = (UInt32)i;
+ MOVE_POS(p, len - 1)
+ return len;
+ }
+ if (len > repLen)
+ {
+ repIndex = i;
+ repLen = len;
+ }
+ }
+
+ if (mainLen >= p->numFastBytes)
+ {
+ p->backRes = p->matches[(size_t)numPairs - 1] + LZMA_NUM_REPS;
+ MOVE_POS(p, mainLen - 1)
+ return mainLen;
+ }
+
+ mainDist = 0; /* for GCC */
+
+ if (mainLen >= 2)
+ {
+ mainDist = p->matches[(size_t)numPairs - 1];
+ while (numPairs > 2)
+ {
+ UInt32 dist2;
+ if (mainLen != p->matches[(size_t)numPairs - 4] + 1)
+ break;
+ dist2 = p->matches[(size_t)numPairs - 3];
+ if (!ChangePair(dist2, mainDist))
+ break;
+ numPairs -= 2;
+ mainLen--;
+ mainDist = dist2;
+ }
+ if (mainLen == 2 && mainDist >= 0x80)
+ mainLen = 1;
+ }
+
+ if (repLen >= 2)
+ if ( repLen + 1 >= mainLen
+ || (repLen + 2 >= mainLen && mainDist >= (1 << 9))
+ || (repLen + 3 >= mainLen && mainDist >= (1 << 15)))
+ {
+ p->backRes = (UInt32)repIndex;
+ MOVE_POS(p, repLen - 1)
+ return repLen;
+ }
+
+ if (mainLen < 2 || numAvail <= 2)
+ return 1;
+
+ {
+ unsigned len1 = ReadMatchDistances(p, &p->numPairs);
+ p->longestMatchLen = len1;
+
+ if (len1 >= 2)
+ {
+ UInt32 newDist = p->matches[(size_t)p->numPairs - 1];
+ if ( (len1 >= mainLen && newDist < mainDist)
+ || (len1 == mainLen + 1 && !ChangePair(mainDist, newDist))
+ || (len1 > mainLen + 1)
+ || (len1 + 1 >= mainLen && mainLen >= 3 && ChangePair(newDist, mainDist)))
+ return 1;
+ }
+ }
+
+ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+
+ for (i = 0; i < LZMA_NUM_REPS; i++)
+ {
+ unsigned len, limit;
+ const Byte *data2 = data - p->reps[i];
+ if (data[0] != data2[0] || data[1] != data2[1])
+ continue;
+ limit = mainLen - 1;
+ for (len = 2;; len++)
+ {
+ if (len >= limit)
+ return 1;
+ if (data[len] != data2[len])
+ break;
+ }
+ }
+
+ p->backRes = mainDist + LZMA_NUM_REPS;
+ if (mainLen != 2)
+ {
+ MOVE_POS(p, mainLen - 2)
+ }
+ return mainLen;
+}
+
+
+
+
+static void WriteEndMarker(CLzmaEnc *p, unsigned posState)
+{
+ UInt32 range;
+ range = p->rc.range;
+ {
+ UInt32 ttt, newBound;
+ CLzmaProb *prob = &p->isMatch[p->state][posState];
+ RC_BIT_PRE(&p->rc, prob)
+ RC_BIT_1(&p->rc, prob)
+ prob = &p->isRep[p->state];
+ RC_BIT_PRE(&p->rc, prob)
+ RC_BIT_0(&p->rc, prob)
+ }
+ p->state = kMatchNextStates[p->state];
+
+ p->rc.range = range;
+ LenEnc_Encode(&p->lenProbs, &p->rc, 0, posState);
+ range = p->rc.range;
+
+ {
+ // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[0], (1 << kNumPosSlotBits) - 1);
+ CLzmaProb *probs = p->posSlotEncoder[0];
+ unsigned m = 1;
+ do
+ {
+ UInt32 ttt, newBound;
+ RC_BIT_PRE(p, probs + m)
+ RC_BIT_1(&p->rc, probs + m)
+ m = (m << 1) + 1;
+ }
+ while (m < (1 << kNumPosSlotBits));
+ }
+ {
+ // RangeEnc_EncodeDirectBits(&p->rc, ((UInt32)1 << (30 - kNumAlignBits)) - 1, 30 - kNumAlignBits); UInt32 range = p->range;
+ unsigned numBits = 30 - kNumAlignBits;
+ do
+ {
+ range >>= 1;
+ p->rc.low += range;
+ RC_NORM(&p->rc)
+ }
+ while (--numBits);
+ }
+
+ {
+ // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask);
+ CLzmaProb *probs = p->posAlignEncoder;
+ unsigned m = 1;
+ do
+ {
+ UInt32 ttt, newBound;
+ RC_BIT_PRE(p, probs + m)
+ RC_BIT_1(&p->rc, probs + m)
+ m = (m << 1) + 1;
+ }
+ while (m < kAlignTableSize);
+ }
+ p->rc.range = range;
+}
+
+
+static SRes CheckErrors(CLzmaEnc *p)
+{
+ if (p->result != SZ_OK)
+ return p->result;
+ if (p->rc.res != SZ_OK)
+ p->result = SZ_ERROR_WRITE;
+
+ #ifndef Z7_ST
+ if (
+ // p->mf_Failure ||
+ (p->mtMode &&
+ ( // p->matchFinderMt.failure_LZ_LZ ||
+ p->matchFinderMt.failure_LZ_BT))
+ )
+ {
+ p->result = MY_HRES_ERROR_INTERNAL_ERROR;
+ // printf("\nCheckErrors p->matchFinderMt.failureLZ\n");
+ }
+ #endif
+
+ if (MFB.result != SZ_OK)
+ p->result = SZ_ERROR_READ;
+
+ if (p->result != SZ_OK)
+ p->finished = True;
+ return p->result;
+}
+
+
+Z7_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
+{
+ /* ReleaseMFStream(); */
+ p->finished = True;
+ if (p->writeEndMark)
+ WriteEndMarker(p, nowPos & p->pbMask);
+ RangeEnc_FlushData(&p->rc);
+ RangeEnc_FlushStream(&p->rc);
+ return CheckErrors(p);
+}
+
+
+Z7_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
+{
+ unsigned i;
+ const CProbPrice *ProbPrices = p->ProbPrices;
+ const CLzmaProb *probs = p->posAlignEncoder;
+ // p->alignPriceCount = 0;
+ for (i = 0; i < kAlignTableSize / 2; i++)
+ {
+ UInt32 price = 0;
+ unsigned sym = i;
+ unsigned m = 1;
+ unsigned bit;
+ UInt32 prob;
+ bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
+ bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
+ bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
+ prob = probs[m];
+ p->alignPrices[i ] = price + GET_PRICEa_0(prob);
+ p->alignPrices[i + 8] = price + GET_PRICEa_1(prob);
+ // p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices);
+ }
+}
+
+
+Z7_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
+{
+ // int y; for (y = 0; y < 100; y++) {
+
+ UInt32 tempPrices[kNumFullDistances];
+ unsigned i, lps;
+
+ const CProbPrice *ProbPrices = p->ProbPrices;
+ p->matchPriceCount = 0;
+
+ for (i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++)
+ {
+ unsigned posSlot = GetPosSlot1(i);
+ unsigned footerBits = (posSlot >> 1) - 1;
+ unsigned base = ((2 | (posSlot & 1)) << footerBits);
+ const CLzmaProb *probs = p->posEncoders + (size_t)base * 2;
+ // tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base, footerBits, i - base, p->ProbPrices);
+ UInt32 price = 0;
+ unsigned m = 1;
+ unsigned sym = i;
+ unsigned offset = (unsigned)1 << footerBits;
+ base += i;
+
+ if (footerBits)
+ do
+ {
+ unsigned bit = sym & 1;
+ sym >>= 1;
+ price += GET_PRICEa(probs[m], bit);
+ m = (m << 1) + bit;
+ }
+ while (--footerBits);
+
+ {
+ unsigned prob = probs[m];
+ tempPrices[base ] = price + GET_PRICEa_0(prob);
+ tempPrices[base + offset] = price + GET_PRICEa_1(prob);
+ }
+ }
+
+ for (lps = 0; lps < kNumLenToPosStates; lps++)
+ {
+ unsigned slot;
+ unsigned distTableSize2 = (p->distTableSize + 1) >> 1;
+ UInt32 *posSlotPrices = p->posSlotPrices[lps];
+ const CLzmaProb *probs = p->posSlotEncoder[lps];
+
+ for (slot = 0; slot < distTableSize2; slot++)
+ {
+ // posSlotPrices[slot] = RcTree_GetPrice(encoder, kNumPosSlotBits, slot, p->ProbPrices);
+ UInt32 price;
+ unsigned bit;
+ unsigned sym = slot + (1 << (kNumPosSlotBits - 1));
+ unsigned prob;
+ bit = sym & 1; sym >>= 1; price = GET_PRICEa(probs[sym], bit);
+ bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+ bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+ bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+ bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+ prob = probs[(size_t)slot + (1 << (kNumPosSlotBits - 1))];
+ posSlotPrices[(size_t)slot * 2 ] = price + GET_PRICEa_0(prob);
+ posSlotPrices[(size_t)slot * 2 + 1] = price + GET_PRICEa_1(prob);
+ }
+
+ {
+ UInt32 delta = ((UInt32)((kEndPosModelIndex / 2 - 1) - kNumAlignBits) << kNumBitPriceShiftBits);
+ for (slot = kEndPosModelIndex / 2; slot < distTableSize2; slot++)
+ {
+ posSlotPrices[(size_t)slot * 2 ] += delta;
+ posSlotPrices[(size_t)slot * 2 + 1] += delta;
+ delta += ((UInt32)1 << kNumBitPriceShiftBits);
+ }
+ }
+
+ {
+ UInt32 *dp = p->distancesPrices[lps];
+
+ dp[0] = posSlotPrices[0];
+ dp[1] = posSlotPrices[1];
+ dp[2] = posSlotPrices[2];
+ dp[3] = posSlotPrices[3];
+
+ for (i = 4; i < kNumFullDistances; i += 2)
+ {
+ UInt32 slotPrice = posSlotPrices[GetPosSlot1(i)];
+ dp[i ] = slotPrice + tempPrices[i];
+ dp[i + 1] = slotPrice + tempPrices[i + 1];
+ }
+ }
+ }
+ // }
+}
+
+
+
+static void LzmaEnc_Construct(CLzmaEnc *p)
+{
+ RangeEnc_Construct(&p->rc);
+ MatchFinder_Construct(&MFB);
+
+ #ifndef Z7_ST
+ p->matchFinderMt.MatchFinder = &MFB;
+ MatchFinderMt_Construct(&p->matchFinderMt);
+ #endif
+
+ {
+ CLzmaEncProps props;
+ LzmaEncProps_Init(&props);
+ LzmaEnc_SetProps((CLzmaEncHandle)(void *)p, &props);
+ }
+
+ #ifndef LZMA_LOG_BSR
+ LzmaEnc_FastPosInit(p->g_FastPos);
+ #endif
+
+ LzmaEnc_InitPriceTables(p->ProbPrices);
+ p->litProbs = NULL;
+ p->saveState.litProbs = NULL;
+}
+
+CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
+{
+ void *p;
+ p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc));
+ if (p)
+ LzmaEnc_Construct((CLzmaEnc *)p);
+ return p;
+}
+
+static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->litProbs);
+ ISzAlloc_Free(alloc, p->saveState.litProbs);
+ p->litProbs = NULL;
+ p->saveState.litProbs = NULL;
+}
+
+static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ #ifndef Z7_ST
+ MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
+ #endif
+
+ MatchFinder_Free(&MFB, allocBig);
+ LzmaEnc_FreeLits(p, alloc);
+ RangeEnc_Free(&p->rc, alloc);
+}
+
+void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ // GET_CLzmaEnc_p
+ LzmaEnc_Destruct(p, alloc, allocBig);
+ ISzAlloc_Free(alloc, p);
+}
+
+
+Z7_NO_INLINE
+static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize)
+{
+ UInt32 nowPos32, startPos32;
+ if (p->needInit)
+ {
+ #ifndef Z7_ST
+ if (p->mtMode)
+ {
+ RINOK(MatchFinderMt_InitMt(&p->matchFinderMt))
+ }
+ #endif
+ p->matchFinder.Init(p->matchFinderObj);
+ p->needInit = 0;
+ }
+
+ if (p->finished)
+ return p->result;
+ RINOK(CheckErrors(p))
+
+ nowPos32 = (UInt32)p->nowPos64;
+ startPos32 = nowPos32;
+
+ if (p->nowPos64 == 0)
+ {
+ unsigned numPairs;
+ Byte curByte;
+ if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
+ return Flush(p, nowPos32);
+ ReadMatchDistances(p, &numPairs);
+ RangeEnc_EncodeBit_0(&p->rc, &p->isMatch[kState_Start][0]);
+ // p->state = kLiteralNextStates[p->state];
+ curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset);
+ LitEnc_Encode(&p->rc, p->litProbs, curByte);
+ p->additionalOffset--;
+ nowPos32++;
+ }
+
+ if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0)
+
+ for (;;)
+ {
+ UInt32 dist;
+ unsigned len, posState;
+ UInt32 range, ttt, newBound;
+ CLzmaProb *probs;
+
+ if (p->fastMode)
+ len = GetOptimumFast(p);
+ else
+ {
+ unsigned oci = p->optCur;
+ if (p->optEnd == oci)
+ len = GetOptimum(p, nowPos32);
+ else
+ {
+ const COptimal *opt = &p->opt[oci];
+ len = opt->len;
+ p->backRes = opt->dist;
+ p->optCur = oci + 1;
+ }
+ }
+
+ posState = (unsigned)nowPos32 & p->pbMask;
+ range = p->rc.range;
+ probs = &p->isMatch[p->state][posState];
+
+ RC_BIT_PRE(&p->rc, probs)
+
+ dist = p->backRes;
+
+ #ifdef SHOW_STAT2
+ printf("\n pos = %6X, len = %3u pos = %6u", nowPos32, len, dist);
+ #endif
+
+ if (dist == MARK_LIT)
+ {
+ Byte curByte;
+ const Byte *data;
+ unsigned state;
+
+ RC_BIT_0(&p->rc, probs)
+ p->rc.range = range;
+ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
+ probs = LIT_PROBS(nowPos32, *(data - 1));
+ curByte = *data;
+ state = p->state;
+ p->state = kLiteralNextStates[state];
+ if (IsLitState(state))
+ LitEnc_Encode(&p->rc, probs, curByte);
+ else
+ LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0]));
+ }
+ else
+ {
+ RC_BIT_1(&p->rc, probs)
+ probs = &p->isRep[p->state];
+ RC_BIT_PRE(&p->rc, probs)
+
+ if (dist < LZMA_NUM_REPS)
+ {
+ RC_BIT_1(&p->rc, probs)
+ probs = &p->isRepG0[p->state];
+ RC_BIT_PRE(&p->rc, probs)
+ if (dist == 0)
+ {
+ RC_BIT_0(&p->rc, probs)
+ probs = &p->isRep0Long[p->state][posState];
+ RC_BIT_PRE(&p->rc, probs)
+ if (len != 1)
+ {
+ RC_BIT_1_BASE(&p->rc, probs)
+ }
+ else
+ {
+ RC_BIT_0_BASE(&p->rc, probs)
+ p->state = kShortRepNextStates[p->state];
+ }
+ }
+ else
+ {
+ RC_BIT_1(&p->rc, probs)
+ probs = &p->isRepG1[p->state];
+ RC_BIT_PRE(&p->rc, probs)
+ if (dist == 1)
+ {
+ RC_BIT_0_BASE(&p->rc, probs)
+ dist = p->reps[1];
+ }
+ else
+ {
+ RC_BIT_1(&p->rc, probs)
+ probs = &p->isRepG2[p->state];
+ RC_BIT_PRE(&p->rc, probs)
+ if (dist == 2)
+ {
+ RC_BIT_0_BASE(&p->rc, probs)
+ dist = p->reps[2];
+ }
+ else
+ {
+ RC_BIT_1_BASE(&p->rc, probs)
+ dist = p->reps[3];
+ p->reps[3] = p->reps[2];
+ }
+ p->reps[2] = p->reps[1];
+ }
+ p->reps[1] = p->reps[0];
+ p->reps[0] = dist;
+ }
+
+ RC_NORM(&p->rc)
+
+ p->rc.range = range;
+
+ if (len != 1)
+ {
+ LenEnc_Encode(&p->repLenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState);
+ --p->repLenEncCounter;
+ p->state = kRepNextStates[p->state];
+ }
+ }
+ else
+ {
+ unsigned posSlot;
+ RC_BIT_0(&p->rc, probs)
+ p->rc.range = range;
+ p->state = kMatchNextStates[p->state];
+
+ LenEnc_Encode(&p->lenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState);
+ // --p->lenEnc.counter;
+
+ dist -= LZMA_NUM_REPS;
+ p->reps[3] = p->reps[2];
+ p->reps[2] = p->reps[1];
+ p->reps[1] = p->reps[0];
+ p->reps[0] = dist + 1;
+
+ p->matchPriceCount++;
+ GetPosSlot(dist, posSlot)
+ // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot);
+ {
+ UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits);
+ range = p->rc.range;
+ probs = p->posSlotEncoder[GetLenToPosState(len)];
+ do
+ {
+ CLzmaProb *prob = probs + (sym >> kNumPosSlotBits);
+ UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1;
+ sym <<= 1;
+ RC_BIT(&p->rc, prob, bit)
+ }
+ while (sym < (1 << kNumPosSlotBits * 2));
+ p->rc.range = range;
+ }
+
+ if (dist >= kStartPosModelIndex)
+ {
+ unsigned footerBits = ((posSlot >> 1) - 1);
+
+ if (dist < kNumFullDistances)
+ {
+ unsigned base = ((2 | (posSlot & 1)) << footerBits);
+ RcTree_ReverseEncode(&p->rc, p->posEncoders + base, footerBits, (unsigned)(dist /* - base */));
+ }
+ else
+ {
+ UInt32 pos2 = (dist | 0xF) << (32 - footerBits);
+ range = p->rc.range;
+ // RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits);
+ /*
+ do
+ {
+ range >>= 1;
+ p->rc.low += range & (0 - ((dist >> --footerBits) & 1));
+ RC_NORM(&p->rc)
+ }
+ while (footerBits > kNumAlignBits);
+ */
+ do
+ {
+ range >>= 1;
+ p->rc.low += range & (0 - (pos2 >> 31));
+ pos2 += pos2;
+ RC_NORM(&p->rc)
+ }
+ while (pos2 != 0xF0000000);
+
+
+ // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask);
+
+ {
+ unsigned m = 1;
+ unsigned bit;
+ bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
+ bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
+ bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
+ bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit)
+ p->rc.range = range;
+ // p->alignPriceCount++;
+ }
+ }
+ }
+ }
+ }
+
+ nowPos32 += (UInt32)len;
+ p->additionalOffset -= len;
+
+ if (p->additionalOffset == 0)
+ {
+ UInt32 processed;
+
+ if (!p->fastMode)
+ {
+ /*
+ if (p->alignPriceCount >= 16) // kAlignTableSize
+ FillAlignPrices(p);
+ if (p->matchPriceCount >= 128)
+ FillDistancesPrices(p);
+ if (p->lenEnc.counter <= 0)
+ LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
+ */
+ if (p->matchPriceCount >= 64)
+ {
+ FillAlignPrices(p);
+ // { int y; for (y = 0; y < 100; y++) {
+ FillDistancesPrices(p);
+ // }}
+ LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
+ }
+ if (p->repLenEncCounter <= 0)
+ {
+ p->repLenEncCounter = REP_LEN_COUNT;
+ LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
+ }
+ }
+
+ if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
+ break;
+ processed = nowPos32 - startPos32;
+
+ if (maxPackSize)
+ {
+ if (processed + kNumOpts + 300 >= maxUnpackSize
+ || RangeEnc_GetProcessed_sizet(&p->rc) + kPackReserve >= maxPackSize)
+ break;
+ }
+ else if (processed >= (1 << 17))
+ {
+ p->nowPos64 += nowPos32 - startPos32;
+ return CheckErrors(p);
+ }
+ }
+ }
+
+ p->nowPos64 += nowPos32 - startPos32;
+ return Flush(p, nowPos32);
+}
+
+
+
+#define kBigHashDicLimit ((UInt32)1 << 24)
+
+static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ UInt32 beforeSize = kNumOpts;
+ UInt32 dictSize;
+
+ if (!RangeEnc_Alloc(&p->rc, alloc))
+ return SZ_ERROR_MEM;
+
+ #ifndef Z7_ST
+ p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0));
+ #endif
+
+ {
+ unsigned lclp = p->lc + p->lp;
+ if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp)
+ {
+ LzmaEnc_FreeLits(p, alloc);
+ p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
+ p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
+ if (!p->litProbs || !p->saveState.litProbs)
+ {
+ LzmaEnc_FreeLits(p, alloc);
+ return SZ_ERROR_MEM;
+ }
+ p->lclp = lclp;
+ }
+ }
+
+ MFB.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
+
+
+ dictSize = p->dictSize;
+ if (dictSize == ((UInt32)2 << 30) ||
+ dictSize == ((UInt32)3 << 30))
+ {
+ /* 21.03 : here we reduce the dictionary for 2 reasons:
+ 1) we don't want 32-bit back_distance matches in decoder for 2 GB dictionary.
+ 2) we want to elimate useless last MatchFinder_Normalize3() for corner cases,
+ where data size is aligned for 1 GB: 5/6/8 GB.
+ That reducing must be >= 1 for such corner cases. */
+ dictSize -= 1;
+ }
+
+ if (beforeSize + dictSize < keepWindowSize)
+ beforeSize = keepWindowSize - dictSize;
+
+ /* in worst case we can look ahead for
+ max(LZMA_MATCH_LEN_MAX, numFastBytes + 1 + numFastBytes) bytes.
+ we send larger value for (keepAfter) to MantchFinder_Create():
+ (numFastBytes + LZMA_MATCH_LEN_MAX + 1)
+ */
+
+ #ifndef Z7_ST
+ if (p->mtMode)
+ {
+ RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize,
+ p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */
+ , allocBig))
+ p->matchFinderObj = &p->matchFinderMt;
+ MFB.bigHash = (Byte)(MFB.hashMask >= 0xFFFFFF ? 1 : 0);
+ MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
+ }
+ else
+ #endif
+ {
+ if (!MatchFinder_Create(&MFB, dictSize, beforeSize,
+ p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 21.03 */
+ , allocBig))
+ return SZ_ERROR_MEM;
+ p->matchFinderObj = &MFB;
+ MatchFinder_CreateVTable(&MFB, &p->matchFinder);
+ }
+
+ return SZ_OK;
+}
+
+static void LzmaEnc_Init(CLzmaEnc *p)
+{
+ unsigned i;
+ p->state = 0;
+ p->reps[0] =
+ p->reps[1] =
+ p->reps[2] =
+ p->reps[3] = 1;
+
+ RangeEnc_Init(&p->rc);
+
+ for (i = 0; i < (1 << kNumAlignBits); i++)
+ p->posAlignEncoder[i] = kProbInitValue;
+
+ for (i = 0; i < kNumStates; i++)
+ {
+ unsigned j;
+ for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++)
+ {
+ p->isMatch[i][j] = kProbInitValue;
+ p->isRep0Long[i][j] = kProbInitValue;
+ }
+ p->isRep[i] = kProbInitValue;
+ p->isRepG0[i] = kProbInitValue;
+ p->isRepG1[i] = kProbInitValue;
+ p->isRepG2[i] = kProbInitValue;
+ }
+
+ {
+ for (i = 0; i < kNumLenToPosStates; i++)
+ {
+ CLzmaProb *probs = p->posSlotEncoder[i];
+ unsigned j;
+ for (j = 0; j < (1 << kNumPosSlotBits); j++)
+ probs[j] = kProbInitValue;
+ }
+ }
+ {
+ for (i = 0; i < kNumFullDistances; i++)
+ p->posEncoders[i] = kProbInitValue;
+ }
+
+ {
+ UInt32 num = (UInt32)0x300 << (p->lp + p->lc);
+ UInt32 k;
+ CLzmaProb *probs = p->litProbs;
+ for (k = 0; k < num; k++)
+ probs[k] = kProbInitValue;
+ }
+
+
+ LenEnc_Init(&p->lenProbs);
+ LenEnc_Init(&p->repLenProbs);
+
+ p->optEnd = 0;
+ p->optCur = 0;
+
+ {
+ for (i = 0; i < kNumOpts; i++)
+ p->opt[i].price = kInfinityPrice;
+ }
+
+ p->additionalOffset = 0;
+
+ p->pbMask = ((unsigned)1 << p->pb) - 1;
+ p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);
+
+ // p->mf_Failure = False;
+}
+
+
+static void LzmaEnc_InitPrices(CLzmaEnc *p)
+{
+ if (!p->fastMode)
+ {
+ FillDistancesPrices(p);
+ FillAlignPrices(p);
+ }
+
+ p->lenEnc.tableSize =
+ p->repLenEnc.tableSize =
+ p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN;
+
+ p->repLenEncCounter = REP_LEN_COUNT;
+
+ LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
+ LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
+}
+
+static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ unsigned i;
+ for (i = kEndPosModelIndex / 2; i < kDicLogSizeMax; i++)
+ if (p->dictSize <= ((UInt32)1 << i))
+ break;
+ p->distTableSize = i * 2;
+
+ p->finished = False;
+ p->result = SZ_OK;
+ p->nowPos64 = 0;
+ p->needInit = 1;
+ RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig))
+ LzmaEnc_Init(p);
+ LzmaEnc_InitPrices(p);
+ return SZ_OK;
+}
+
+static SRes LzmaEnc_Prepare(CLzmaEncHandle p,
+ ISeqOutStreamPtr outStream,
+ ISeqInStreamPtr inStream,
+ ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ // GET_CLzmaEnc_p
+ MatchFinder_SET_STREAM(&MFB, inStream)
+ p->rc.outStream = outStream;
+ return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
+}
+
+SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p,
+ ISeqInStreamPtr inStream, UInt32 keepWindowSize,
+ ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ // GET_CLzmaEnc_p
+ MatchFinder_SET_STREAM(&MFB, inStream)
+ return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
+}
+
+SRes LzmaEnc_MemPrepare(CLzmaEncHandle p,
+ const Byte *src, SizeT srcLen,
+ UInt32 keepWindowSize,
+ ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ // GET_CLzmaEnc_p
+ MatchFinder_SET_DIRECT_INPUT_BUF(&MFB, src, srcLen)
+ LzmaEnc_SetDataSize(p, srcLen);
+ return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
+}
+
+void LzmaEnc_Finish(CLzmaEncHandle p)
+{
+ #ifndef Z7_ST
+ // GET_CLzmaEnc_p
+ if (p->mtMode)
+ MatchFinderMt_ReleaseStream(&p->matchFinderMt);
+ #else
+ UNUSED_VAR(p)
+ #endif
+}
+
+
+typedef struct
+{
+ ISeqOutStream vt;
+ Byte *data;
+ size_t rem;
+ BoolInt overflow;
+} CLzmaEnc_SeqOutStreamBuf;
+
+static size_t SeqOutStreamBuf_Write(ISeqOutStreamPtr pp, const void *data, size_t size)
+{
+ Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLzmaEnc_SeqOutStreamBuf)
+ if (p->rem < size)
+ {
+ size = p->rem;
+ p->overflow = True;
+ }
+ if (size != 0)
+ {
+ memcpy(p->data, data, size);
+ p->rem -= size;
+ p->data += size;
+ }
+ return size;
+}
+
+
+/*
+UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p)
+{
+ GET_const_CLzmaEnc_p
+ return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+}
+*/
+
+const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p)
+{
+ // GET_const_CLzmaEnc_p
+ return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
+}
+
+
+// (desiredPackSize == 0) is not allowed
+SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
+ Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
+{
+ // GET_CLzmaEnc_p
+ UInt64 nowPos64;
+ SRes res;
+ CLzmaEnc_SeqOutStreamBuf outStream;
+
+ outStream.vt.Write = SeqOutStreamBuf_Write;
+ outStream.data = dest;
+ outStream.rem = *destLen;
+ outStream.overflow = False;
+
+ p->writeEndMark = False;
+ p->finished = False;
+ p->result = SZ_OK;
+
+ if (reInit)
+ LzmaEnc_Init(p);
+ LzmaEnc_InitPrices(p);
+ RangeEnc_Init(&p->rc);
+ p->rc.outStream = &outStream.vt;
+ nowPos64 = p->nowPos64;
+
+ res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize);
+
+ *unpackSize = (UInt32)(p->nowPos64 - nowPos64);
+ *destLen -= outStream.rem;
+ if (outStream.overflow)
+ return SZ_ERROR_OUTPUT_EOF;
+
+ return res;
+}
+
+
+Z7_NO_INLINE
+static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgressPtr progress)
+{
+ SRes res = SZ_OK;
+
+ #ifndef Z7_ST
+ Byte allocaDummy[0x300];
+ allocaDummy[0] = 0;
+ allocaDummy[1] = allocaDummy[0];
+ #endif
+
+ for (;;)
+ {
+ res = LzmaEnc_CodeOneBlock(p, 0, 0);
+ if (res != SZ_OK || p->finished)
+ break;
+ if (progress)
+ {
+ res = ICompressProgress_Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc));
+ if (res != SZ_OK)
+ {
+ res = SZ_ERROR_PROGRESS;
+ break;
+ }
+ }
+ }
+
+ LzmaEnc_Finish((CLzmaEncHandle)(void *)p);
+
+ /*
+ if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB))
+ res = SZ_ERROR_FAIL;
+ }
+ */
+
+ return res;
+}
+
+
+SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress,
+ ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ // GET_CLzmaEnc_p
+ RINOK(LzmaEnc_Prepare(p, outStream, inStream, alloc, allocBig))
+ return LzmaEnc_Encode2(p, progress);
+}
+
+
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *props, SizeT *size)
+{
+ if (*size < LZMA_PROPS_SIZE)
+ return SZ_ERROR_PARAM;
+ *size = LZMA_PROPS_SIZE;
+ {
+ // GET_CLzmaEnc_p
+ const UInt32 dictSize = p->dictSize;
+ UInt32 v;
+ props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
+
+ // we write aligned dictionary value to properties for lzma decoder
+ if (dictSize >= ((UInt32)1 << 21))
+ {
+ const UInt32 kDictMask = ((UInt32)1 << 20) - 1;
+ v = (dictSize + kDictMask) & ~kDictMask;
+ if (v < dictSize)
+ v = dictSize;
+ }
+ else
+ {
+ unsigned i = 11 * 2;
+ do
+ {
+ v = (UInt32)(2 + (i & 1)) << (i >> 1);
+ i++;
+ }
+ while (v < dictSize);
+ }
+
+ SetUi32(props + 1, v)
+ return SZ_OK;
+ }
+}
+
+
+unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p)
+{
+ // GET_CLzmaEnc_p
+ return (unsigned)p->writeEndMark;
+}
+
+
+SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+ int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ SRes res;
+ // GET_CLzmaEnc_p
+
+ CLzmaEnc_SeqOutStreamBuf outStream;
+
+ outStream.vt.Write = SeqOutStreamBuf_Write;
+ outStream.data = dest;
+ outStream.rem = *destLen;
+ outStream.overflow = False;
+
+ p->writeEndMark = writeEndMark;
+ p->rc.outStream = &outStream.vt;
+
+ res = LzmaEnc_MemPrepare(p, src, srcLen, 0, alloc, allocBig);
+
+ if (res == SZ_OK)
+ {
+ res = LzmaEnc_Encode2(p, progress);
+ if (res == SZ_OK && p->nowPos64 != srcLen)
+ res = SZ_ERROR_FAIL;
+ }
+
+ *destLen -= (SizeT)outStream.rem;
+ if (outStream.overflow)
+ return SZ_ERROR_OUTPUT_EOF;
+ return res;
+}
+
+
+SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+ const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
+ ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ CLzmaEncHandle p = LzmaEnc_Create(alloc);
+ SRes res;
+ if (!p)
+ return SZ_ERROR_MEM;
+
+ res = LzmaEnc_SetProps(p, props);
+ if (res == SZ_OK)
+ {
+ res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize);
+ if (res == SZ_OK)
+ res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen,
+ writeEndMark, progress, alloc, allocBig);
+ }
+
+ LzmaEnc_Destroy(p, alloc, allocBig);
+ return res;
+}
+
+
+/*
+#ifndef Z7_ST
+void LzmaEnc_GetLzThreads(CLzmaEncHandle p, HANDLE lz_threads[2])
+{
+ GET_const_CLzmaEnc_p
+ lz_threads[0] = p->matchFinderMt.hashSync.thread;
+ lz_threads[1] = p->matchFinderMt.btSync.thread;
+}
+#endif
+*/
diff --git a/src/Common/lzma/LzmaEnc.h b/src/Common/lzma/LzmaEnc.h
new file mode 100644
index 00000000..9f8039a1
--- /dev/null
+++ b/src/Common/lzma/LzmaEnc.h
@@ -0,0 +1,83 @@
+/* LzmaEnc.h -- LZMA Encoder
+2023-04-13 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_LZMA_ENC_H
+#define ZIP7_INC_LZMA_ENC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct
+{
+ int level; /* 0 <= level <= 9 */
+ UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
+ (1 << 12) <= dictSize <= (3 << 29) for 64-bit version
+ default = (1 << 24) */
+ int lc; /* 0 <= lc <= 8, default = 3 */
+ int lp; /* 0 <= lp <= 4, default = 0 */
+ int pb; /* 0 <= pb <= 4, default = 2 */
+ int algo; /* 0 - fast, 1 - normal, default = 1 */
+ int fb; /* 5 <= fb <= 273, default = 32 */
+ int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
+ int numHashBytes; /* 2, 3 or 4, default = 4 */
+ unsigned numHashOutBits; /* default = ? */
+ UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
+ unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
+ int numThreads; /* 1 or 2, default = 2 */
+
+ // int _pad;
+
+ UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
+ Encoder uses this value to reduce dictionary size */
+
+ UInt64 affinity;
+} CLzmaEncProps;
+
+void LzmaEncProps_Init(CLzmaEncProps *p);
+void LzmaEncProps_Normalize(CLzmaEncProps *p);
+UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
+
+
+/* ---------- CLzmaEncHandle Interface ---------- */
+
+/* LzmaEnc* functions can return the following exit codes:
+SRes:
+ SZ_OK - OK
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_PARAM - Incorrect paramater in props
+ SZ_ERROR_WRITE - ISeqOutStream write callback error
+ SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output
+ SZ_ERROR_PROGRESS - some break from progress callback
+ SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
+*/
+
+typedef struct CLzmaEnc CLzmaEnc;
+typedef CLzmaEnc * CLzmaEncHandle;
+// Z7_DECLARE_HANDLE(CLzmaEncHandle)
+
+CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
+void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+
+SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
+void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
+unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
+
+SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream,
+ ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+ int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+
+
+/* ---------- One Call Interface ---------- */
+
+SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+ const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
+ ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+
+EXTERN_C_END
+
+#endif
diff --git a/src/Common/lzma/LzmaLib.c b/src/Common/lzma/LzmaLib.c
new file mode 100644
index 00000000..785e8848
--- /dev/null
+++ b/src/Common/lzma/LzmaLib.c
@@ -0,0 +1,42 @@
+/* LzmaLib.c -- LZMA library wrapper
+2023-04-02 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Alloc.h"
+#include "LzmaDec.h"
+#include "LzmaEnc.h"
+#include "LzmaLib.h"
+
+Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
+ unsigned char *outProps, size_t *outPropsSize,
+ int level, /* 0 <= level <= 9, default = 5 */
+ unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
+ int lc, /* 0 <= lc <= 8, default = 3 */
+ int lp, /* 0 <= lp <= 4, default = 0 */
+ int pb, /* 0 <= pb <= 4, default = 2 */
+ int fb, /* 5 <= fb <= 273, default = 32 */
+ int numThreads /* 1 or 2, default = 2 */
+)
+{
+ CLzmaEncProps props;
+ LzmaEncProps_Init(&props);
+ props.level = level;
+ props.dictSize = dictSize;
+ props.lc = lc;
+ props.lp = lp;
+ props.pb = pb;
+ props.fb = fb;
+ props.numThreads = numThreads;
+
+ return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0,
+ NULL, &g_Alloc, &g_Alloc);
+}
+
+
+Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
+ const unsigned char *props, size_t propsSize)
+{
+ ELzmaStatus status;
+ return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc);
+}
diff --git a/src/Common/lzma/LzmaLib.h b/src/Common/lzma/LzmaLib.h
new file mode 100644
index 00000000..d7c0724d
--- /dev/null
+++ b/src/Common/lzma/LzmaLib.h
@@ -0,0 +1,138 @@
+/* LzmaLib.h -- LZMA library interface
+2023-04-02 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_LZMA_LIB_H
+#define ZIP7_INC_LZMA_LIB_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define Z7_STDAPI int Z7_STDCALL
+
+#define LZMA_PROPS_SIZE 5
+
+/*
+RAM requirements for LZMA:
+ for compression: (dictSize * 11.5 + 6 MB) + state_size
+ for decompression: dictSize + state_size
+ state_size = (4 + (1.5 << (lc + lp))) KB
+ by default (lc=3, lp=0), state_size = 16 KB.
+
+LZMA properties (5 bytes) format
+ Offset Size Description
+ 0 1 lc, lp and pb in encoded form.
+ 1 4 dictSize (little endian).
+*/
+
+/*
+LzmaCompress
+------------
+
+outPropsSize -
+ In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
+ Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
+
+ LZMA Encoder will use defult values for any parameter, if it is
+ -1 for any from: level, loc, lp, pb, fb, numThreads
+ 0 for dictSize
+
+level - compression level: 0 <= level <= 9;
+
+ level dictSize algo fb
+ 0: 64 KB 0 32
+ 1: 256 KB 0 32
+ 2: 1 MB 0 32
+ 3: 4 MB 0 32
+ 4: 16 MB 0 32
+ 5: 16 MB 1 32
+ 6: 32 MB 1 32
+ 7: 32 MB 1 64
+ 8: 64 MB 1 64
+ 9: 64 MB 1 64
+
+ The default value for "level" is 5.
+
+ algo = 0 means fast method
+ algo = 1 means normal method
+
+dictSize - The dictionary size in bytes. The maximum value is
+ 128 MB = (1 << 27) bytes for 32-bit version
+ 1 GB = (1 << 30) bytes for 64-bit version
+ The default value is 16 MB = (1 << 24) bytes.
+ It's recommended to use the dictionary that is larger than 4 KB and
+ that can be calculated as (1 << N) or (3 << N) sizes.
+
+lc - The number of literal context bits (high bits of previous literal).
+ It can be in the range from 0 to 8. The default value is 3.
+ Sometimes lc=4 gives the gain for big files.
+
+lp - The number of literal pos bits (low bits of current position for literals).
+ It can be in the range from 0 to 4. The default value is 0.
+ The lp switch is intended for periodical data when the period is equal to 2^lp.
+ For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's
+ better to set lc=0, if you change lp switch.
+
+pb - The number of pos bits (low bits of current position).
+ It can be in the range from 0 to 4. The default value is 2.
+ The pb switch is intended for periodical data when the period is equal 2^pb.
+
+fb - Word size (the number of fast bytes).
+ It can be in the range from 5 to 273. The default value is 32.
+ Usually, a big number gives a little bit better compression ratio and
+ slower compression process.
+
+numThreads - The number of thereads. 1 or 2. The default value is 2.
+ Fast mode (algo = 0) can use only 1 thread.
+
+In:
+ dest - output data buffer
+ destLen - output data buffer size
+ src - input data
+ srcLen - input data size
+Out:
+ destLen - processed output size
+Returns:
+ SZ_OK - OK
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_PARAM - Incorrect paramater
+ SZ_ERROR_OUTPUT_EOF - output buffer overflow
+ SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
+*/
+
+Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
+ unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
+ int level, /* 0 <= level <= 9, default = 5 */
+ unsigned dictSize, /* default = (1 << 24) */
+ int lc, /* 0 <= lc <= 8, default = 3 */
+ int lp, /* 0 <= lp <= 4, default = 0 */
+ int pb, /* 0 <= pb <= 4, default = 2 */
+ int fb, /* 5 <= fb <= 273, default = 32 */
+ int numThreads /* 1 or 2, default = 2 */
+ );
+
+/*
+LzmaUncompress
+--------------
+In:
+ dest - output data buffer
+ destLen - output data buffer size
+ src - input data
+ srcLen - input data size
+Out:
+ destLen - processed output size
+ srcLen - processed input size
+Returns:
+ SZ_OK - OK
+ SZ_ERROR_DATA - Data error
+ SZ_ERROR_MEM - Memory allocation arror
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+ SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src)
+*/
+
+Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
+ const unsigned char *props, size_t propsSize);
+
+EXTERN_C_END
+
+#endif
diff --git a/src/Common/lzma/Precomp.h b/src/Common/lzma/Precomp.h
new file mode 100644
index 00000000..69afb2ff
--- /dev/null
+++ b/src/Common/lzma/Precomp.h
@@ -0,0 +1,10 @@
+/* Precomp.h -- StdAfx
+2023-04-02 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_PRECOMP_H
+#define ZIP7_INC_PRECOMP_H
+
+#include "Compiler.h"
+/* #include "7zTypes.h" */
+
+#endif
diff --git a/src/Common/lzma/Threads.c b/src/Common/lzma/Threads.c
new file mode 100644
index 00000000..cf52bd30
--- /dev/null
+++ b/src/Common/lzma/Threads.c
@@ -0,0 +1,562 @@
+/* Threads.c -- multithreading library
+2023-03-04 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#ifdef _WIN32
+
+#ifndef USE_THREADS_CreateThread
+#include <process.h>
+#endif
+
+#include "Threads.h"
+
+static WRes GetError(void)
+{
+ const DWORD res = GetLastError();
+ return res ? (WRes)res : 1;
+}
+
+static WRes HandleToWRes(HANDLE h) { return (h != NULL) ? 0 : GetError(); }
+static WRes BOOLToWRes(BOOL v) { return v ? 0 : GetError(); }
+
+WRes HandlePtr_Close(HANDLE *p)
+{
+ if (*p != NULL)
+ {
+ if (!CloseHandle(*p))
+ return GetError();
+ *p = NULL;
+ }
+ return 0;
+}
+
+WRes Handle_WaitObject(HANDLE h)
+{
+ DWORD dw = WaitForSingleObject(h, INFINITE);
+ /*
+ (dw) result:
+ WAIT_OBJECT_0 // 0
+ WAIT_ABANDONED // 0x00000080 : is not compatible with Win32 Error space
+ WAIT_TIMEOUT // 0x00000102 : is compatible with Win32 Error space
+ WAIT_FAILED // 0xFFFFFFFF
+ */
+ if (dw == WAIT_FAILED)
+ {
+ dw = GetLastError();
+ if (dw == 0)
+ return WAIT_FAILED;
+ }
+ return (WRes)dw;
+}
+
+#define Thread_Wait(p) Handle_WaitObject(*(p))
+
+WRes Thread_Wait_Close(CThread *p)
+{
+ WRes res = Thread_Wait(p);
+ WRes res2 = Thread_Close(p);
+ return (res != 0 ? res : res2);
+}
+
+WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
+{
+ /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
+
+ #ifdef USE_THREADS_CreateThread
+
+ DWORD threadId;
+ *p = CreateThread(NULL, 0, func, param, 0, &threadId);
+
+ #else
+
+ unsigned threadId;
+ *p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId));
+
+ #endif
+
+ /* maybe we must use errno here, but probably GetLastError() is also OK. */
+ return HandleToWRes(*p);
+}
+
+
+WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
+{
+ #ifdef USE_THREADS_CreateThread
+
+ UNUSED_VAR(affinity)
+ return Thread_Create(p, func, param);
+
+ #else
+
+ /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
+ HANDLE h;
+ WRes wres;
+ unsigned threadId;
+ h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId));
+ *p = h;
+ wres = HandleToWRes(h);
+ if (h)
+ {
+ {
+ // DWORD_PTR prevMask =
+ SetThreadAffinityMask(h, (DWORD_PTR)affinity);
+ /*
+ if (prevMask == 0)
+ {
+ // affinity change is non-critical error, so we can ignore it
+ // wres = GetError();
+ }
+ */
+ }
+ {
+ DWORD prevSuspendCount = ResumeThread(h);
+ /* ResumeThread() returns:
+ 0 : was_not_suspended
+ 1 : was_resumed
+ -1 : error
+ */
+ if (prevSuspendCount == (DWORD)-1)
+ wres = GetError();
+ }
+ }
+
+ /* maybe we must use errno here, but probably GetLastError() is also OK. */
+ return wres;
+
+ #endif
+}
+
+
+static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled)
+{
+ *p = CreateEvent(NULL, manualReset, (signaled ? TRUE : FALSE), NULL);
+ return HandleToWRes(*p);
+}
+
+WRes Event_Set(CEvent *p) { return BOOLToWRes(SetEvent(*p)); }
+WRes Event_Reset(CEvent *p) { return BOOLToWRes(ResetEvent(*p)); }
+
+WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled) { return Event_Create(p, TRUE, signaled); }
+WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled) { return Event_Create(p, FALSE, signaled); }
+WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) { return ManualResetEvent_Create(p, 0); }
+WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEvent_Create(p, 0); }
+
+
+WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
+{
+ // negative ((LONG)maxCount) is not supported in WIN32::CreateSemaphore()
+ *p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL);
+ return HandleToWRes(*p);
+}
+
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
+{
+ // if (Semaphore_IsCreated(p))
+ {
+ WRes wres = Semaphore_Close(p);
+ if (wres != 0)
+ return wres;
+ }
+ return Semaphore_Create(p, initCount, maxCount);
+}
+
+static WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount)
+ { return BOOLToWRes(ReleaseSemaphore(*p, releaseCount, previousCount)); }
+WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num)
+ { return Semaphore_Release(p, (LONG)num, NULL); }
+WRes Semaphore_Release1(CSemaphore *p) { return Semaphore_ReleaseN(p, 1); }
+
+WRes CriticalSection_Init(CCriticalSection *p)
+{
+ /* InitializeCriticalSection() can raise exception:
+ Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception
+ Windows Vista+ : no exceptions */
+ #ifdef _MSC_VER
+ #ifdef __clang__
+ #pragma GCC diagnostic ignored "-Wlanguage-extension-token"
+ #endif
+ __try
+ #endif
+ {
+ InitializeCriticalSection(p);
+ /* InitializeCriticalSectionAndSpinCount(p, 0); */
+ }
+ #ifdef _MSC_VER
+ __except (EXCEPTION_EXECUTE_HANDLER) { return ERROR_NOT_ENOUGH_MEMORY; }
+ #endif
+ return 0;
+}
+
+
+
+
+#else // _WIN32
+
+// ---------- POSIX ----------
+
+#ifndef __APPLE__
+#ifndef Z7_AFFINITY_DISABLE
+// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET
+// clang < 3.6 : unknown warning group '-Wreserved-id-macro'
+// clang 3.6 - 12.01 : gives warning "macro name is a reserved identifier"
+// clang >= 13 : do not give warning
+#if !defined(_GNU_SOURCE)
+ #if defined(__clang__) && (__clang_major__ >= 4) && (__clang_major__ <= 12)
+ #pragma GCC diagnostic ignored "-Wreserved-id-macro"
+ #endif
+#define _GNU_SOURCE
+#endif // !defined(_GNU_SOURCE)
+#endif // Z7_AFFINITY_DISABLE
+#endif // __APPLE__
+
+#include "Threads.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef Z7_AFFINITY_SUPPORTED
+// #include <sched.h>
+#endif
+
+
+// #include <stdio.h>
+// #define PRF(p) p
+#define PRF(p)
+#define Print(s) PRF(printf("\n%s\n", s);)
+
+WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet)
+{
+ // new thread in Posix probably inherits affinity from parrent thread
+ Print("Thread_Create_With_CpuSet")
+
+ pthread_attr_t attr;
+ int ret;
+ // int ret2;
+
+ p->_created = 0;
+
+ RINOK(pthread_attr_init(&attr))
+
+ ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+
+ if (!ret)
+ {
+ if (cpuSet)
+ {
+ #ifdef Z7_AFFINITY_SUPPORTED
+
+ /*
+ printf("\n affinity :");
+ unsigned i;
+ for (i = 0; i < sizeof(*cpuSet) && i < 8; i++)
+ {
+ Byte b = *((const Byte *)cpuSet + i);
+ char temp[32];
+ #define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
+ temp[0] = GET_HEX_CHAR((b & 0xF));
+ temp[1] = GET_HEX_CHAR((b >> 4));
+ // temp[0] = GET_HEX_CHAR((b >> 4)); // big-endian
+ // temp[1] = GET_HEX_CHAR((b & 0xF)); // big-endian
+ temp[2] = 0;
+ printf("%s", temp);
+ }
+ printf("\n");
+ */
+
+ // ret2 =
+ pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet);
+ // if (ret2) ret = ret2;
+ #endif
+ }
+
+ ret = pthread_create(&p->_tid, &attr, func, param);
+
+ if (!ret)
+ {
+ p->_created = 1;
+ /*
+ if (cpuSet)
+ {
+ // ret2 =
+ pthread_setaffinity_np(p->_tid, sizeof(*cpuSet), cpuSet);
+ // if (ret2) ret = ret2;
+ }
+ */
+ }
+ }
+ // ret2 =
+ pthread_attr_destroy(&attr);
+ // if (ret2 != 0) ret = ret2;
+ return ret;
+}
+
+
+WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
+{
+ return Thread_Create_With_CpuSet(p, func, param, NULL);
+}
+
+
+WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
+{
+ Print("Thread_Create_WithAffinity")
+ CCpuSet cs;
+ unsigned i;
+ CpuSet_Zero(&cs);
+ for (i = 0; i < sizeof(affinity) * 8; i++)
+ {
+ if (affinity == 0)
+ break;
+ if (affinity & 1)
+ {
+ CpuSet_Set(&cs, i);
+ }
+ affinity >>= 1;
+ }
+ return Thread_Create_With_CpuSet(p, func, param, &cs);
+}
+
+
+WRes Thread_Close(CThread *p)
+{
+ // Print("Thread_Close")
+ int ret;
+ if (!p->_created)
+ return 0;
+
+ ret = pthread_detach(p->_tid);
+ p->_tid = 0;
+ p->_created = 0;
+ return ret;
+}
+
+
+WRes Thread_Wait_Close(CThread *p)
+{
+ // Print("Thread_Wait_Close")
+ void *thread_return;
+ int ret;
+ if (!p->_created)
+ return EINVAL;
+
+ ret = pthread_join(p->_tid, &thread_return);
+ // probably we can't use that (_tid) after pthread_join(), so we close thread here
+ p->_created = 0;
+ p->_tid = 0;
+ return ret;
+}
+
+
+
+static WRes Event_Create(CEvent *p, int manualReset, int signaled)
+{
+ RINOK(pthread_mutex_init(&p->_mutex, NULL))
+ RINOK(pthread_cond_init(&p->_cond, NULL))
+ p->_manual_reset = manualReset;
+ p->_state = (signaled ? True : False);
+ p->_created = 1;
+ return 0;
+}
+
+WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled)
+ { return Event_Create(p, True, signaled); }
+WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p)
+ { return ManualResetEvent_Create(p, 0); }
+WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled)
+ { return Event_Create(p, False, signaled); }
+WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p)
+ { return AutoResetEvent_Create(p, 0); }
+
+
+WRes Event_Set(CEvent *p)
+{
+ RINOK(pthread_mutex_lock(&p->_mutex))
+ p->_state = True;
+ int res1 = pthread_cond_broadcast(&p->_cond);
+ int res2 = pthread_mutex_unlock(&p->_mutex);
+ return (res2 ? res2 : res1);
+}
+
+WRes Event_Reset(CEvent *p)
+{
+ RINOK(pthread_mutex_lock(&p->_mutex))
+ p->_state = False;
+ return pthread_mutex_unlock(&p->_mutex);
+}
+
+WRes Event_Wait(CEvent *p)
+{
+ RINOK(pthread_mutex_lock(&p->_mutex))
+ while (p->_state == False)
+ {
+ // ETIMEDOUT
+ // ret =
+ pthread_cond_wait(&p->_cond, &p->_mutex);
+ // if (ret != 0) break;
+ }
+ if (p->_manual_reset == False)
+ {
+ p->_state = False;
+ }
+ return pthread_mutex_unlock(&p->_mutex);
+}
+
+WRes Event_Close(CEvent *p)
+{
+ if (!p->_created)
+ return 0;
+ p->_created = 0;
+ {
+ int res1 = pthread_mutex_destroy(&p->_mutex);
+ int res2 = pthread_cond_destroy(&p->_cond);
+ return (res1 ? res1 : res2);
+ }
+}
+
+
+WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
+{
+ if (initCount > maxCount || maxCount < 1)
+ return EINVAL;
+ RINOK(pthread_mutex_init(&p->_mutex, NULL))
+ RINOK(pthread_cond_init(&p->_cond, NULL))
+ p->_count = initCount;
+ p->_maxCount = maxCount;
+ p->_created = 1;
+ return 0;
+}
+
+
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
+{
+ if (Semaphore_IsCreated(p))
+ {
+ /*
+ WRes wres = Semaphore_Close(p);
+ if (wres != 0)
+ return wres;
+ */
+ if (initCount > maxCount || maxCount < 1)
+ return EINVAL;
+ // return EINVAL; // for debug
+ p->_count = initCount;
+ p->_maxCount = maxCount;
+ return 0;
+ }
+ return Semaphore_Create(p, initCount, maxCount);
+}
+
+
+WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount)
+{
+ UInt32 newCount;
+ int ret;
+
+ if (releaseCount < 1)
+ return EINVAL;
+
+ RINOK(pthread_mutex_lock(&p->_mutex))
+
+ newCount = p->_count + releaseCount;
+ if (newCount > p->_maxCount)
+ ret = ERROR_TOO_MANY_POSTS; // EINVAL;
+ else
+ {
+ p->_count = newCount;
+ ret = pthread_cond_broadcast(&p->_cond);
+ }
+ RINOK(pthread_mutex_unlock(&p->_mutex))
+ return ret;
+}
+
+WRes Semaphore_Wait(CSemaphore *p)
+{
+ RINOK(pthread_mutex_lock(&p->_mutex))
+ while (p->_count < 1)
+ {
+ pthread_cond_wait(&p->_cond, &p->_mutex);
+ }
+ p->_count--;
+ return pthread_mutex_unlock(&p->_mutex);
+}
+
+WRes Semaphore_Close(CSemaphore *p)
+{
+ if (!p->_created)
+ return 0;
+ p->_created = 0;
+ {
+ int res1 = pthread_mutex_destroy(&p->_mutex);
+ int res2 = pthread_cond_destroy(&p->_cond);
+ return (res1 ? res1 : res2);
+ }
+}
+
+
+
+WRes CriticalSection_Init(CCriticalSection *p)
+{
+ // Print("CriticalSection_Init")
+ if (!p)
+ return EINTR;
+ return pthread_mutex_init(&p->_mutex, NULL);
+}
+
+void CriticalSection_Enter(CCriticalSection *p)
+{
+ // Print("CriticalSection_Enter")
+ if (p)
+ {
+ // int ret =
+ pthread_mutex_lock(&p->_mutex);
+ }
+}
+
+void CriticalSection_Leave(CCriticalSection *p)
+{
+ // Print("CriticalSection_Leave")
+ if (p)
+ {
+ // int ret =
+ pthread_mutex_unlock(&p->_mutex);
+ }
+}
+
+void CriticalSection_Delete(CCriticalSection *p)
+{
+ // Print("CriticalSection_Delete")
+ if (p)
+ {
+ // int ret =
+ pthread_mutex_destroy(&p->_mutex);
+ }
+}
+
+LONG InterlockedIncrement(LONG volatile *addend)
+{
+ // Print("InterlockedIncrement")
+ #ifdef USE_HACK_UNSAFE_ATOMIC
+ LONG val = *addend + 1;
+ *addend = val;
+ return val;
+ #else
+
+ #if defined(__clang__) && (__clang_major__ >= 8)
+ #pragma GCC diagnostic ignored "-Watomic-implicit-seq-cst"
+ #endif
+ return __sync_add_and_fetch(addend, 1);
+ #endif
+}
+
+#endif // _WIN32
+
+WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p)
+{
+ if (Event_IsCreated(p))
+ return Event_Reset(p);
+ return AutoResetEvent_CreateNotSignaled(p);
+}
+
+#undef PRF
+#undef Print
diff --git a/src/Common/lzma/Threads.h b/src/Common/lzma/Threads.h
new file mode 100644
index 00000000..4028464a
--- /dev/null
+++ b/src/Common/lzma/Threads.h
@@ -0,0 +1,240 @@
+/* Threads.h -- multithreading library
+2023-04-02 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_THREADS_H
+#define ZIP7_INC_THREADS_H
+
+#ifdef _WIN32
+#include "7zWindows.h"
+
+#else
+
+#if defined(__linux__)
+#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)
+#ifndef Z7_AFFINITY_DISABLE
+#define Z7_AFFINITY_SUPPORTED
+// #pragma message(" ==== Z7_AFFINITY_SUPPORTED")
+// #define _GNU_SOURCE
+#endif
+#endif
+#endif
+
+#include <pthread.h>
+
+#endif
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#ifdef _WIN32
+
+WRes HandlePtr_Close(HANDLE *h);
+WRes Handle_WaitObject(HANDLE h);
+
+typedef HANDLE CThread;
+
+#define Thread_CONSTRUCT(p) { *(p) = NULL; }
+#define Thread_WasCreated(p) (*(p) != NULL)
+#define Thread_Close(p) HandlePtr_Close(p)
+// #define Thread_Wait(p) Handle_WaitObject(*(p))
+
+#ifdef UNDER_CE
+ // if (USE_THREADS_CreateThread is defined), we use _beginthreadex()
+ // if (USE_THREADS_CreateThread is not definned), we use CreateThread()
+ #define USE_THREADS_CreateThread
+#endif
+
+typedef
+ #ifdef USE_THREADS_CreateThread
+ DWORD
+ #else
+ unsigned
+ #endif
+ THREAD_FUNC_RET_TYPE;
+
+#define THREAD_FUNC_RET_ZERO 0
+
+typedef DWORD_PTR CAffinityMask;
+typedef DWORD_PTR CCpuSet;
+
+#define CpuSet_Zero(p) *(p) = (0)
+#define CpuSet_Set(p, cpu) *(p) |= ((DWORD_PTR)1 << (cpu))
+
+#else // _WIN32
+
+typedef struct
+{
+ pthread_t _tid;
+ int _created;
+} CThread;
+
+#define Thread_CONSTRUCT(p) { (p)->_tid = 0; (p)->_created = 0; }
+#define Thread_WasCreated(p) ((p)->_created != 0)
+WRes Thread_Close(CThread *p);
+// #define Thread_Wait Thread_Wait_Close
+
+typedef void * THREAD_FUNC_RET_TYPE;
+#define THREAD_FUNC_RET_ZERO NULL
+
+
+typedef UInt64 CAffinityMask;
+
+#ifdef Z7_AFFINITY_SUPPORTED
+
+typedef cpu_set_t CCpuSet;
+#define CpuSet_Zero(p) CPU_ZERO(p)
+#define CpuSet_Set(p, cpu) CPU_SET(cpu, p)
+#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p)
+
+#else
+
+typedef UInt64 CCpuSet;
+#define CpuSet_Zero(p) *(p) = (0)
+#define CpuSet_Set(p, cpu) *(p) |= ((UInt64)1 << (cpu))
+#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0)
+
+#endif
+
+
+#endif // _WIN32
+
+
+#define THREAD_FUNC_CALL_TYPE Z7_STDCALL
+
+#if defined(_WIN32) && defined(__GNUC__)
+/* GCC compiler for x86 32-bit uses the rule:
+ the stack is 16-byte aligned before CALL instruction for function calling.
+ But only root function main() contains instructions that
+ set 16-byte alignment for stack pointer. And another functions
+ just keep alignment, if it was set in some parent function.
+
+ The problem:
+ if we create new thread in MinGW (GCC) 32-bit x86 via _beginthreadex() or CreateThread(),
+ the root function of thread doesn't set 16-byte alignment.
+ And stack frames in all child functions also will be unaligned in that case.
+
+ Here we set (force_align_arg_pointer) attribute for root function of new thread.
+ Do we need (force_align_arg_pointer) also for another systems? */
+
+ #define THREAD_FUNC_ATTRIB_ALIGN_ARG __attribute__((force_align_arg_pointer))
+ // #define THREAD_FUNC_ATTRIB_ALIGN_ARG // for debug : bad alignment in SSE functions
+#else
+ #define THREAD_FUNC_ATTRIB_ALIGN_ARG
+#endif
+
+#define THREAD_FUNC_DECL THREAD_FUNC_ATTRIB_ALIGN_ARG THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE
+
+typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *);
+WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param);
+WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity);
+WRes Thread_Wait_Close(CThread *p);
+
+#ifdef _WIN32
+#define Thread_Create_With_CpuSet(p, func, param, cs) \
+ Thread_Create_With_Affinity(p, func, param, *cs)
+#else
+WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet);
+#endif
+
+
+#ifdef _WIN32
+
+typedef HANDLE CEvent;
+typedef CEvent CAutoResetEvent;
+typedef CEvent CManualResetEvent;
+#define Event_Construct(p) *(p) = NULL
+#define Event_IsCreated(p) (*(p) != NULL)
+#define Event_Close(p) HandlePtr_Close(p)
+#define Event_Wait(p) Handle_WaitObject(*(p))
+WRes Event_Set(CEvent *p);
+WRes Event_Reset(CEvent *p);
+WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);
+WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);
+WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);
+WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);
+
+typedef HANDLE CSemaphore;
+#define Semaphore_Construct(p) *(p) = NULL
+#define Semaphore_IsCreated(p) (*(p) != NULL)
+#define Semaphore_Close(p) HandlePtr_Close(p)
+#define Semaphore_Wait(p) Handle_WaitObject(*(p))
+WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
+WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
+WRes Semaphore_Release1(CSemaphore *p);
+
+typedef CRITICAL_SECTION CCriticalSection;
+WRes CriticalSection_Init(CCriticalSection *p);
+#define CriticalSection_Delete(p) DeleteCriticalSection(p)
+#define CriticalSection_Enter(p) EnterCriticalSection(p)
+#define CriticalSection_Leave(p) LeaveCriticalSection(p)
+
+
+#else // _WIN32
+
+typedef struct _CEvent
+{
+ int _created;
+ int _manual_reset;
+ int _state;
+ pthread_mutex_t _mutex;
+ pthread_cond_t _cond;
+} CEvent;
+
+typedef CEvent CAutoResetEvent;
+typedef CEvent CManualResetEvent;
+
+#define Event_Construct(p) (p)->_created = 0
+#define Event_IsCreated(p) ((p)->_created)
+
+WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);
+WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);
+WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);
+WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);
+
+WRes Event_Set(CEvent *p);
+WRes Event_Reset(CEvent *p);
+WRes Event_Wait(CEvent *p);
+WRes Event_Close(CEvent *p);
+
+
+typedef struct _CSemaphore
+{
+ int _created;
+ UInt32 _count;
+ UInt32 _maxCount;
+ pthread_mutex_t _mutex;
+ pthread_cond_t _cond;
+} CSemaphore;
+
+#define Semaphore_Construct(p) (p)->_created = 0
+#define Semaphore_IsCreated(p) ((p)->_created)
+
+WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
+WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
+#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1)
+WRes Semaphore_Wait(CSemaphore *p);
+WRes Semaphore_Close(CSemaphore *p);
+
+
+typedef struct _CCriticalSection
+{
+ pthread_mutex_t _mutex;
+} CCriticalSection;
+
+WRes CriticalSection_Init(CCriticalSection *p);
+void CriticalSection_Delete(CCriticalSection *cs);
+void CriticalSection_Enter(CCriticalSection *cs);
+void CriticalSection_Leave(CCriticalSection *cs);
+
+LONG InterlockedIncrement(LONG volatile *addend);
+
+#endif // _WIN32
+
+WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p);
+
+EXTERN_C_END
+
+#endif
diff --git a/src/Common/lzma/lzma-history.txt b/src/Common/lzma/lzma-history.txt
new file mode 100644
index 00000000..a151c4b9
--- /dev/null
+++ b/src/Common/lzma/lzma-history.txt
@@ -0,0 +1,560 @@
+HISTORY of the LZMA SDK
+-----------------------
+
+23.01 2023-06-20
+-------------------------
+- 7-Zip now can use new ARM64 filter for compression to 7z and xz archives.
+ ARM64 filter can increase compression ratio for data containing executable
+ files compiled for ARM64 (AArch64) architecture.
+ Also 7-Zip now parses executable files (that have exe and dll filename extensions)
+ before compressing, and it selects appropriate filter for each parsed file:
+ - BCJ or BCJ2 filter for x86 executable files,
+ - ARM64 filter for ARM64 executable files.
+ Previous versions by default used x86 filter BCJ or BCJ2 for all exe/dll files.
+- Default section size for BCJ2 filter was changed from 64 MiB to 240 MiB.
+ It can increase compression ratio for executable files larger than 64 MiB.
+- Some optimizations in filters code: BCJ, BCJ2, Swap* and opthers.
+- If 7-Zip uses BCJ2 filter for big datasets compressing, it can use additional temp
+ files in system's TEMP folder. 7-Zip uses temp file for additional compressed
+ data stream, if size of such compressed stream is larger than predefined limit:
+ 16 MiB in 32-bit version, 4 GiB in 64-bit version.
+- When new 7-Zip creates multivolume archive, 7-Zip keeps in open state
+ only volumes that still can be changed. Previous versions kept all volumes
+ in open state until the end of the archive creation.
+- 7-Zip for Linux and macOS now can reduce the number of simultaneously open files,
+ when 7-Zip opens, extracts or creates multivolume archive. It allows to avoid
+ the failures for cases with big number of volumes, bacause there is a limitation
+ for number of open files allowed for a single program in Linux and macOS.
+- Some bugs were fixed.
+- Source code changes:
+- All external macros for compiling C/C++ code of 7-Zip now have Z7_ prefix.
+- 7-Zip COM interfaces now use new macros that allow to declare and implement COM interface.
+- The code has been modified to compile with the maximum diagnostic warning level:
+ -Wall in MSVC and -Weverything in CLANG.
+ And some warning types are disabled in 2 files:
+ - C/Compiler.h for C/C++ code warnings.
+ - CPP/Common/Common.h for C++ code warnings.
+- Linux/macOS versions of 7-Zip: IUnknown interface in new code doesn't use
+ virtual destructor that was used in previous 7-Zip and p7zip:
+ // virtual ~IUnknown() {}
+ So 7-Zip's dynamically linked shared libraries (codecs) are not compatible
+ between new 7-Zip for Linux/macOS and old 7-Zip (and p7zip).
+
+
+21.07 2021-12-26
+-------------------------
+- New switches: -spm and -im!{file_path} to exclude directories from processing
+ for specified paths that don't contain path separator character at the end of path.
+- The sorting order of files in archives was slightly changed to be more consistent
+ for cases where the name of some directory is the same as the prefix part of the name
+ of another directory or file.
+
+
+21.06 2021-11-24
+-------------------------
+- Bug in LZMA encoder in file LzmaEnc.c was fixed:
+ LzmaEnc_MemEncode(), LzmaEncode() and LzmaCompress() could work incorrectly,
+ if size value for output buffer is smaller than size required for all compressed data.
+ LzmaEnc_Encode() could work incorrectly,
+ if callback ISeqOutStream::Write() doesn't write all compressed data.
+ NCompress::NLzma::CEncoder::Code() could work incorrectly,
+ if callback ISequentialOutStream::Write() returns error code.
+- Bug in versions 21.00-21.05 was fixed:
+ 7-Zip didn't set attributes of directories during archive extracting.
+
+
+21.04 beta 2021-11-02
+-------------------------
+- 7-Zip now reduces the number of working CPU threads for compression,
+ if RAM size is not enough for compression with big LZMA2 dictionary.
+- 7-Zip now can create and check "file.sha256" text files that contain the list
+ of file names and SHA-256 checksums in format compatible with sha256sum program.
+
+
+21.03 beta 2021-07-20
+-------------------------
+- The maximum dictionary size for LZMA/LZMA2 compressing was increased to 4 GB (3840 MiB).
+- Minor speed optimizations in LZMA/LZMA2 compressing.
+
+
+21.02 alpha 2021-05-06
+-------------------------
+- The command line version of 7-Zip for macOS was released.
+- The speed for LZMA and LZMA2 decompression in arm64 versions for macOS and Linux
+ was increased by 20%-60%.
+
+
+21.01 alpha 2021-03-09
+-------------------------
+- The command line version of 7-Zip for Linux was released.
+- The improvements for speed of ARM64 version using hardware CPU instructions
+ for AES, CRC-32, SHA-1 and SHA-256.
+- Some bugs were fixed.
+
+
+20.02 alpha 2020-08-08
+-------------------------
+- The default number of LZMA2 chunks per solid block in 7z archive was increased to 64.
+ It allows to increase the compression speed for big 7z archives, if there is a big number
+ of CPU cores and threads.
+- The speed of PPMd compressing/decompressing was increased for 7z archives.
+- The new -ssp switch. If the switch -ssp is specified, 7-Zip doesn't allow the system
+ to modify "Last Access Time" property of source files for archiving and hashing operations.
+- Some bugs were fixed.
+
+
+20.00 alpha 2020-02-06
+-------------------------
+- 7-Zip now supports new optional match finders for LZMA/LZMA2 compression: bt5 and hc5,
+ that can work faster than bt4 and hc4 match finders for the data with big redundancy.
+- The compression ratio was improved for Fast and Fastest compression levels with the
+ following default settings:
+ - Fastest level (-mx1) : hc5 match finder with 256 KB dictionary.
+ - Fast level (-mx3) : hc5 match finder with 4 MB dictionary.
+- Minor speed optimizations in multithreaded LZMA/LZMA2 compression for Normal/Maximum/Ultra
+ compression levels.
+
+
+19.00 2019-02-21
+-------------------------
+- Encryption strength for 7z archives was increased:
+ the size of random initialization vector was increased from 64-bit to 128-bit,
+ and the pseudo-random number generator was improved.
+- The bug in 7zIn.c code was fixed.
+
+
+18.06 2018-12-30
+-------------------------
+- The speed for LZMA/LZMA2 compressing was increased by 3-10%,
+ and there are minor changes in compression ratio.
+- Some bugs were fixed.
+- The bug in 7-Zip 18.02-18.05 was fixed:
+ There was memory leak in multithreading xz decoder - XzDecMt_Decode(),
+ if xz stream contains only one block.
+- The changes for MSVS compiler makefiles:
+ - the makefiles now use "PLATFORM" macroname with values (x64, x86, arm64)
+ instead of "CPU" macroname with values (AMD64, ARM64).
+ - the makefiles by default now use static version of the run-time library.
+
+
+18.05 2018-04-30
+-------------------------
+- The speed for LZMA/LZMA2 compressing was increased
+ by 8% for fastest/fast compression levels and
+ by 3% for normal/maximum compression levels.
+- Previous versions of 7-Zip could work incorrectly in "Large memory pages" mode in
+ Windows 10 because of some BUG with "Large Pages" in Windows 10.
+ Now 7-Zip doesn't use "Large Pages" on Windows 10 up to revision 1709 (16299).
+- The BUG was fixed in Lzma2Enc.c
+ Lzma2Enc_Encode2() function worked incorretly,
+ if (inStream == NULL) and the number of block threads is more than 1.
+
+
+18.03 beta 2018-03-04
+-------------------------
+- Asm\x86\LzmaDecOpt.asm: new optimized LZMA decoder written in asm
+ for x64 with about 30% higher speed than main version of LZMA decoder written in C.
+- The speed for single-thread LZMA/LZMA2 decoder written in C was increased by 3%.
+- 7-Zip now can use multi-threading for 7z/LZMA2 decoding,
+ if there are multiple independent data chunks in LZMA2 stream.
+- 7-Zip now can use multi-threading for xz decoding,
+ if there are multiple blocks in xz stream.
+
+
+18.01 2019-01-28
+-------------------------
+- The BUG in 17.01 - 18.00 beta was fixed:
+ XzDec.c : random block unpacking and XzUnpacker_IsBlockFinished()
+ didn't work correctly for xz archives without checksum (CRC).
+
+
+18.00 beta 2019-01-10
+-------------------------
+- The BUG in xz encoder was fixed:
+ There was memory leak of 16 KB for each file compressed with
+ xz compression method, if additional filter was used.
+
+
+17.01 beta 2017-08-28
+-------------------------
+- Minor speed optimization for LZMA2 (xz and 7z) multi-threading compression.
+ 7-Zip now uses additional memory buffers for multi-block LZMA2 compression.
+ CPU utilization was slightly improved.
+- 7-zip now creates multi-block xz archives by default. Block size can be
+ specified with -ms[Size]{m|g} switch.
+- xz decoder now can unpack random block from multi-block xz archives.
+- 7-Zip command line: @listfile now doesn't work after -- switch.
+ Use -i@listfile before -- switch instead.
+- The BUGs were fixed:
+ 7-Zip 17.00 beta crashed for commands that write anti-item to 7z archive.
+
+
+17.00 beta 2017-04-29
+-------------------------
+- NewHandler.h / NewHandler.cpp:
+ now it redefines operator new() only for old MSVC compilers (_MSC_VER < 1900).
+- C/7zTypes.h : the names of variables in interface structures were changed (vt).
+- Some bugs were fixed. 7-Zip could crash in some cases.
+- Some internal changes in code.
+
+
+16.04 2016-10-04
+-------------------------
+- The bug was fixed in DllSecur.c.
+
+
+16.03 2016-09-28
+-------------------------
+- SFX modules now use some protection against DLL preloading attack.
+- Some bugs in 7z code were fixed.
+
+
+16.02 2016-05-21
+-------------------------
+- The BUG in 16.00 - 16.01 was fixed:
+ Split Handler (SplitHandler.cpp) returned incorrect
+ total size value (kpidSize) for split archives.
+
+
+16.01 2016-05-19
+-------------------------
+- Some internal changes to reduce the number of compiler warnings.
+
+
+16.00 2016-05-10
+-------------------------
+- Some bugs were fixed.
+
+
+15.12 2015-11-19
+-------------------------
+- The BUG in C version of 7z decoder was fixed:
+ 7zDec.c : SzDecodeLzma2()
+ 7z decoder could mistakenly report about decoding error for some 7z archives
+ that use LZMA2 compression method.
+ The probability to get that mistaken decoding error report was about
+ one error per 16384 solid blocks for solid blocks larger than 16 KB (compressed size).
+- The BUG (in 9.26-15.11) in C version of 7z decoder was fixed:
+ 7zArcIn.c : SzReadHeader2()
+ 7z decoder worked incorrectly for 7z archives that contain
+ empty solid blocks, that can be placed to 7z archive, if some file is
+ unavailable for reading during archive creation.
+
+
+15.09 beta 2015-10-16
+-------------------------
+- The BUG in LZMA / LZMA2 encoding code was fixed.
+ The BUG in LzFind.c::MatchFinder_ReadBlock() function.
+ If input data size is larger than (4 GiB - dictionary_size),
+ the following code worked incorrectly:
+ - LZMA : LzmaEnc_MemEncode(), LzmaEncode() : LZMA encoding functions
+ for compressing from memory to memory.
+ That BUG is not related to LZMA encoder version that works via streams.
+ - LZMA2 : multi-threaded version of LZMA2 encoder worked incorrectly, if
+ default value of chunk size (CLzma2EncProps::blockSize) is changed
+ to value larger than (4 GiB - dictionary_size).
+
+
+9.38 beta 2015-01-03
+-------------------------
+- The BUG in 9.31-9.37 was fixed:
+ IArchiveGetRawProps interface was disabled for 7z archives.
+- The BUG in 9.26-9.36 was fixed:
+ Some code in CPP\7zip\Archive\7z\ worked correctly only under Windows.
+
+
+9.36 beta 2014-12-26
+-------------------------
+- The BUG in command line version was fixed:
+ 7-Zip created temporary archive in current folder during update archive
+ operation, if -w{Path} switch was not specified.
+ The fixed 7-Zip creates temporary archive in folder that contains updated archive.
+- The BUG in 9.33-9.35 was fixed:
+ 7-Zip silently ignored file reading errors during 7z or gz archive creation,
+ and the created archive contained only part of file that was read before error.
+ The fixed 7-Zip stops archive creation and it reports about error.
+
+
+9.35 beta 2014-12-07
+-------------------------
+- 7zr.exe now support AES encryption.
+- SFX mudules were added to LZMA SDK
+- Some bugs were fixed.
+
+
+9.21 beta 2011-04-11
+-------------------------
+- New class FString for file names at file systems.
+- Speed optimization in CRC code for big-endian CPUs.
+- The BUG in Lzma2Dec.c was fixed:
+ Lzma2Decode function didn't work.
+
+
+9.18 beta 2010-11-02
+-------------------------
+- New small SFX module for installers (SfxSetup).
+
+
+9.12 beta 2010-03-24
+-------------------------
+- The BUG in LZMA SDK 9.* was fixed: LZMA2 codec didn't work,
+ if more than 10 threads were used (or more than 20 threads in some modes).
+
+
+9.11 beta 2010-03-15
+-------------------------
+- PPMd compression method support
+
+
+9.09 2009-12-12
+-------------------------
+- The bug was fixed:
+ Utf16_To_Utf8 funstions in UTFConvert.cpp and 7zMain.c
+ incorrectly converted surrogate characters (the code >= 0x10000) to UTF-8.
+- Some bugs were fixed
+
+
+9.06 2009-08-17
+-------------------------
+- Some changes in ANSI-C 7z Decoder interfaces.
+
+
+9.04 2009-05-30
+-------------------------
+- LZMA2 compression method support
+- xz format support
+
+
+4.65 2009-02-03
+-------------------------
+- Some minor fixes
+
+
+4.63 2008-12-31
+-------------------------
+- Some minor fixes
+
+
+4.61 beta 2008-11-23
+-------------------------
+- The bug in ANSI-C LZMA Decoder was fixed:
+ If encoded stream was corrupted, decoder could access memory
+ outside of allocated range.
+- Some changes in ANSI-C 7z Decoder interfaces.
+- LZMA SDK is placed in the public domain.
+
+
+4.60 beta 2008-08-19
+-------------------------
+- Some minor fixes.
+
+
+4.59 beta 2008-08-13
+-------------------------
+- The bug was fixed:
+ LZMA Encoder in fast compression mode could access memory outside of
+ allocated range in some rare cases.
+
+
+4.58 beta 2008-05-05
+-------------------------
+- ANSI-C LZMA Decoder was rewritten for speed optimizations.
+- ANSI-C LZMA Encoder was included to LZMA SDK.
+- C++ LZMA code now is just wrapper over ANSI-C code.
+
+
+4.57 2007-12-12
+-------------------------
+- Speed optimizations in Ñ++ LZMA Decoder.
+- Small changes for more compatibility with some C/C++ compilers.
+
+
+4.49 beta 2007-07-05
+-------------------------
+- .7z ANSI-C Decoder:
+ - now it supports BCJ and BCJ2 filters
+ - now it supports files larger than 4 GB.
+ - now it supports "Last Write Time" field for files.
+- C++ code for .7z archives compressing/decompressing from 7-zip
+ was included to LZMA SDK.
+
+
+4.43 2006-06-04
+-------------------------
+- Small changes for more compatibility with some C/C++ compilers.
+
+
+4.42 2006-05-15
+-------------------------
+- Small changes in .h files in ANSI-C version.
+
+
+4.39 beta 2006-04-14
+-------------------------
+- The bug in versions 4.33b:4.38b was fixed:
+ C++ version of LZMA encoder could not correctly compress
+ files larger than 2 GB with HC4 match finder (-mfhc4).
+
+
+4.37 beta 2005-04-06
+-------------------------
+- Fixes in C++ code: code could no be compiled if _NO_EXCEPTIONS was defined.
+
+
+4.35 beta 2005-03-02
+-------------------------
+- The bug was fixed in C++ version of LZMA Decoder:
+ If encoded stream was corrupted, decoder could access memory
+ outside of allocated range.
+
+
+4.34 beta 2006-02-27
+-------------------------
+- Compressing speed and memory requirements for compressing were increased
+- LZMA now can use only these match finders: HC4, BT2, BT3, BT4
+
+
+4.32 2005-12-09
+-------------------------
+- Java version of LZMA SDK was included
+
+
+4.30 2005-11-20
+-------------------------
+- Compression ratio was improved in -a2 mode
+- Speed optimizations for compressing in -a2 mode
+- -fb switch now supports values up to 273
+- The bug in 7z_C (7zIn.c) was fixed:
+ It used Alloc/Free functions from different memory pools.
+ So if program used two memory pools, it worked incorrectly.
+- 7z_C: .7z format supporting was improved
+- LZMA# SDK (C#.NET version) was included
+
+
+4.27 (Updated) 2005-09-21
+-------------------------
+- Some GUIDs/interfaces in C++ were changed.
+ IStream.h:
+ ISequentialInStream::Read now works as old ReadPart
+ ISequentialOutStream::Write now works as old WritePart
+
+
+4.27 2005-08-07
+-------------------------
+- The bug in LzmaDecodeSize.c was fixed:
+ if _LZMA_IN_CB and _LZMA_OUT_READ were defined,
+ decompressing worked incorrectly.
+
+
+4.26 2005-08-05
+-------------------------
+- Fixes in 7z_C code and LzmaTest.c:
+ previous versions could work incorrectly,
+ if malloc(0) returns 0
+
+
+4.23 2005-06-29
+-------------------------
+- Small fixes in C++ code
+
+
+4.22 2005-06-10
+-------------------------
+- Small fixes
+
+
+4.21 2005-06-08
+-------------------------
+- Interfaces for ANSI-C LZMA Decoder (LzmaDecode.c) were changed
+- New additional version of ANSI-C LZMA Decoder with zlib-like interface:
+ - LzmaStateDecode.h
+ - LzmaStateDecode.c
+ - LzmaStateTest.c
+- ANSI-C LZMA Decoder now can decompress files larger than 4 GB
+
+
+4.17 2005-04-18
+-------------------------
+- New example for RAM->RAM compressing/decompressing:
+ LZMA + BCJ (filter for x86 code):
+ - LzmaRam.h
+ - LzmaRam.cpp
+ - LzmaRamDecode.h
+ - LzmaRamDecode.c
+ - -f86 switch for lzma.exe
+
+
+4.16 2005-03-29
+-------------------------
+- The bug was fixed in LzmaDecode.c (ANSI-C LZMA Decoder):
+ If _LZMA_OUT_READ was defined, and if encoded stream was corrupted,
+ decoder could access memory outside of allocated range.
+- Speed optimization of ANSI-C LZMA Decoder (now it's about 20% faster).
+ Old version of LZMA Decoder now is in file LzmaDecodeSize.c.
+ LzmaDecodeSize.c can provide slightly smaller code than LzmaDecode.c
+- Small speed optimization in LZMA C++ code
+- filter for SPARC's code was added
+- Simplified version of .7z ANSI-C Decoder was included
+
+
+4.06 2004-09-05
+-------------------------
+- The bug in v4.05 was fixed:
+ LZMA-Encoder didn't release output stream in some cases.
+
+
+4.05 2004-08-25
+-------------------------
+- Source code of filters for x86, IA-64, ARM, ARM-Thumb
+ and PowerPC code was included to SDK
+- Some internal minor changes
+
+
+4.04 2004-07-28
+-------------------------
+- More compatibility with some C++ compilers
+
+
+4.03 2004-06-18
+-------------------------
+- "Benchmark" command was added. It measures compressing
+ and decompressing speed and shows rating values.
+ Also it checks hardware errors.
+
+
+4.02 2004-06-10
+-------------------------
+- C++ LZMA Encoder/Decoder code now is more portable
+ and it can be compiled by GCC on Linux.
+
+
+4.01 2004-02-15
+-------------------------
+- Some detection of data corruption was enabled.
+ LzmaDecode.c / RangeDecoderReadByte
+ .....
+ {
+ rd->ExtraBytes = 1;
+ return 0xFF;
+ }
+
+
+4.00 2004-02-13
+-------------------------
+- Original version of LZMA SDK
+
+
+
+HISTORY of the LZMA
+-------------------
+ 2001-2008: Improvements to LZMA compressing/decompressing code,
+ keeping compatibility with original LZMA format
+ 1996-2001: Development of LZMA compression format
+
+ Some milestones:
+
+ 2001-08-30: LZMA compression was added to 7-Zip
+ 1999-01-02: First version of 7-Zip was released
+
+
+End of document
diff --git a/src/Common/lzma/lzma-sdk.txt b/src/Common/lzma/lzma-sdk.txt
new file mode 100644
index 00000000..141b0fd4
--- /dev/null
+++ b/src/Common/lzma/lzma-sdk.txt
@@ -0,0 +1,404 @@
+LZMA SDK 23.01
+--------------
+
+LZMA SDK provides the documentation, samples, header files,
+libraries, and tools you need to develop applications that
+use 7z / LZMA / LZMA2 / XZ compression.
+
+LZMA is an improved version of famous LZ77 compression algorithm.
+It was improved in way of maximum increasing of compression ratio,
+keeping high decompression speed and low memory requirements for
+decompressing.
+
+LZMA2 is a LZMA based compression method. LZMA2 provides better
+multithreading support for compression than LZMA and some other improvements.
+
+7z is a file format for data compression and file archiving.
+7z is a main file format for 7-Zip compression program (www.7-zip.org).
+7z format supports different compression methods: LZMA, LZMA2 and others.
+7z also supports AES-256 based encryption.
+
+XZ is a file format for data compression that uses LZMA2 compression.
+XZ format provides additional features: SHA/CRC check, filters for
+improved compression ratio, splitting to blocks and streams,
+
+
+
+LICENSE
+-------
+
+LZMA SDK is written and placed in the public domain by Igor Pavlov.
+
+Some code in LZMA SDK is based on public domain code from another developers:
+ 1) PPMd var.H (2001): Dmitry Shkarin
+ 2) SHA-256: Wei Dai (Crypto++ library)
+
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute the
+original LZMA SDK code, either in source code form or as a compiled binary, for
+any purpose, commercial or non-commercial, and by any means.
+
+LZMA SDK code is compatible with open source licenses, for example, you can
+include it to GNU GPL or GNU LGPL code.
+
+
+LZMA SDK Contents
+-----------------
+
+ Source code:
+
+ - C / C++ / C# / Java - LZMA compression and decompression
+ - C / C++ - LZMA2 compression and decompression
+ - C / C++ - XZ compression and decompression
+ - C - 7z decompression
+ - C++ - 7z compression and decompression
+ - C - small SFXs for installers (7z decompression)
+ - C++ - SFXs and SFXs for installers (7z decompression)
+
+ Precomiled binaries:
+
+ - console programs for lzma / 7z / xz compression and decompression
+ - SFX modules for installers.
+
+
+UNIX/Linux version
+------------------
+There are several otpions to compile 7-Zip with different compilers: gcc and clang.
+Also 7-Zip code contains two versions for some critical parts of code: in C and in Assembeler.
+So if you compile the version with Assembeler code, you will get faster 7-Zip binary.
+
+7-Zip's assembler code uses the following syntax for different platforms:
+
+1) x86 and x86-64 (AMD64): MASM syntax.
+ There are 2 programs that supports MASM syntax in Linux.
+' 'Asmc Macro Assembler and JWasm. But JWasm now doesn't support some
+ cpu instructions used in 7-Zip.
+ So you must install Asmc Macro Assembler in Linux, if you want to compile fastest version
+ of 7-Zip x86 and x86-64:
+ https://github.com/nidud/asmc
+
+2) arm64: GNU assembler for ARM64 with preprocessor.
+ That systax of that arm64 assembler code in 7-Zip is supported by GCC and CLANG for ARM64.
+
+There are different binaries that can be compiled from 7-Zip source.
+There are 2 main files in folder for compiling:
+ makefile - that can be used for compiling Windows version of 7-Zip with nmake command
+ makefile.gcc - that can be used for compiling Linux/macOS versions of 7-Zip with make command
+
+At first you must change the current folder to folder that contains `makefile.gcc`:
+
+ cd CPP/7zip/Bundles/Alone7z
+
+Then you can compile `makefile.gcc` with the command:
+
+ make -j -f makefile.gcc
+
+Also there are additional "*.mak" files in folder "CPP/7zip/" that can be used to compile
+7-Zip binaries with optimized code and optimzing options.
+
+To compile with GCC without assembler:
+ cd CPP/7zip/Bundles/Alone7z
+ make -j -f ../../cmpl_gcc.mak
+
+To compile with CLANG without assembler:
+ make -j -f ../../cmpl_clang.mak
+
+To compile 7-Zip for x86-64 with asmc assembler:
+ make -j -f ../../cmpl_gcc_x64.mak
+
+To compile 7-Zip for arm64 with assembler:
+ make -j -f ../../cmpl_gcc_arm64.mak
+
+To compile 7-Zip for arm64 for macOS:
+ make -j -f ../../cmpl_mac_arm64.mak
+
+Also you can change some compiler options in the mak files:
+ cmpl_gcc.mak
+ var_gcc.mak
+ warn_gcc.mak
+
+
+
+Also you can use p7zip (port of 7-Zip for POSIX systems like Unix or Linux):
+
+ http://p7zip.sourceforge.net/
+
+
+Files
+-----
+
+DOC/7zC.txt - 7z ANSI-C Decoder description
+DOC/7zFormat.txt - 7z Format description
+DOC/installer.txt - information about 7-Zip for installers
+DOC/lzma.txt - LZMA compression description
+DOC/lzma-sdk.txt - LZMA SDK description (this file)
+DOC/lzma-history.txt - history of LZMA SDK
+DOC/lzma-specification.txt - Specification of LZMA
+DOC/Methods.txt - Compression method IDs for .7z
+
+bin/installer/ - example script to create installer that uses SFX module,
+
+bin/7zdec.exe - simplified 7z archive decoder
+bin/7zr.exe - 7-Zip console program (reduced version)
+bin/x64/7zr.exe - 7-Zip console program (reduced version) (x64 version)
+bin/lzma.exe - file->file LZMA encoder/decoder for Windows
+bin/7zS2.sfx - small SFX module for installers (GUI version)
+bin/7zS2con.sfx - small SFX module for installers (Console version)
+bin/7zSD.sfx - SFX module for installers.
+
+
+7zDec.exe
+---------
+7zDec.exe is simplified 7z archive decoder.
+It supports only LZMA, LZMA2, and PPMd methods.
+7zDec decodes whole solid block from 7z archive to RAM.
+The RAM consumption can be high.
+
+
+
+
+Source code structure
+---------------------
+
+
+Asm/ - asm files (optimized code for CRC calculation and Intel-AES encryption)
+
+C/ - C files (compression / decompression and other)
+ Util/
+ 7z - 7z decoder program (decoding 7z files)
+ Lzma - LZMA program (file->file LZMA encoder/decoder).
+ LzmaLib - LZMA library (.DLL for Windows)
+ SfxSetup - small SFX module for installers
+
+CPP/ -- CPP files
+
+ Common - common files for C++ projects
+ Windows - common files for Windows related code
+
+ 7zip - files related to 7-Zip
+
+ Archive - files related to archiving
+
+ Common - common files for archive handling
+ 7z - 7z C++ Encoder/Decoder
+
+ Bundles - Modules that are bundles of other modules (files)
+
+ Alone7z - 7zr.exe: Standalone 7-Zip console program (reduced version)
+ Format7zExtractR - 7zxr.dll: Reduced version of 7z DLL: extracting from 7z/LZMA/BCJ/BCJ2.
+ Format7zR - 7zr.dll: Reduced version of 7z DLL: extracting/compressing to 7z/LZMA/BCJ/BCJ2
+ LzmaCon - lzma.exe: LZMA compression/decompression
+ LzmaSpec - example code for LZMA Specification
+ SFXCon - 7zCon.sfx: Console 7z SFX module
+ SFXSetup - 7zS.sfx: 7z SFX module for installers
+ SFXWin - 7z.sfx: GUI 7z SFX module
+
+ Common - common files for 7-Zip
+
+ Compress - files for compression/decompression
+
+ Crypto - files for encryption / decompression
+
+ UI - User Interface files
+
+ Client7z - Test application for 7za.dll, 7zr.dll, 7zxr.dll
+ Common - Common UI files
+ Console - Code for console program (7z.exe)
+ Explorer - Some code from 7-Zip Shell extension
+ FileManager - Some GUI code from 7-Zip File Manager
+ GUI - Some GUI code from 7-Zip
+
+
+CS/ - C# files
+ 7zip
+ Common - some common files for 7-Zip
+ Compress - files related to compression/decompression
+ LZ - files related to LZ (Lempel-Ziv) compression algorithm
+ LZMA - LZMA compression/decompression
+ LzmaAlone - file->file LZMA compression/decompression
+ RangeCoder - Range Coder (special code of compression/decompression)
+
+Java/ - Java files
+ SevenZip
+ Compression - files related to compression/decompression
+ LZ - files related to LZ (Lempel-Ziv) compression algorithm
+ LZMA - LZMA compression/decompression
+ RangeCoder - Range Coder (special code of compression/decompression)
+
+
+Note:
+ Asm / C / C++ source code of LZMA SDK is part of 7-Zip's source code.
+ 7-Zip's source code can be downloaded from 7-Zip's SourceForge page:
+
+ http://sourceforge.net/projects/sevenzip/
+
+
+
+LZMA features
+-------------
+ - Variable dictionary size (up to 1 GB)
+ - Estimated compressing speed: about 2 MB/s on 2 GHz CPU
+ - Estimated decompressing speed:
+ - 20-30 MB/s on modern 2 GHz cpu
+ - 1-2 MB/s on 200 MHz simple RISC cpu: (ARM, MIPS, PowerPC)
+ - Small memory requirements for decompressing (16 KB + DictionarySize)
+ - Small code size for decompressing: 5-8 KB
+
+LZMA decoder uses only integer operations and can be
+implemented in any modern 32-bit CPU (or on 16-bit CPU with some conditions).
+
+Some critical operations that affect the speed of LZMA decompression:
+ 1) 32*16 bit integer multiply
+ 2) Mispredicted branches (penalty mostly depends from pipeline length)
+ 3) 32-bit shift and arithmetic operations
+
+The speed of LZMA decompressing mostly depends from CPU speed.
+Memory speed has no big meaning. But if your CPU has small data cache,
+overall weight of memory speed will slightly increase.
+
+
+How To Use
+----------
+
+Using LZMA encoder/decoder executable
+--------------------------------------
+
+Usage: LZMA <e|d> inputFile outputFile [<switches>...]
+
+ e: encode file
+
+ d: decode file
+
+ b: Benchmark. There are two tests: compressing and decompressing
+ with LZMA method. Benchmark shows rating in MIPS (million
+ instructions per second). Rating value is calculated from
+ measured speed and it is normalized with Intel's Core 2 results.
+ Also Benchmark checks possible hardware errors (RAM
+ errors in most cases). Benchmark uses these settings:
+ (-a1, -d21, -fb32, -mfbt4). You can change only -d parameter.
+ Also you can change the number of iterations. Example for 30 iterations:
+ LZMA b 30
+ Default number of iterations is 10.
+
+<Switches>
+
+
+ -a{N}: set compression mode 0 = fast, 1 = normal
+ default: 1 (normal)
+
+ d{N}: Sets Dictionary size - [0, 30], default: 23 (8MB)
+ The maximum value for dictionary size is 1 GB = 2^30 bytes.
+ Dictionary size is calculated as DictionarySize = 2^N bytes.
+ For decompressing file compressed by LZMA method with dictionary
+ size D = 2^N you need about D bytes of memory (RAM).
+
+ -fb{N}: set number of fast bytes - [5, 273], default: 128
+ Usually big number gives a little bit better compression ratio
+ and slower compression process.
+
+ -lc{N}: set number of literal context bits - [0, 8], default: 3
+ Sometimes lc=4 gives gain for big files.
+
+ -lp{N}: set number of literal pos bits - [0, 4], default: 0
+ lp switch is intended for periodical data when period is
+ equal 2^N. For example, for 32-bit (4 bytes)
+ periodical data you can use lp=2. Often it's better to set lc0,
+ if you change lp switch.
+
+ -pb{N}: set number of pos bits - [0, 4], default: 2
+ pb switch is intended for periodical data
+ when period is equal 2^N.
+
+ -mf{MF_ID}: set Match Finder. Default: bt4.
+ Algorithms from hc* group doesn't provide good compression
+ ratio, but they often works pretty fast in combination with
+ fast mode (-a0).
+
+ Memory requirements depend from dictionary size
+ (parameter "d" in table below).
+
+ MF_ID Memory Description
+
+ bt2 d * 9.5 + 4MB Binary Tree with 2 bytes hashing.
+ bt3 d * 11.5 + 4MB Binary Tree with 3 bytes hashing.
+ bt4 d * 11.5 + 4MB Binary Tree with 4 bytes hashing.
+ hc4 d * 7.5 + 4MB Hash Chain with 4 bytes hashing.
+
+ -eos: write End Of Stream marker. By default LZMA doesn't write
+ eos marker, since LZMA decoder knows uncompressed size
+ stored in .lzma file header.
+
+ -si: Read data from stdin (it will write End Of Stream marker).
+ -so: Write data to stdout
+
+
+Examples:
+
+1) LZMA e file.bin file.lzma -d16 -lc0
+
+compresses file.bin to file.lzma with 64 KB dictionary (2^16=64K)
+and 0 literal context bits. -lc0 allows to reduce memory requirements
+for decompression.
+
+
+2) LZMA e file.bin file.lzma -lc0 -lp2
+
+compresses file.bin to file.lzma with settings suitable
+for 32-bit periodical data (for example, ARM or MIPS code).
+
+3) LZMA d file.lzma file.bin
+
+decompresses file.lzma to file.bin.
+
+
+Compression ratio hints
+-----------------------
+
+Recommendations
+---------------
+
+To increase the compression ratio for LZMA compressing it's desirable
+to have aligned data (if it's possible) and also it's desirable to locate
+data in such order, where code is grouped in one place and data is
+grouped in other place (it's better than such mixing: code, data, code,
+data, ...).
+
+
+Filters
+-------
+You can increase the compression ratio for some data types, using
+special filters before compressing. For example, it's possible to
+increase the compression ratio on 5-10% for code for those CPU ISAs:
+x86, IA-64, ARM, ARM-Thumb, PowerPC, SPARC.
+
+You can find C source code of such filters in C/Bra*.* files
+
+You can check the compression ratio gain of these filters with such
+7-Zip commands (example for ARM code):
+No filter:
+ 7z a a1.7z a.bin -m0=lzma
+
+With filter for little-endian ARM code:
+ 7z a a2.7z a.bin -m0=arm -m1=lzma
+
+It works in such manner:
+Compressing = Filter_encoding + LZMA_encoding
+Decompressing = LZMA_decoding + Filter_decoding
+
+Compressing and decompressing speed of such filters is very high,
+so it will not increase decompressing time too much.
+Moreover, it reduces decompression time for LZMA_decoding,
+since compression ratio with filtering is higher.
+
+These filters convert CALL (calling procedure) instructions
+from relative offsets to absolute addresses, so such data becomes more
+compressible.
+
+For some ISAs (for example, for MIPS) it's impossible to get gain from such filter.
+
+
+
+---
+
+http://www.7-zip.org
+http://www.7-zip.org/sdk.html
+http://www.7-zip.org/support.html