Revert changes from main commits that were merged into blender-v4.1-release

The last good commit was 4bf6a2e564.
Author: Brecht Van Lommel
Date: 2024-02-19 15:54:48 +01:00
parent 6b6393b22f
commit 0f2064bc3b
2245 changed files with 21398 additions and 34232 deletions

extern/lzma/7zTypes.h vendored

@@ -1,597 +0,0 @@
/* 7zTypes.h -- Basic types
2023-04-02 : Igor Pavlov : Public domain */
#ifndef ZIP7_7Z_TYPES_H
#define ZIP7_7Z_TYPES_H
#ifdef _WIN32
/* #include <windows.h> */
#else
#include <errno.h>
#endif
#include <stddef.h>
#ifndef EXTERN_C_BEGIN
#ifdef __cplusplus
#define EXTERN_C_BEGIN extern "C" {
#define EXTERN_C_END }
#else
#define EXTERN_C_BEGIN
#define EXTERN_C_END
#endif
#endif
EXTERN_C_BEGIN
#define SZ_OK 0
#define SZ_ERROR_DATA 1
#define SZ_ERROR_MEM 2
#define SZ_ERROR_CRC 3
#define SZ_ERROR_UNSUPPORTED 4
#define SZ_ERROR_PARAM 5
#define SZ_ERROR_INPUT_EOF 6
#define SZ_ERROR_OUTPUT_EOF 7
#define SZ_ERROR_READ 8
#define SZ_ERROR_WRITE 9
#define SZ_ERROR_PROGRESS 10
#define SZ_ERROR_FAIL 11
#define SZ_ERROR_THREAD 12
#define SZ_ERROR_ARCHIVE 16
#define SZ_ERROR_NO_ARCHIVE 17
typedef int SRes;
#ifdef _MSC_VER
#if _MSC_VER > 1200
#define MY_ALIGN(n) __declspec(align(n))
#else
#define MY_ALIGN(n)
#endif
#else
/*
// C11/C++11:
#include <stdalign.h>
#define MY_ALIGN(n) alignas(n)
*/
#define MY_ALIGN(n) __attribute__ ((aligned(n)))
#endif
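/* Usage sketch (illustrative, not part of the original header):
   MY_ALIGN(64) static UInt32 g_exampleTable[256]; */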
#ifdef _WIN32
/* typedef DWORD WRes; */
typedef unsigned WRes;
#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
// #define MY_HRES_ERROR_INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
#else // _WIN32
// #define ENV_HAVE_LSTAT
typedef int WRes;
// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
#define MY_FACILITY_ERRNO 0x800
#define MY_FACILITY_WIN32 7
#define MY_FACILITY_WRes MY_FACILITY_ERRNO
#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
( (HRESULT)(x) & 0x0000FFFF) \
| (MY_FACILITY_WRes << 16) \
| (HRESULT)0x80000000 ))
#define MY_SRes_HRESULT_FROM_WRes(x) \
((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x))
// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno)
#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x)
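/* Worked example (illustrative): with MY_FACILITY_WRes == 0x800,
   ENOENT (2 on most systems) packs to
   0x80000000 | (0x800 << 16) | 2 == (HRESULT)0x88000002 */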
/*
#define ERROR_FILE_NOT_FOUND 2L
#define ERROR_ACCESS_DENIED 5L
#define ERROR_NO_MORE_FILES 18L
#define ERROR_LOCK_VIOLATION 33L
#define ERROR_FILE_EXISTS 80L
#define ERROR_DISK_FULL 112L
#define ERROR_NEGATIVE_SEEK 131L
#define ERROR_ALREADY_EXISTS 183L
#define ERROR_DIRECTORY 267L
#define ERROR_TOO_MANY_POSTS 298L
#define ERROR_INTERNAL_ERROR 1359L
#define ERROR_INVALID_REPARSE_DATA 4392L
#define ERROR_REPARSE_TAG_INVALID 4393L
#define ERROR_REPARSE_TAG_MISMATCH 4394L
*/
// we use errno equivalents for some WIN32 errors:
#define ERROR_INVALID_PARAMETER EINVAL
#define ERROR_INVALID_FUNCTION EINVAL
#define ERROR_ALREADY_EXISTS EEXIST
#define ERROR_FILE_EXISTS EEXIST
#define ERROR_PATH_NOT_FOUND ENOENT
#define ERROR_FILE_NOT_FOUND ENOENT
#define ERROR_DISK_FULL ENOSPC
// #define ERROR_INVALID_HANDLE EBADF
// we use FACILITY_WIN32 for errors that have no errno equivalent
// Too many posts were made to a semaphore.
#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL)
#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L)
#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L)
// if (MY_FACILITY_WRes != FACILITY_WIN32),
// we use FACILITY_WIN32 for COM errors:
#define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
#define E_INVALIDARG ((HRESULT)0x80070057L)
#define MY_E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
/*
// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
#define MY_E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
*/
#define TEXT(quote) quote
#define FILE_ATTRIBUTE_READONLY 0x0001
#define FILE_ATTRIBUTE_HIDDEN 0x0002
#define FILE_ATTRIBUTE_SYSTEM 0x0004
#define FILE_ATTRIBUTE_DIRECTORY 0x0010
#define FILE_ATTRIBUTE_ARCHIVE 0x0020
#define FILE_ATTRIBUTE_DEVICE 0x0040
#define FILE_ATTRIBUTE_NORMAL 0x0080
#define FILE_ATTRIBUTE_TEMPORARY 0x0100
#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200
#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400
#define FILE_ATTRIBUTE_COMPRESSED 0x0800
#define FILE_ATTRIBUTE_OFFLINE 0x1000
#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000
#define FILE_ATTRIBUTE_ENCRYPTED 0x4000
#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */
#endif
#ifndef RINOK
#define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; }
#endif
#ifndef RINOK_WRes
#define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; }
#endif
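/* Usage sketch (illustrative; Example_Step1/Example_Step2 are hypothetical
   sub-operations, not part of this header): RINOK propagates any non-zero
   SRes from a sub-call, keeping multi-step functions flat. */
SRes Example_Step1(void);
SRes Example_Step2(void);
static SRes Example_RunBoth(void)
{
  RINOK(Example_Step1())
  RINOK(Example_Step2())
  return SZ_OK;
}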
typedef unsigned char Byte;
typedef short Int16;
typedef unsigned short UInt16;
#ifdef Z7_DECL_Int32_AS_long
typedef long Int32;
typedef unsigned long UInt32;
#else
typedef int Int32;
typedef unsigned int UInt32;
#endif
#ifndef _WIN32
typedef int INT;
typedef Int32 INT32;
typedef unsigned int UINT;
typedef UInt32 UINT32;
typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility
typedef UINT32 ULONG;
#undef DWORD
typedef UINT32 DWORD;
#define VOID void
#define HRESULT LONG
typedef void *LPVOID;
// typedef void VOID;
// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
// gcc / clang on Unix : sizeof(long) == sizeof(void *) in both 32-bit and 64-bit modes
typedef long INT_PTR;
typedef unsigned long UINT_PTR;
typedef long LONG_PTR;
typedef unsigned long DWORD_PTR;
typedef size_t SIZE_T;
#endif // _WIN32
#define MY_HRES_ERROR_INTERNAL_ERROR ((HRESULT)0x8007054FL)
#ifdef Z7_DECL_Int64_AS_long
typedef long Int64;
typedef unsigned long UInt64;
#else
#if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__)
typedef __int64 Int64;
typedef unsigned __int64 UInt64;
#else
#if defined(__clang__) || defined(__GNUC__)
#include <stdint.h>
typedef int64_t Int64;
typedef uint64_t UInt64;
#else
typedef long long int Int64;
typedef unsigned long long int UInt64;
// #define UINT64_CONST(n) n ## ULL
#endif
#endif
#endif
#define UINT64_CONST(n) n
#ifdef Z7_DECL_SizeT_AS_unsigned_int
typedef unsigned int SizeT;
#else
typedef size_t SizeT;
#endif
/*
#if (defined(_MSC_VER) && _MSC_VER <= 1200)
typedef size_t MY_uintptr_t;
#else
#include <stdint.h>
typedef uintptr_t MY_uintptr_t;
#endif
*/
typedef int BoolInt;
/* typedef BoolInt Bool; */
#define True 1
#define False 0
#ifdef _WIN32
#define Z7_STDCALL __stdcall
#else
#define Z7_STDCALL
#endif
#ifdef _MSC_VER
#if _MSC_VER >= 1300
#define Z7_NO_INLINE __declspec(noinline)
#else
#define Z7_NO_INLINE
#endif
#define Z7_FORCE_INLINE __forceinline
#define Z7_CDECL __cdecl
#define Z7_FASTCALL __fastcall
#else // _MSC_VER
#if (defined(__GNUC__) && (__GNUC__ >= 4)) \
|| (defined(__clang__) && (__clang_major__ >= 4)) \
|| defined(__INTEL_COMPILER) \
|| defined(__xlC__)
#define Z7_NO_INLINE __attribute__((noinline))
#define Z7_FORCE_INLINE __attribute__((always_inline)) inline
#else
#define Z7_NO_INLINE
#define Z7_FORCE_INLINE
#endif
#define Z7_CDECL
#if defined(_M_IX86) \
|| defined(__i386__)
// #define Z7_FASTCALL __attribute__((fastcall))
// #define Z7_FASTCALL __attribute__((cdecl))
#define Z7_FASTCALL
#elif defined(MY_CPU_AMD64)
// #define Z7_FASTCALL __attribute__((ms_abi))
#define Z7_FASTCALL
#else
#define Z7_FASTCALL
#endif
#endif // _MSC_VER
/* The following interfaces use first parameter as pointer to structure */
// #define Z7_C_IFACE_CONST_QUAL
#define Z7_C_IFACE_CONST_QUAL const
#define Z7_C_IFACE_DECL(a) \
struct a ## _; \
typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \
typedef struct a ## _ a; \
struct a ## _
Z7_C_IFACE_DECL (IByteIn)
{
Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */
};
#define IByteIn_Read(p) (p)->Read(p)
Z7_C_IFACE_DECL (IByteOut)
{
void (*Write)(IByteOutPtr p, Byte b);
};
#define IByteOut_Write(p, b) (p)->Write(p, b)
Z7_C_IFACE_DECL (ISeqInStream)
{
SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) < input(*size)) is allowed */
};
#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
/* try to read as much as is available in the stream, limited by (*processedSize) */
SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize);
/* it can return SZ_ERROR_INPUT_EOF */
// SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size);
// SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType);
SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf);
Z7_C_IFACE_DECL (ISeqOutStream)
{
size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size);
/* Returns: result - the number of actually written bytes.
(result < size) means error */
};
#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size)
typedef enum
{
SZ_SEEK_SET = 0,
SZ_SEEK_CUR = 1,
SZ_SEEK_END = 2
} ESzSeek;
Z7_C_IFACE_DECL (ISeekInStream)
{
SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size); /* same as ISeqInStream::Read */
SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin);
};
#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
Z7_C_IFACE_DECL (ILookInStream)
{
SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) > input(*size)) is not allowed
(output(*size) < input(*size)) is allowed */
SRes (*Skip)(ILookInStreamPtr p, size_t offset);
/* offset must be <= output(*size) of Look */
SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size);
/* reads directly (without buffer). It's same as ISeqInStream::Read */
SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin);
};
#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset)
#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size)
#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size);
SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset);
/* reads via ILookInStream::Read */
SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType);
SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size);
typedef struct
{
ILookInStream vt;
ISeekInStreamPtr realStream;
size_t pos;
size_t size; /* it's data size */
/* the following variables must be set outside */
Byte *buf;
size_t bufSize;
} CLookToRead2;
void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
#define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; }
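/* Setup sketch (illustrative, not upstream code): the caller supplies the
   buffer, as the struct comment above requires; realStream is any
   ISeekInStream implementation. */
static void Example_InitLookStream(CLookToRead2 *p,
    ISeekInStreamPtr realStream, Byte *buf, size_t bufSize)
{
  LookToRead2_CreateVTable(p, False /* lookahead */);
  p->realStream = realStream;
  p->buf = buf;
  p->bufSize = bufSize;
  LookToRead2_INIT(p)
}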
typedef struct
{
ISeqInStream vt;
ILookInStreamPtr realStream;
} CSecToLook;
void SecToLook_CreateVTable(CSecToLook *p);
typedef struct
{
ISeqInStream vt;
ILookInStreamPtr realStream;
} CSecToRead;
void SecToRead_CreateVTable(CSecToRead *p);
Z7_C_IFACE_DECL (ICompressProgress)
{
SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize);
/* Returns: result. (result != SZ_OK) means break.
Value (UInt64)(Int64)-1 for size means unknown value. */
};
#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
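/* Implementation sketch (illustrative; kExampleMaxInput is a made-up limit):
   returning any non-SZ_OK value from the callback cancels the operation. */
#define kExampleMaxInput ((UInt64)1 << 40)
static SRes Example_Progress(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize)
{
  (void)p; (void)outSize;
  /* (UInt64)(Int64)-1 means the size is unknown, per the note above */
  if (inSize != (UInt64)(Int64)-1 && inSize > kExampleMaxInput)
    return SZ_ERROR_PROGRESS;  /* abort */
  return SZ_OK;
}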
typedef struct ISzAlloc ISzAlloc;
typedef const ISzAlloc * ISzAllocPtr;
struct ISzAlloc
{
void *(*Alloc)(ISzAllocPtr p, size_t size);
void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */
};
#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size)
#define ISzAlloc_Free(p, a) (p)->Free(p, a)
/* deprecated */
#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size)
#define IAlloc_Free(p, a) ISzAlloc_Free(p, a)
#ifndef MY_offsetof
#ifdef offsetof
#define MY_offsetof(type, m) offsetof(type, m)
/*
#define MY_offsetof(type, m) FIELD_OFFSET(type, m)
*/
#else
#define MY_offsetof(type, m) ((size_t)&(((type *)0)->m))
#endif
#endif
#ifndef Z7_container_of
/*
#define Z7_container_of(ptr, type, m) container_of(ptr, type, m)
#define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
#define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
#define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
*/
/*
GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly"
GCC 3.4.4 : classes with constructor
GCC 4.8.1 : classes with non-public variable members
*/
#define Z7_container_of(ptr, type, m) \
((type *)(void *)((char *)(void *) \
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
#define Z7_container_of_CONST(ptr, type, m) \
((const type *)(const void *)((const char *)(const void *) \
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
/*
#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \
((type *)(void *)(const void *)((const char *)(const void *) \
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
*/
#endif
#define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m)
// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
/*
#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m)
*/
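/* Implementation sketch (illustrative, mirroring the CAlignOffsetAlloc
   pattern in Alloc.c): a callback recovers its enclosing object from the
   embedded vtable pointer. */
typedef struct
{
  ISzAlloc vt;
  ISzAllocPtr base;
} CExampleForwardAlloc;

static void *ExampleForwardAlloc_Alloc(ISzAllocPtr pp, size_t size)
{
  const CExampleForwardAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CExampleForwardAlloc, vt);
  return ISzAlloc_Alloc(p->base, size);
}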
#if defined (__clang__) || defined(__GNUC__)
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wcast-qual\"")
#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL \
_Pragma("GCC diagnostic pop")
#else
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL
#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL
#endif
#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \
Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \
type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \
Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL
#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p)
// #define ZIP7_DECLARE_HANDLE(name) typedef void *name;
#define Z7_DECLARE_HANDLE(name) struct name##_dummy{int unused;}; typedef struct name##_dummy *name;
#define Z7_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
#ifndef Z7_ARRAY_SIZE
#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif
#ifdef _WIN32
#define CHAR_PATH_SEPARATOR '\\'
#define WCHAR_PATH_SEPARATOR L'\\'
#define STRING_PATH_SEPARATOR "\\"
#define WSTRING_PATH_SEPARATOR L"\\"
#else
#define CHAR_PATH_SEPARATOR '/'
#define WCHAR_PATH_SEPARATOR L'/'
#define STRING_PATH_SEPARATOR "/"
#define WSTRING_PATH_SEPARATOR L"/"
#endif
#define k_PropVar_TimePrec_0 0
#define k_PropVar_TimePrec_Unix 1
#define k_PropVar_TimePrec_DOS 2
#define k_PropVar_TimePrec_HighPrec 3
#define k_PropVar_TimePrec_Base 16
#define k_PropVar_TimePrec_100ns (k_PropVar_TimePrec_Base + 7)
#define k_PropVar_TimePrec_1ns (k_PropVar_TimePrec_Base + 9)
EXTERN_C_END
#endif
/*
#ifndef Z7_ST
#ifdef _7ZIP_ST
#define Z7_ST
#endif
#endif
*/

extern/lzma/7zWindows.h vendored

@@ -1,101 +0,0 @@
/* 7zWindows.h -- StdAfx
2023-04-02 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_7Z_WINDOWS_H
#define ZIP7_INC_7Z_WINDOWS_H
#ifdef _WIN32
#if defined(__clang__)
# pragma clang diagnostic push
#endif
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4668) // '_WIN32_WINNT' is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
#if _MSC_VER == 1900
// for old kit10 versions
// #pragma warning(disable : 4255) // winuser.h(13979): warning C4255: 'GetThreadDpiAwarenessContext':
#endif
// win10 Windows Kit:
#endif // _MSC_VER
#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
// for msvc6 without sdk2003
#define RPC_NO_WINDOWS_H
#endif
#if defined(__MINGW32__) || defined(__MINGW64__)
// #if defined(__GNUC__) && !defined(__clang__)
#include <windows.h>
#else
#include <Windows.h>
#endif
// #include <basetsd.h>
// #include <wtypes.h>
// but if precompiled with clang-cl then we need
// #include <windows.h>
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
#if defined(__clang__)
# pragma clang diagnostic pop
#endif
#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
#ifndef _W64
typedef long LONG_PTR, *PLONG_PTR;
typedef unsigned long ULONG_PTR, *PULONG_PTR;
typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
#define Z7_OLD_WIN_SDK
#endif // _W64
#endif // _MSC_VER == 1200
#ifdef Z7_OLD_WIN_SDK
#ifndef INVALID_FILE_ATTRIBUTES
#define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
#endif
#ifndef INVALID_SET_FILE_POINTER
#define INVALID_SET_FILE_POINTER ((DWORD)-1)
#endif
#ifndef FILE_SPECIAL_ACCESS
#define FILE_SPECIAL_ACCESS (FILE_ANY_ACCESS)
#endif
// ShlObj.h:
// #define BIF_NEWDIALOGSTYLE 0x0040
#pragma warning(disable : 4201)
// #pragma warning(disable : 4115)
#undef VARIANT_TRUE
#define VARIANT_TRUE ((VARIANT_BOOL)-1)
#endif
#endif // Z7_OLD_WIN_SDK
#ifdef UNDER_CE
#undef VARIANT_TRUE
#define VARIANT_TRUE ((VARIANT_BOOL)-1)
#endif
#if defined(_MSC_VER)
#if _MSC_VER >= 1400 && _MSC_VER <= 1600
// BaseTsd.h(148) : 'HandleToULong' : unreferenced inline function has been removed
// string.h
// #pragma warning(disable : 4514)
#endif
#endif
/* #include "7zTypes.h" */
#endif

extern/lzma/Alloc.c vendored

@@ -1,182 +1,33 @@
/* Alloc.c -- Memory allocation functions
2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
2008-09-24
Igor Pavlov
Public domain */
#ifdef _WIN32
#include "7zWindows.h"
#include <windows.h>
#endif
#include <stdlib.h>
#include "Alloc.h"
#ifdef _WIN32
#ifdef Z7_LARGE_PAGES
#if defined(__clang__) || defined(__GNUC__)
typedef void (*Z7_voidFunction)(void);
#define MY_CAST_FUNC (Z7_voidFunction)
#elif defined(_MSC_VER) && _MSC_VER > 1920
#define MY_CAST_FUNC (void *)
// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
#else
#define MY_CAST_FUNC
#endif
#endif // Z7_LARGE_PAGES
#endif // _WIN32
/* #define _SZ_ALLOC_DEBUG */
// #define SZ_ALLOC_DEBUG
/* #define SZ_ALLOC_DEBUG */
/* use SZ_ALLOC_DEBUG to debug alloc/free operations */
#ifdef SZ_ALLOC_DEBUG
#include <string.h>
/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
#ifdef _SZ_ALLOC_DEBUG
#include <stdio.h>
static int g_allocCount = 0;
#ifdef _WIN32
static int g_allocCountMid = 0;
static int g_allocCountBig = 0;
int g_allocCount = 0;
int g_allocCountMid = 0;
int g_allocCountBig = 0;
#endif
#define CONVERT_INT_TO_STR(charType, tempSize) \
char temp[tempSize]; unsigned i = 0; \
while (val >= 10) { temp[i++] = (char)('0' + (unsigned)(val % 10)); val /= 10; } \
*s++ = (charType)('0' + (unsigned)val); \
while (i != 0) { i--; *s++ = temp[i]; } \
*s = 0;
static void ConvertUInt64ToString(UInt64 val, char *s)
{
CONVERT_INT_TO_STR(char, 24)
}
#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
static void ConvertUInt64ToHex(UInt64 val, char *s)
{
UInt64 v = val;
unsigned i;
for (i = 1;; i++)
{
v >>= 4;
if (v == 0)
break;
}
s[i] = 0;
do
{
unsigned t = (unsigned)(val & 0xF);
val >>= 4;
s[--i] = GET_HEX_CHAR(t);
}
while (i);
}
#define DEBUG_OUT_STREAM stderr
static void Print(const char *s)
{
fputs(s, DEBUG_OUT_STREAM);
}
static void PrintAligned(const char *s, size_t align)
{
size_t len = strlen(s);
for(;;)
{
fputc(' ', DEBUG_OUT_STREAM);
if (len >= align)
break;
++len;
}
Print(s);
}
static void PrintLn(void)
{
Print("\n");
}
static void PrintHex(UInt64 v, size_t align)
{
char s[32];
ConvertUInt64ToHex(v, s);
PrintAligned(s, align);
}
static void PrintDec(int v, size_t align)
{
char s[32];
ConvertUInt64ToString((unsigned)v, s);
PrintAligned(s, align);
}
static void PrintAddr(void *p)
{
PrintHex((UInt64)(size_t)(ptrdiff_t)p, 12);
}
#define PRINT_REALLOC(name, cnt, size, ptr) { \
Print(name " "); \
if (!ptr) PrintDec(cnt++, 10); \
PrintHex(size, 10); \
PrintAddr(ptr); \
PrintLn(); }
#define PRINT_ALLOC(name, cnt, size, ptr) { \
Print(name " "); \
PrintDec(cnt++, 10); \
PrintHex(size, 10); \
PrintAddr(ptr); \
PrintLn(); }
#define PRINT_FREE(name, cnt, ptr) if (ptr) { \
Print(name " "); \
PrintDec(--cnt, 10); \
PrintAddr(ptr); \
PrintLn(); }
#else
#ifdef _WIN32
#define PRINT_ALLOC(name, cnt, size, ptr)
#endif
#define PRINT_FREE(name, cnt, ptr)
#define Print(s)
#define PrintLn()
#define PrintHex(v, align)
#define PrintAddr(p)
#endif
/*
by specification:
malloc(0) : returns NULL or a unique pointer value that can later be successfully passed to free()
realloc(NULL, size) : the call is equivalent to malloc(size)
realloc(non_NULL, 0) : the call is equivalent to free(ptr)
in main compilers:
malloc(0) : returns non_NULL
realloc(NULL, 0) : returns non_NULL
realloc(non_NULL, 0) : returns NULL
*/
void *MyAlloc(size_t size)
{
if (size == 0)
return NULL;
// PRINT_ALLOC("Alloc ", g_allocCount, size, NULL)
#ifdef SZ_ALLOC_DEBUG
return 0;
#ifdef _SZ_ALLOC_DEBUG
{
void *p = malloc(size);
if (p)
{
PRINT_ALLOC("Alloc ", g_allocCount, size, p)
}
fprintf(stderr, "\nAlloc %10d bytes, count = %10d, addr = %8X", size, g_allocCount++, (unsigned)p);
return p;
}
#else
@@ -186,350 +37,91 @@ void *MyAlloc(size_t size)
void MyFree(void *address)
{
PRINT_FREE("Free ", g_allocCount, address)
#ifdef _SZ_ALLOC_DEBUG
if (address != 0)
fprintf(stderr, "\nFree; count = %10d, addr = %8X", --g_allocCount, (unsigned)address);
#endif
free(address);
}
void *MyRealloc(void *address, size_t size)
{
if (size == 0)
{
MyFree(address);
return NULL;
}
// PRINT_REALLOC("Realloc ", g_allocCount, size, address)
#ifdef SZ_ALLOC_DEBUG
{
void *p = realloc(address, size);
if (p)
{
PRINT_REALLOC("Realloc ", g_allocCount, size, address)
}
return p;
}
#else
return realloc(address, size);
#endif
}
#ifdef _WIN32
void *MidAlloc(size_t size)
{
if (size == 0)
return NULL;
#ifdef SZ_ALLOC_DEBUG
{
void *p = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
if (p)
{
PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, p)
}
return p;
}
#else
return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
return 0;
#ifdef _SZ_ALLOC_DEBUG
fprintf(stderr, "\nAlloc_Mid %10d bytes; count = %10d", size, g_allocCountMid++);
#endif
return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE);
}
void MidFree(void *address)
{
PRINT_FREE("Free-Mid", g_allocCountMid, address)
if (!address)
#ifdef _SZ_ALLOC_DEBUG
if (address != 0)
fprintf(stderr, "\nFree_Mid; count = %10d", --g_allocCountMid);
#endif
if (address == 0)
return;
VirtualFree(address, 0, MEM_RELEASE);
}
#ifdef Z7_LARGE_PAGES
#ifdef MEM_LARGE_PAGES
#define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES
#else
#define MY__MEM_LARGE_PAGES 0x20000000
#ifndef MEM_LARGE_PAGES
#undef _7ZIP_LARGE_PAGES
#endif
extern
SIZE_T g_LargePageSize;
#ifdef _7ZIP_LARGE_PAGES
SIZE_T g_LargePageSize = 0;
typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID);
typedef SIZE_T (WINAPI *GetLargePageMinimumP)();
#endif
void SetLargePageSize(void)
void SetLargePageSize()
{
#ifdef Z7_LARGE_PAGES
SIZE_T size;
const
Func_GetLargePageMinimum fn =
(Func_GetLargePageMinimum) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
"GetLargePageMinimum");
if (!fn)
#ifdef _7ZIP_LARGE_PAGES
SIZE_T size = 0;
GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP)
GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum");
if (largePageMinimum == 0)
return;
size = fn();
size = largePageMinimum();
if (size == 0 || (size & (size - 1)) != 0)
return;
g_LargePageSize = size;
#endif
}
#endif // Z7_LARGE_PAGES
void *BigAlloc(size_t size)
{
if (size == 0)
return NULL;
PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL)
#ifdef Z7_LARGE_PAGES
return 0;
#ifdef _SZ_ALLOC_DEBUG
fprintf(stderr, "\nAlloc_Big %10d bytes; count = %10d", size, g_allocCountBig++);
#endif
#ifdef _7ZIP_LARGE_PAGES
if (g_LargePageSize != 0 && g_LargePageSize <= (1 << 30) && size >= (1 << 18))
{
SIZE_T ps = g_LargePageSize;
if (ps != 0 && ps <= (1 << 30) && size > (ps / 2))
{
size_t size2;
ps--;
size2 = (size + ps) & ~ps;
if (size2 >= size)
{
void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE);
if (p)
{
PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p)
return p;
}
}
}
void *res = VirtualAlloc(0, (size + g_LargePageSize - 1) & (~(g_LargePageSize - 1)),
MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
if (res != 0)
return res;
}
#endif
return MidAlloc(size);
return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE);
}
void BigFree(void *address)
{
PRINT_FREE("Free-Big", g_allocCountBig, address)
MidFree(address);
}
#endif // _WIN32
static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MyAlloc(size); }
static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MyFree(address); }
const ISzAlloc g_Alloc = { SzAlloc, SzFree };
#ifdef _WIN32
static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MidAlloc(size); }
static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MidFree(address); }
static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return BigAlloc(size); }
static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) BigFree(address); }
const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
#endif
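/* Usage sketch (illustrative, not part of the original file): all of these
   allocator tables are used through the same ISzAlloc interface, and
   Free(NULL) is allowed, like free(NULL). */
static int Example_AllocRoundTrip(void)
{
  void *p = ISzAlloc_Alloc(&g_Alloc, 1024);
  if (!p)
    return 0;
  ISzAlloc_Free(&g_Alloc, p);
  return 1;
}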
/*
uintptr_t : <stdint.h> C99 (optional)
: unsupported in VS6
*/
#ifdef _WIN32
typedef UINT_PTR UIntPtr;
#else
/*
typedef uintptr_t UIntPtr;
*/
typedef ptrdiff_t UIntPtr;
#endif
#define ADJUST_ALLOC_SIZE 0
/*
#define ADJUST_ALLOC_SIZE (sizeof(void *) - 1)
*/
/*
Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if
MyAlloc() can return an address that is NOT a multiple of sizeof(void *).
*/
/*
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1))))
*/
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1))))
#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)
#define USE_posix_memalign
#endif
#ifndef USE_posix_memalign
#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
#endif
/*
This posix_memalign() is for test purposes only.
We also need special Free() function instead of free(),
if this posix_memalign() is used.
*/
/*
static int posix_memalign(void **ptr, size_t align, size_t size)
{
size_t newSize = size + align;
void *p;
void *pAligned;
*ptr = NULL;
if (newSize < size)
return 12; // ENOMEM
p = MyAlloc(newSize);
if (!p)
return 12; // ENOMEM
pAligned = MY_ALIGN_PTR_UP_PLUS(p, align);
((void **)pAligned)[-1] = p;
*ptr = pAligned;
return 0;
}
*/
/*
ALLOC_ALIGN_SIZE >= sizeof(void *)
ALLOC_ALIGN_SIZE >= cache_line_size
*/
#define ALLOC_ALIGN_SIZE ((size_t)1 << 7)
static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
{
#ifndef USE_posix_memalign
void *p;
void *pAligned;
size_t newSize;
UNUSED_VAR(pp)
/* we can also allocate additional dummy ALLOC_ALIGN_SIZE bytes after the aligned
block to prevent cache line sharing with other allocated blocks */
newSize = size + ALLOC_ALIGN_SIZE * 1 + ADJUST_ALLOC_SIZE;
if (newSize < size)
return NULL;
p = MyAlloc(newSize);
if (!p)
return NULL;
pAligned = MY_ALIGN_PTR_UP_PLUS(p, ALLOC_ALIGN_SIZE);
Print(" size="); PrintHex(size, 8);
Print(" a_size="); PrintHex(newSize, 8);
Print(" ptr="); PrintAddr(p);
Print(" a_ptr="); PrintAddr(pAligned);
PrintLn();
((void **)pAligned)[-1] = p;
return pAligned;
#else
void *p;
UNUSED_VAR(pp)
if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size))
return NULL;
Print(" posix_memalign="); PrintAddr(p);
PrintLn();
return p;
#ifdef _SZ_ALLOC_DEBUG
if (address != 0)
fprintf(stderr, "\nFree_Big; count = %10d", --g_allocCountBig);
#endif
}
static void SzAlignedFree(ISzAllocPtr pp, void *address)
{
UNUSED_VAR(pp)
#ifndef USE_posix_memalign
if (address)
MyFree(((void **)address)[-1]);
#else
free(address);
#endif
}
const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };
#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
/* we align ptr to support cases where CAlignOffsetAlloc::offset is not a multiple of sizeof(void *) */
#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
/*
#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1]
*/
static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
{
const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
void *adr;
void *pAligned;
size_t newSize;
size_t extra;
size_t alignSize = (size_t)1 << p->numAlignBits;
if (alignSize < sizeof(void *))
alignSize = sizeof(void *);
if (p->offset >= alignSize)
return NULL;
/* we can also allocate additional dummy ALLOC_ALIGN_SIZE bytes after the aligned
block to prevent cache line sharing with other allocated blocks */
extra = p->offset & (sizeof(void *) - 1);
newSize = size + alignSize + extra + ADJUST_ALLOC_SIZE;
if (newSize < size)
return NULL;
adr = ISzAlloc_Alloc(p->baseAlloc, newSize);
if (!adr)
return NULL;
pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr +
alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset;
PrintLn();
Print("- Aligned: ");
Print(" size="); PrintHex(size, 8);
Print(" a_size="); PrintHex(newSize, 8);
Print(" ptr="); PrintAddr(adr);
Print(" a_ptr="); PrintAddr(pAligned);
PrintLn();
REAL_BLOCK_PTR_VAR(pAligned) = adr;
return pAligned;
if (address == 0)
return;
VirtualFree(address, 0, MEM_RELEASE);
}
static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
{
if (address)
{
const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
PrintLn();
Print("- Aligned Free: ");
PrintLn();
ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address));
}
}
void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p)
{
p->vt.Alloc = AlignOffsetAlloc_Alloc;
p->vt.Free = AlignOffsetAlloc_Free;
}
#endif

extern/lzma/Alloc.h vendored

@@ -1,32 +1,19 @@
/* Alloc.h -- Memory allocation functions
2023-03-04 : Igor Pavlov : Public domain */
2008-03-13
Igor Pavlov
Public domain */
#ifndef ZIP7_INC_ALLOC_H
#define ZIP7_INC_ALLOC_H
#ifndef __COMMON_ALLOC_H
#define __COMMON_ALLOC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/*
MyFree(NULL) : is allowed, as free(NULL)
MyAlloc(0) : returns NULL : but malloc(0) is allowed to return NULL or non_NULL
MyRealloc(NULL, 0) : returns NULL : but realloc(NULL, 0) is allowed to return NULL or non_NULL
MyRealloc() is similar to realloc() for the following cases:
MyRealloc(non_NULL, 0) : returns NULL and always calls MyFree(ptr)
MyRealloc(NULL, non_ZERO) : returns NULL, if allocation failed
MyRealloc(non_NULL, non_ZERO) : returns NULL, if reallocation failed
*/
#include <stddef.h>
void *MyAlloc(size_t size);
void MyFree(void *address);
void *MyRealloc(void *address, size_t size);
#ifdef _WIN32
#ifdef Z7_LARGE_PAGES
void SetLargePageSize(void);
#endif
void SetLargePageSize();
void *MidAlloc(size_t size);
void MidFree(void *address);
@@ -42,30 +29,4 @@ void BigFree(void *address);
#endif
extern const ISzAlloc g_Alloc;
#ifdef _WIN32
extern const ISzAlloc g_BigAlloc;
extern const ISzAlloc g_MidAlloc;
#else
#define g_BigAlloc g_AlignedAlloc
#define g_MidAlloc g_AlignedAlloc
#endif
extern const ISzAlloc g_AlignedAlloc;
typedef struct
{
ISzAlloc vt;
ISzAllocPtr baseAlloc;
unsigned numAlignBits; /* ((1 << numAlignBits) >= sizeof(void *)) */
size_t offset; /* offset == k * sizeof(void *), and offset < (1 << numAlignBits) */
} CAlignOffsetAlloc;
void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p);
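/* Setup sketch (illustrative, not upstream code): blocks returned through
   the vtable start exactly (offset) bytes after a (1 << numAlignBits)
   boundary. */
static void Example_InitAlignOffsetAlloc(CAlignOffsetAlloc *a)
{
  a->numAlignBits = 7;      /* 128-byte alignment */
  a->offset = 0;            /* must be a multiple of sizeof(void *) */
  a->baseAlloc = &g_Alloc;
  AlignOffsetAlloc_CreateVTable(a);
  /* then: void *p = ISzAlloc_Alloc(&a->vt, size); */
}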
EXTERN_C_END
#endif

extern/lzma/CMakeLists.txt vendored

@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: 2024 Blender Foundation
# SPDX-FileCopyrightText: 2006 Blender Foundation
#
# SPDX-License-Identifier: GPL-2.0-or-later
@@ -19,28 +19,18 @@ set(INC_SYS
set(SRC
Alloc.c
CpuArch.c
LzFind.c
LzFindMt.c
LzFindOpt.c
LzmaDec.c
LzmaEnc.c
LzmaLib.c
Threads.c
7zTypes.h
7zWindows.h
Alloc.h
Compiler.h
CpuArch.h
LzFind.h
LzFindMt.h
LzHash.h
LzmaDec.h
LzmaEnc.h
LzmaLib.h
Precomp.h
Threads.h
Types.h
)
set(LIB

extern/lzma/Compiler.h vendored

@@ -1,159 +0,0 @@
/* Compiler.h : Compiler specific defines and pragmas
2023-04-02 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_COMPILER_H
#define ZIP7_INC_COMPILER_H
#if defined(__clang__)
# define Z7_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
#endif
#if defined(__clang__) && defined(__apple_build_version__)
# define Z7_APPLE_CLANG_VERSION Z7_CLANG_VERSION
#elif defined(__clang__)
# define Z7_LLVM_CLANG_VERSION Z7_CLANG_VERSION
#elif defined(__GNUC__)
# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#endif
#ifdef _MSC_VER
#if !defined(__clang__) && !defined(__GNUC__)
#define Z7_MSC_VER_ORIGINAL _MSC_VER
#endif
#endif
#if defined(__MINGW32__) || defined(__MINGW64__)
#define Z7_MINGW
#endif
// #pragma GCC diagnostic ignored "-Wunknown-pragmas"
#ifdef __clang__
// padding size of '' with 4 bytes to alignment boundary
#pragma GCC diagnostic ignored "-Wpadded"
#endif
#ifdef _MSC_VER
#ifdef UNDER_CE
#define RPC_NO_WINDOWS_H
/* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
#pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
#pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
#endif
#if defined(_MSC_VER) && _MSC_VER >= 1800
#pragma warning(disable : 4464) // relative include path contains '..'
#endif
// == 1200 : -O1 : for __forceinline
// >= 1900 : -O1 : for printf
#pragma warning(disable : 4710) // function not inlined
#if _MSC_VER < 1900
// winnt.h: 'Int64ShllMod32'
#pragma warning(disable : 4514) // unreferenced inline function has been removed
#endif
#if _MSC_VER < 1300
// #pragma warning(disable : 4702) // unreachable code
// Bra.c : -O1:
#pragma warning(disable : 4714) // function marked as __forceinline not inlined
#endif
/*
#if _MSC_VER > 1400 && _MSC_VER <= 1900
// strcat: This function or variable may be unsafe
// sysinfoapi.h: kit10: GetVersion was declared deprecated
#pragma warning(disable : 4996)
#endif
*/
#if _MSC_VER > 1200
// -Wall warnings
#pragma warning(disable : 4711) // function selected for automatic inline expansion
#pragma warning(disable : 4820) // '2' bytes padding added after data member
#if _MSC_VER >= 1400 && _MSC_VER < 1920
// 1400: string.h: _DBG_MEMCPY_INLINE_
// 1600 - 191x : smmintrin.h __cplusplus'
// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
#pragma warning(disable : 4668)
// 1400 - 1600 : WinDef.h : 'FARPROC' :
// 1900 - 191x : immintrin.h: _readfsbase_u32
// no function prototype given : converting '()' to '(void)'
#pragma warning(disable : 4255)
#endif
#if _MSC_VER >= 1914
// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified
#pragma warning(disable : 5045)
#endif
#endif // _MSC_VER > 1200
#endif // _MSC_VER
#if defined(__clang__) && (__clang_major__ >= 4)
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
_Pragma("clang loop unroll(disable)") \
_Pragma("clang loop vectorize(disable)")
#define Z7_ATTRIB_NO_VECTORIZE
#elif defined(__GNUC__) && (__GNUC__ >= 5)
#define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
// __attribute__((optimize("no-unroll-loops")));
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
#elif defined(_MSC_VER) && (_MSC_VER >= 1920)
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
_Pragma("loop( no_vector )")
#define Z7_ATTRIB_NO_VECTORIZE
#else
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
#define Z7_ATTRIB_NO_VECTORIZE
#endif
#if defined(MY_CPU_X86_OR_AMD64) && ( \
defined(__clang__) && (__clang_major__ >= 4) \
|| defined(__GNUC__) && (__GNUC__ >= 5))
#define Z7_ATTRIB_NO_SSE __attribute__((__target__("no-sse")))
#else
#define Z7_ATTRIB_NO_SSE
#endif
#define Z7_ATTRIB_NO_VECTOR \
Z7_ATTRIB_NO_VECTORIZE \
Z7_ATTRIB_NO_SSE
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 1000) \
/* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
// GCC is not good for __builtin_expect()
#define Z7_LIKELY(x) (__builtin_expect((x), 1))
#define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
// #define Z7_unlikely [[unlikely]]
// #define Z7_likely [[likely]]
#else
#define Z7_LIKELY(x) (x)
#define Z7_UNLIKELY(x) (x)
// #define Z7_likely
#endif
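/* Usage sketch (illustrative, not upstream code): mark a branch that is
   expected to be rare, so the compiler lays out the common path first. */
static unsigned Example_CountNonZero(const unsigned char *p, unsigned n)
{
  unsigned i, k = 0;
  for (i = 0; i < n; i++)
    if (Z7_UNLIKELY(p[i] != 0))  /* assumed to be the rare case */
      k++;
  return k;
}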
#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 36000))
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"")
#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic pop")
#else
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#define UNUSED_VAR(x) (void)x;
/* #define UNUSED_VAR(x) x=x; */
#endif

extern/lzma/CpuArch.c vendored

@@ -1,823 +0,0 @@
/* CpuArch.c -- CPU specific code
2023-05-18 : Igor Pavlov : Public domain */
#include "Precomp.h"
// #include <stdio.h>
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64
#undef NEED_CHECK_FOR_CPUID
#if !defined(MY_CPU_AMD64)
#define NEED_CHECK_FOR_CPUID
#endif
/*
cpuid instruction supports (subFunction) parameter in ECX,
that is used only with some specific (function) parameter values.
But we always use only (subFunction==0).
*/
/*
__cpuid(): MSVC and GCC/CLANG use same function/macro name
but parameters are different.
We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function.
*/
#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \
|| defined(__clang__) /* && (__clang_major__ >= 10) */
/* there were some CLANG/GCC compilers that had issues with
rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined).
compiler's <cpuid.h> contains the macro __cpuid() that is similar to our code.
The history of __cpuid() changes in CLANG/GCC:
GCC:
2007: it preserved ebx for (__PIC__ && __i386__)
2013: it preserved rbx and ebx for __PIC__
2014: it doesn't preserve rbx and ebx anymore
we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem.
CLANG:
2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check.
Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)?
Do we need __PIC__ test for CLANG or we must care about rbx even if
__PIC__ is not defined?
*/
#define ASM_LN "\n"
#if defined(MY_CPU_AMD64) && defined(__PIC__) \
&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
#define x86_cpuid_MACRO(p, func) { \
__asm__ __volatile__ ( \
ASM_LN "mov %%rbx, %q1" \
ASM_LN "cpuid" \
ASM_LN "xchg %%rbx, %q1" \
: "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
/* "=&r" selects free register. It can select even rbx, if that register is free.
"=&D" for (RDI) also works, but the code can be larger with "=&D"
"2"(0) means (subFunction = 0),
2 is (zero-based) index in the output constraint list "=c" (ECX). */
#elif defined(MY_CPU_X86) && defined(__PIC__) \
&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
#define x86_cpuid_MACRO(p, func) { \
__asm__ __volatile__ ( \
ASM_LN "mov %%ebx, %k1" \
ASM_LN "cpuid" \
ASM_LN "xchg %%ebx, %k1" \
: "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
#else
#define x86_cpuid_MACRO(p, func) { \
__asm__ __volatile__ ( \
ASM_LN "cpuid" \
: "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
#endif
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
x86_cpuid_MACRO(p, func)
}
Z7_NO_INLINE
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
#if defined(NEED_CHECK_FOR_CPUID)
#define EFALGS_CPUID_BIT 21
UInt32 a;
__asm__ __volatile__ (
ASM_LN "pushf"
ASM_LN "pushf"
ASM_LN "pop %0"
// ASM_LN "movl %0, %1"
// ASM_LN "xorl $0x200000, %0"
ASM_LN "btc %1, %0"
ASM_LN "push %0"
ASM_LN "popf"
ASM_LN "pushf"
ASM_LN "pop %0"
ASM_LN "xorl (%%esp), %0"
ASM_LN "popf"
ASM_LN
: "=&r" (a) // "=a"
: "i" (EFALGS_CPUID_BIT)
);
if ((a & (1 << EFALGS_CPUID_BIT)) == 0)
return 0;
#endif
{
UInt32 p[4];
x86_cpuid_MACRO(p, 0)
return p[0];
}
}
#undef ASM_LN
#elif !defined(_MSC_VER)
/*
// for gcc/clang and other: we can try to use __cpuid macro:
#include <cpuid.h>
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
__cpuid(func, p[0], p[1], p[2], p[3]);
}
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
return (UInt32)__get_cpuid_max(0, NULL);
}
*/
// for unsupported cpuid:
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
UNUSED_VAR(func)
p[0] = p[1] = p[2] = p[3] = 0;
}
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
return 0;
}
#else // _MSC_VER
#if !defined(MY_CPU_AMD64)
UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
#if defined(NEED_CHECK_FOR_CPUID)
#define EFALGS_CPUID_BIT 21
__asm pushfd
__asm pushfd
/*
__asm pop eax
// __asm mov edx, eax
__asm btc eax, EFALGS_CPUID_BIT
__asm push eax
*/
__asm btc dword ptr [esp], EFALGS_CPUID_BIT
__asm popfd
__asm pushfd
__asm pop eax
// __asm xor eax, edx
__asm xor eax, [esp]
// __asm push edx
__asm popfd
__asm and eax, (1 shl EFALGS_CPUID_BIT)
__asm jz end_func
#endif
__asm push ebx
__asm xor eax, eax // func
__asm xor ecx, ecx // subFunction (optional) for (func == 0)
__asm cpuid
__asm pop ebx
#if defined(NEED_CHECK_FOR_CPUID)
end_func:
#endif
__asm ret 0
}
void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
UNUSED_VAR(p)
UNUSED_VAR(func)
__asm push ebx
__asm push edi
__asm mov edi, ecx // p
__asm mov eax, edx // func
__asm xor ecx, ecx // subfunction (optional) for (func == 0)
__asm cpuid
__asm mov [edi ], eax
__asm mov [edi + 4], ebx
__asm mov [edi + 8], ecx
__asm mov [edi + 12], edx
__asm pop edi
__asm pop ebx
__asm ret 0
}
#else // MY_CPU_AMD64
#if _MSC_VER >= 1600
#include <intrin.h>
#define MY_cpuidex __cpuidex
#else
/*
__cpuid (func == (0 or 7)) requires subfunction number in ECX.
MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
__cpuid() in new MSVC clears ECX.
__cpuid() in old MSVC (14.00) x64 doesn't clear ECX
We still can use __cpuid for low (func) values that don't require ECX,
but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
where ECX value is first parameter for FASTCALL / NO_INLINE func,
So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!!
*/
static
Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo)
{
UNUSED_VAR(subFunction)
__cpuid(CPUInfo, func);
}
#define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info)
#pragma message("======== MY_cpuidex_HACK WAS USED ========")
#endif // _MSC_VER >= 1600
#if !defined(MY_CPU_AMD64)
/* inlining for __cpuid() in MSVC x86 (32-bit) produces large, inefficient code,
so we disable inlining here */
Z7_NO_INLINE
#endif
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
MY_cpuidex((int *)p, (int)func, 0);
}
Z7_NO_INLINE
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
int a[4];
MY_cpuidex(a, 0, 0);
return a[0];
}
#endif // MY_CPU_AMD64
#endif // _MSC_VER
#if defined(NEED_CHECK_FOR_CPUID)
#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; }
#else
#define CHECK_CPUID_IS_SUPPORTED
#endif
#undef NEED_CHECK_FOR_CPUID
static
BoolInt x86cpuid_Func_1(UInt32 *p)
{
CHECK_CPUID_IS_SUPPORTED
z7_x86_cpuid(p, 1);
return True;
}
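/* Illustrative sketch (not part of the original file): cpuid function 0
   returns the vendor string in EBX, EDX, ECX order, e.g. "GenuineIntel". */
static void Example_GetVendor(char out[13])
{
  UInt32 r[4], order[3];
  unsigned i, k;
  z7_x86_cpuid(r, 0);
  order[0] = r[1];  /* EBX */
  order[1] = r[3];  /* EDX */
  order[2] = r[2];  /* ECX */
  for (i = 0; i < 3; i++)
    for (k = 0; k < 4; k++)
      out[i * 4 + k] = (char)((order[i] >> (8 * k)) & 0xFF);
  out[12] = 0;
}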
/*
static const UInt32 kVendors[][1] =
{
{ 0x756E6547 }, // , 0x49656E69, 0x6C65746E },
{ 0x68747541 }, // , 0x69746E65, 0x444D4163 },
{ 0x746E6543 } // , 0x48727561, 0x736C7561 }
};
*/
/*
typedef struct
{
UInt32 maxFunc;
UInt32 vendor[3];
UInt32 ver;
UInt32 b;
UInt32 c;
UInt32 d;
} Cx86cpuid;
enum
{
CPU_FIRM_INTEL,
CPU_FIRM_AMD,
CPU_FIRM_VIA
};
int x86cpuid_GetFirm(const Cx86cpuid *p);
#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf))
#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf))
#define x86cpuid_ver_GetStepping(ver) (ver & 0xf)
int x86cpuid_GetFirm(const Cx86cpuid *p)
{
unsigned i;
for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++)
{
const UInt32 *v = kVendors[i];
if (v[0] == p->vendor[0]
// && v[1] == p->vendor[1]
// && v[2] == p->vendor[2]
)
return (int)i;
}
return -1;
}
BoolInt CPU_Is_InOrder()
{
Cx86cpuid p;
UInt32 family, model;
if (!x86cpuid_CheckAndRead(&p))
return True;
family = x86cpuid_ver_GetFamily(p.ver);
model = x86cpuid_ver_GetModel(p.ver);
switch (x86cpuid_GetFirm(&p))
{
case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
// In-Order Atom CPU
model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330
|| model == 0x26 // 45 nm, Z6xx
|| model == 0x27 // 32 nm, Z2460
|| model == 0x35 // 32 nm, Z2760
|| model == 0x36 // 32 nm, N2xxx, D2xxx
)));
case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
}
return False; // v23 : unknown processors are not In-Order
}
*/
#ifdef _WIN32
#include "7zWindows.h"
#endif
#if !defined(MY_CPU_AMD64) && defined(_WIN32)
/* for legacy SSE ia32: there is no user-space cpu instruction to check
that OS supports SSE register storing/restoring on context switches.
So we need some OS-specific function to check that it's safe to use SSE registers.
*/
Z7_FORCE_INLINE
static BoolInt CPU_Sys_Is_SSE_Supported(void)
{
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4996) // `GetVersion': was declared deprecated
#endif
/* low byte is major version of Windows
We suppose that any Windows version since
Windows2000 (major == 5) supports SSE registers */
return (Byte)GetVersion() >= 5;
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
}
#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
#else
#define CHECK_SYS_SSE_SUPPORT
#endif
#if !defined(MY_CPU_AMD64)
BoolInt CPU_IsSupported_CMOV(void)
{
UInt32 a[4];
if (!x86cpuid_Func_1(&a[0]))
return 0;
return (a[3] >> 15) & 1;
}
BoolInt CPU_IsSupported_SSE(void)
{
UInt32 a[4];
CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_Func_1(&a[0]))
return 0;
return (a[3] >> 25) & 1;
}
BoolInt CPU_IsSupported_SSE2(void)
{
UInt32 a[4];
CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_Func_1(&a[0]))
return 0;
return (a[3] >> 26) & 1;
}
#endif
static UInt32 x86cpuid_Func_1_ECX(void)
{
UInt32 a[4];
CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_Func_1(&a[0]))
return 0;
return a[2];
}
BoolInt CPU_IsSupported_AES(void)
{
return (x86cpuid_Func_1_ECX() >> 25) & 1;
}
BoolInt CPU_IsSupported_SSSE3(void)
{
return (x86cpuid_Func_1_ECX() >> 9) & 1;
}
BoolInt CPU_IsSupported_SSE41(void)
{
return (x86cpuid_Func_1_ECX() >> 19) & 1;
}
BoolInt CPU_IsSupported_SHA(void)
{
CHECK_SYS_SSE_SUPPORT
if (z7_x86_cpuid_GetMaxFunc() < 7)
return False;
{
UInt32 d[4];
z7_x86_cpuid(d, 7);
return (d[1] >> 29) & 1;
}
}
/*
MSVC: _xgetbv() intrinsic is available since VS2010SP1.
MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in
<immintrin.h> that we can use or check.
For any 32-bit x86 we can use asm code in MSVC,
but MSVC asm code is huge after compilation.
So _xgetbv() is better
ICC: _xgetbv() intrinsic is available (in what version of ICC?)
ICC defines (__GNUC___) and it supports gnu assembler
also ICC supports MASM style code with -use-msasm switch.
but ICC doesn't support __attribute__((__target__))
GCC/CLANG 9:
_xgetbv() is macro that works via __builtin_ia32_xgetbv()
and we need __attribute__((__target__("xsave")).
But with __target__("xsave") the function will be not
inlined to function that has no __target__("xsave") attribute.
If we want _xgetbv() call inlining, then we should use asm version
instead of calling _xgetbv().
Note: the intrinsic was broken before GCC 8.2:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684
*/
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \
|| defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \
|| defined(__GNUC__) && (__GNUC__ >= 9) \
|| defined(__clang__) && (__clang_major__ >= 9)
// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler
#if defined(__INTEL_COMPILER)
#define ATTRIB_XGETBV
#elif defined(__GNUC__) || defined(__clang__)
// we don't define ATTRIB_XGETBV here, because asm version is better for inlining.
// #define ATTRIB_XGETBV __attribute__((__target__("xsave")))
#else
#define ATTRIB_XGETBV
#endif
#endif
#if defined(ATTRIB_XGETBV)
#include <immintrin.h>
#endif
// XFEATURE_ENABLED_MASK/XCR0
#define MY_XCR_XFEATURE_ENABLED_MASK 0
#if defined(ATTRIB_XGETBV)
ATTRIB_XGETBV
#endif
static UInt64 x86_xgetbv_0(UInt32 num)
{
#if defined(ATTRIB_XGETBV)
{
return
#if (defined(_MSC_VER))
_xgetbv(num);
#else
__builtin_ia32_xgetbv(
#if !defined(__clang__)
(int)
#endif
num);
#endif
}
#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC)
UInt32 a, d;
#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
__asm__
(
"xgetbv"
: "=a"(a), "=d"(d) : "c"(num) : "cc"
);
#else // is old gcc
__asm__
(
".byte 0x0f, 0x01, 0xd0" "\n\t"
: "=a"(a), "=d"(d) : "c"(num) : "cc"
);
#endif
return ((UInt64)d << 32) | a;
// return a;
#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64)
UInt32 a, d;
__asm {
push eax
push edx
push ecx
mov ecx, num;
// xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK
_emit 0x0f
_emit 0x01
_emit 0xd0
mov a, eax
mov d, edx
pop ecx
pop edx
pop eax
}
return ((UInt64)d << 32) | a;
// return a;
#else // it's unknown compiler
// #error "Need xgetbv function"
UNUSED_VAR(num)
// for MSVC-X64 we could call external function from external file.
/* Actually we had checked OSXSAVE/AVX in cpuid before.
So it's expected that OS supports at least AVX and below. */
// if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0
return
// (1 << 0) | // x87
(1 << 1) // SSE
| (1 << 2); // AVX
#endif
}
#ifdef _WIN32
/*
Windows versions do not know about ISA extensions introduced after that
Windows version was released, but we can still use such extensions even
if Windows doesn't report them as supported. Windows only needs to know
about extensions that change the number or size of registers
(SSE, AVX/XSAVE, AVX512), because the OS must save those registers on
context switches. So it's enough to check
MY_PF_AVX_INSTRUCTIONS_AVAILABLE
instead of
MY_PF_AVX2_INSTRUCTIONS_AVAILABLE
*/
#define MY_PF_XSAVE_ENABLED 17
// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36
// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37
// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38
// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39
// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40
// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41
#endif
BoolInt CPU_IsSupported_AVX(void)
{
#ifdef _WIN32
if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))
return False;
/* PF_AVX_INSTRUCTIONS_AVAILABLE is probably supported only starting from
some recent Win10 revisions. But we need AVX on older Windows too,
so we don't use the following check: */
/*
if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))
return False;
*/
#endif
/*
OS must use the special XSAVE/XRSTOR instructions to save
AVX registers when required for context switching.
At OS startup:
OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions.
Also OS sets bitmask in XCR0 register that defines what
registers will be processed by XSAVE instruction:
XCR0.x87[bit 0] - x87 registers and state
XCR0.SSE[bit 1] - SSE registers and state
XCR0.AVX[bit 2] - AVX registers and state
CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27].
So we can read that bit in user-space.
XCR0 is available for reading in user-space by new XGETBV instruction.
*/
{
const UInt32 c = x86cpuid_Func_1_ECX();
if (0 == (1
& (c >> 28) // AVX instructions are supported by hardware
& (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS.
return False;
}
/* also we can check
CPUID.1:ECX.XSAVE [bit 26] : that shows that
XSAVE, XRSTOR, XSETBV, XGETBV instructions are supported by hardware.
But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */
/* If the OS has enabled the XSAVE extension instructions (OSXSAVE == 1),
in most cases we expect that it also supports storing/restoring
at least the AVX and SSE states.
But to be sure, we call the user-space instruction
XGETBV(0) to get the XCR0 value, a bitmask that defines
exactly which states (registers) the OS has enabled for storing/restoring.
*/
{
const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
// printf("\n=== XGetBV=%d\n", bm);
return 1
& (bm >> 1) // SSE state is supported (set by OS) for storing/restoring
& (bm >> 2); // AVX state is supported (set by OS) for storing/restoring
}
// since Win7SP1: we can use GetEnabledXStateFeatures();
}
BoolInt CPU_IsSupported_AVX2(void)
{
if (!CPU_IsSupported_AVX())
return False;
if (z7_x86_cpuid_GetMaxFunc() < 7)
return False;
{
UInt32 d[4];
z7_x86_cpuid(d, 7);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
return 1
& (d[1] >> 5); // avx2
}
}
BoolInt CPU_IsSupported_VAES_AVX2(void)
{
if (!CPU_IsSupported_AVX())
return False;
if (z7_x86_cpuid_GetMaxFunc() < 7)
return False;
{
UInt32 d[4];
z7_x86_cpuid(d, 7);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
return 1
& (d[1] >> 5) // avx2
// & (d[1] >> 31) // avx512vl
& (d[2] >> 9); // vaes // VEX-256/EVEX
}
}
BoolInt CPU_IsSupported_PageGB(void)
{
CHECK_CPUID_IS_SUPPORTED
{
UInt32 d[4];
z7_x86_cpuid(d, 0x80000000);
if (d[0] < 0x80000001)
return False;
z7_x86_cpuid(d, 0x80000001);
return (d[3] >> 26) & 1;
}
}
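/* Dispatch sketch (illustrative; the cached flag is not upstream code):
   callers typically query a feature once and reuse the result when
   selecting an optimized code path. */
static int g_example_useAvx2 = -1;
static BoolInt Example_UseAvx2(void)
{
  if (g_example_useAvx2 < 0)
    g_example_useAvx2 = (int)CPU_IsSupported_AVX2();
  return g_example_useAvx2;
}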
#elif defined(MY_CPU_ARM_OR_ARM64)
#ifdef _WIN32
#include "7zWindows.h"
BoolInt CPU_IsSupported_CRC32(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
#else
#if defined(__APPLE__)
/*
#include <stdio.h>
#include <string.h>
static void Print_sysctlbyname(const char *name)
{
size_t bufSize = 256;
char buf[256];
int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
{
int i;
printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
for (i = 0; i < 20; i++)
printf(" %2x", (unsigned)(Byte)buf[i]);
}
}
*/
/*
Print_sysctlbyname("hw.pagesize");
Print_sysctlbyname("machdep.cpu.brand_string");
*/
static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name)
{
UInt32 val = 0;
if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
return 1;
return 0;
}
BoolInt CPU_IsSupported_CRC32(void)
{
return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
}
BoolInt CPU_IsSupported_NEON(void)
{
return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
}
#ifdef MY_CPU_ARM64
#define APPLE_CRYPTO_SUPPORT_VAL 1
#else
#define APPLE_CRYPTO_SUPPORT_VAL 0
#endif
BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
#else // __APPLE__
#include <sys/auxv.h>
#define USE_HWCAP
#ifdef USE_HWCAP
#include <asm/hwcap.h>
#define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; }
#ifdef MY_CPU_ARM64
#define MY_HWCAP_CHECK_FUNC(name) \
MY_HWCAP_CHECK_FUNC_2(name, name)
MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
// MY_HWCAP_CHECK_FUNC (ASIMD)
#elif defined(MY_CPU_ARM)
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
#endif
#else // USE_HWCAP
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return 0; }
MY_HWCAP_CHECK_FUNC(NEON)
#endif // USE_HWCAP
MY_HWCAP_CHECK_FUNC (CRC32)
MY_HWCAP_CHECK_FUNC (SHA1)
MY_HWCAP_CHECK_FUNC (SHA2)
MY_HWCAP_CHECK_FUNC (AES)
#endif // __APPLE__
#endif // _WIN32
#endif // MY_CPU_ARM_OR_ARM64
#ifdef __APPLE__
#include <sys/sysctl.h>
int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
{
return sysctlbyname(name, buf, bufSize, NULL, 0);
}
int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
{
size_t bufSize = sizeof(*val);
const int res = z7_sysctlbyname_Get(name, val, &bufSize);
if (res == 0 && bufSize != sizeof(*val))
return EFAULT;
return res;
}
#endif

extern/lzma/CpuArch.h vendored

@@ -1,523 +0,0 @@
/* CpuArch.h -- CPU specific code
2023-04-02 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_CPU_ARCH_H
#define ZIP7_INC_CPU_ARCH_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/*
MY_CPU_LE means that CPU is LITTLE ENDIAN.
MY_CPU_BE means that CPU is BIG ENDIAN.
If MY_CPU_LE and MY_CPU_BE are not defined, we don't know the ENDIANNESS of the platform.
MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and supports unaligned memory accesses.
MY_CPU_64BIT means that the processor can work with 64-bit registers.
MY_CPU_64BIT can be used to select a fast code branch.
MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8).
*/
#if defined(_M_X64) \
|| defined(_M_AMD64) \
|| defined(__x86_64__) \
|| defined(__AMD64__) \
|| defined(__amd64__)
#define MY_CPU_AMD64
#ifdef __ILP32__
#define MY_CPU_NAME "x32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "x64"
#define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT
#endif
#if defined(_M_IX86) \
|| defined(__i386__)
#define MY_CPU_X86
#define MY_CPU_NAME "x86"
/* #define MY_CPU_32BIT */
#define MY_CPU_SIZEOF_POINTER 4
#endif
#if defined(_M_ARM64) \
|| defined(__AARCH64EL__) \
|| defined(__AARCH64EB__) \
|| defined(__aarch64__)
#define MY_CPU_ARM64
#ifdef __ILP32__
#define MY_CPU_NAME "arm64-32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "arm64"
#define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT
#endif
#if defined(_M_ARM) \
|| defined(_M_ARM_NT) \
|| defined(_M_ARMT) \
|| defined(__arm__) \
|| defined(__thumb__) \
|| defined(__ARMEL__) \
|| defined(__ARMEB__) \
|| defined(__THUMBEL__) \
|| defined(__THUMBEB__)
#define MY_CPU_ARM
#if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
#define MY_CPU_ARMT
#define MY_CPU_NAME "armt"
#else
#define MY_CPU_ARM32
#define MY_CPU_NAME "arm"
#endif
/* #define MY_CPU_32BIT */
#define MY_CPU_SIZEOF_POINTER 4
#endif
#if defined(_M_IA64) \
|| defined(__ia64__)
#define MY_CPU_IA64
#define MY_CPU_NAME "ia64"
#define MY_CPU_64BIT
#endif
#if defined(__mips64) \
|| defined(__mips64__) \
|| (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3))
#define MY_CPU_NAME "mips64"
#define MY_CPU_64BIT
#elif defined(__mips__)
#define MY_CPU_NAME "mips"
/* #define MY_CPU_32BIT */
#endif
#if defined(__ppc64__) \
|| defined(__powerpc64__) \
|| defined(__ppc__) \
|| defined(__powerpc__) \
|| defined(__PPC__) \
|| defined(_POWER)
#define MY_CPU_PPC_OR_PPC64
#if defined(__ppc64__) \
|| defined(__powerpc64__) \
|| defined(_LP64) \
|| defined(__64BIT__)
#ifdef __ILP32__
#define MY_CPU_NAME "ppc64-32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "ppc64"
#define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT
#else
#define MY_CPU_NAME "ppc"
#define MY_CPU_SIZEOF_POINTER 4
/* #define MY_CPU_32BIT */
#endif
#endif
#if defined(__riscv) \
|| defined(__riscv__)
#if __riscv_xlen == 32
#define MY_CPU_NAME "riscv32"
#elif __riscv_xlen == 64
#define MY_CPU_NAME "riscv64"
#else
#define MY_CPU_NAME "riscv"
#endif
#endif
#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
#define MY_CPU_X86_OR_AMD64
#endif
#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64)
#define MY_CPU_ARM_OR_ARM64
#endif
#ifdef _WIN32
#ifdef MY_CPU_ARM
#define MY_CPU_ARM_LE
#endif
#ifdef MY_CPU_ARM64
#define MY_CPU_ARM64_LE
#endif
#ifdef _M_IA64
#define MY_CPU_IA64_LE
#endif
#endif
#if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM_LE) \
|| defined(MY_CPU_ARM64_LE) \
|| defined(MY_CPU_IA64_LE) \
|| defined(__LITTLE_ENDIAN__) \
|| defined(__ARMEL__) \
|| defined(__THUMBEL__) \
|| defined(__AARCH64EL__) \
|| defined(__MIPSEL__) \
|| defined(__MIPSEL) \
|| defined(_MIPSEL) \
|| defined(__BFIN__) \
|| (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
#define MY_CPU_LE
#endif
#if defined(__BIG_ENDIAN__) \
|| defined(__ARMEB__) \
|| defined(__THUMBEB__) \
|| defined(__AARCH64EB__) \
|| defined(__MIPSEB__) \
|| defined(__MIPSEB) \
|| defined(_MIPSEB) \
|| defined(__m68k__) \
|| defined(__s390__) \
|| defined(__s390x__) \
|| defined(__zarch__) \
|| (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
#define MY_CPU_BE
#endif
#if defined(MY_CPU_LE) && defined(MY_CPU_BE)
#error Stop_Compiling_Bad_Endian
#endif
#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE)
#error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time
#endif
#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
#error Stop_Compiling_Bad_32_64_BIT
#endif
#ifdef __SIZEOF_POINTER__
#ifdef MY_CPU_SIZEOF_POINTER
#if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__
#error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
#endif
#else
#define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__
#endif
#endif
#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
#if defined (_LP64)
#error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
#endif
#endif
#ifdef _MSC_VER
#if _MSC_VER >= 1300
#define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1))
#define MY_CPU_pragma_pop __pragma(pack(pop))
#else
#define MY_CPU_pragma_pack_push_1
#define MY_CPU_pragma_pop
#endif
#else
#ifdef __xlC__
#define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)")
#define MY_CPU_pragma_pop _Pragma("pack()")
#else
#define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)")
#define MY_CPU_pragma_pop _Pragma("pack(pop)")
#endif
#endif
#ifndef MY_CPU_NAME
#ifdef MY_CPU_LE
#define MY_CPU_NAME "LE"
#elif defined(MY_CPU_BE)
#define MY_CPU_NAME "BE"
#else
/*
#define MY_CPU_NAME ""
*/
#endif
#endif
#ifdef __has_builtin
#define Z7_has_builtin(x) __has_builtin(x)
#else
#define Z7_has_builtin(x) 0
#endif
#define Z7_BSWAP32_CONST(v) \
( (((UInt32)(v) << 24) ) \
| (((UInt32)(v) << 8) & (UInt32)0xff0000) \
| (((UInt32)(v) >> 8) & (UInt32)0xff00 ) \
| (((UInt32)(v) >> 24) ))
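/* e.g. Z7_BSWAP32_CONST(0x11223344) == 0x44332211 */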
#if defined(_MSC_VER) && (_MSC_VER >= 1300)
#include <stdlib.h>
/* Note: these macros use the bswap instruction (introduced with the 486), which is unsupported on the 386 CPU */
#pragma intrinsic(_byteswap_ushort)
#pragma intrinsic(_byteswap_ulong)
#pragma intrinsic(_byteswap_uint64)
#define Z7_BSWAP16(v) _byteswap_ushort(v)
#define Z7_BSWAP32(v) _byteswap_ulong (v)
#define Z7_BSWAP64(v) _byteswap_uint64(v)
#define Z7_CPU_FAST_BSWAP_SUPPORTED
#elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && Z7_has_builtin(__builtin_bswap16))
#define Z7_BSWAP16(v) __builtin_bswap16(v)
#define Z7_BSWAP32(v) __builtin_bswap32(v)
#define Z7_BSWAP64(v) __builtin_bswap64(v)
#define Z7_CPU_FAST_BSWAP_SUPPORTED
#else
#define Z7_BSWAP16(v) ((UInt16) \
( ((UInt32)(v) << 8) \
| ((UInt32)(v) >> 8) \
))
#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v)
#define Z7_BSWAP64(v) \
( ( ( (UInt64)(v) ) << 8 * 7 ) \
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \
| ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \
| ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \
| ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \
| ( ( (UInt64)(v) >> 8 * 7 ) ) \
)
#endif
#ifdef MY_CPU_LE
#if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM64)
#define MY_CPU_LE_UNALIGN
#define MY_CPU_LE_UNALIGN_64
#elif defined(__ARM_FEATURE_UNALIGNED)
/* gcc 9 for 32-bit ARM can use the LDRD instruction, which requires 32-bit alignment,
so we can't use unaligned 64-bit operations. */
#define MY_CPU_LE_UNALIGN
#endif
#endif
#ifdef MY_CPU_LE_UNALIGN
#define GetUi16(p) (*(const UInt16 *)(const void *)(p))
#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
#ifdef MY_CPU_LE_UNALIGN_64
#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
#endif
#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
#else
#define GetUi16(p) ( (UInt16) ( \
((const Byte *)(p))[0] | \
((UInt16)((const Byte *)(p))[1] << 8) ))
#define GetUi32(p) ( \
((const Byte *)(p))[0] | \
((UInt32)((const Byte *)(p))[1] << 8) | \
((UInt32)((const Byte *)(p))[2] << 16) | \
((UInt32)((const Byte *)(p))[3] << 24))
#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)_vvv_; \
_ppp_[1] = (Byte)(_vvv_ >> 8); }
#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)_vvv_; \
_ppp_[1] = (Byte)(_vvv_ >> 8); \
_ppp_[2] = (Byte)(_vvv_ >> 16); \
_ppp_[3] = (Byte)(_vvv_ >> 24); }
#endif
#ifndef GetUi64
#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
#endif
#ifndef SetUi64
#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
SetUi32(_ppp2_ , (UInt32)_vvv2_) \
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) }
#endif
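/* Little-endian worked example: for Byte b[4] = { 0x01, 0x02, 0x03, 0x04 },
   GetUi16(b) == 0x0201 and GetUi32(b) == 0x04030201. */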
#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED)
#define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p))
#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); }
#if defined(MY_CPU_LE_UNALIGN_64)
#define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p))
#endif
#else
#define GetBe32(p) ( \
((UInt32)((const Byte *)(p))[0] << 24) | \
((UInt32)((const Byte *)(p))[1] << 16) | \
((UInt32)((const Byte *)(p))[2] << 8) | \
((const Byte *)(p))[3] )
#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)(_vvv_ >> 24); \
_ppp_[1] = (Byte)(_vvv_ >> 16); \
_ppp_[2] = (Byte)(_vvv_ >> 8); \
_ppp_[3] = (Byte)_vvv_; }
#endif
#ifndef GetBe64
#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
#endif
#ifndef GetBe16
#define GetBe16(p) ( (UInt16) ( \
((UInt16)((const Byte *)(p))[0] << 8) | \
((const Byte *)(p))[1] ))
#endif
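/* Big-endian worked example: for Byte b[4] = { 0x12, 0x34, 0x56, 0x78 },
   GetBe16(b) == 0x1234 and GetBe32(b) == 0x12345678. */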
#if defined(MY_CPU_BE)
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v)
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
#define Z7_CONV_NATIVE_TO_BE_32(v) (v)
#elif defined(MY_CPU_LE)
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v)
#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v)
#else
#error Stop_Compiling_Unknown_Endian_CONV
#endif
#if defined(MY_CPU_BE)
#define GetBe32a(p) (*(const UInt32 *)(const void *)(p))
#define GetBe16a(p) (*(const UInt16 *)(const void *)(p))
#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define GetUi32a(p) GetUi32(p)
#define GetUi16a(p) GetUi16(p)
#define SetUi32a(p, v) SetUi32(p, v)
#define SetUi16a(p, v) SetUi16(p, v)
#elif defined(MY_CPU_LE)
#define GetUi32a(p) (*(const UInt32 *)(const void *)(p))
#define GetUi16a(p) (*(const UInt16 *)(const void *)(p))
#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define GetBe32a(p) GetBe32(p)
#define GetBe16a(p) GetBe16(p)
#define SetBe32a(p, v) SetBe32(p, v)
#define SetBe16a(p, v) SetBe16(p, v)
#else
#error Stop_Compiling_Unknown_Endian_CPU_a
#endif
#if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM_OR_ARM64) \
|| defined(MY_CPU_PPC_OR_PPC64)
#define Z7_CPU_FAST_ROTATE_SUPPORTED
#endif
#ifdef MY_CPU_X86_OR_AMD64
void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function);
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void);
#if defined(MY_CPU_AMD64)
#define Z7_IF_X86_CPUID_SUPPORTED
#else
#define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc())
#endif
BoolInt CPU_IsSupported_AES(void);
BoolInt CPU_IsSupported_AVX(void);
BoolInt CPU_IsSupported_AVX2(void);
BoolInt CPU_IsSupported_VAES_AVX2(void);
BoolInt CPU_IsSupported_CMOV(void);
BoolInt CPU_IsSupported_SSE(void);
BoolInt CPU_IsSupported_SSE2(void);
BoolInt CPU_IsSupported_SSSE3(void);
BoolInt CPU_IsSupported_SSE41(void);
BoolInt CPU_IsSupported_SHA(void);
BoolInt CPU_IsSupported_PageGB(void);
#elif defined(MY_CPU_ARM_OR_ARM64)
BoolInt CPU_IsSupported_CRC32(void);
BoolInt CPU_IsSupported_NEON(void);
#if defined(_WIN32)
BoolInt CPU_IsSupported_CRYPTO(void);
#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO
#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO
#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO
#else
BoolInt CPU_IsSupported_SHA1(void);
BoolInt CPU_IsSupported_SHA2(void);
BoolInt CPU_IsSupported_AES(void);
#endif
#endif
#if defined(__APPLE__)
int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
#endif
EXTERN_C_END
#endif

1732
extern/lzma/LzFind.c vendored

@@ -1,166 +1,70 @@
/* LzFind.c -- Match finder for LZ algorithms
2023-03-14 : Igor Pavlov : Public domain */
#include "Precomp.h"
2008-10-04 : Igor Pavlov : Public domain */
#include <string.h>
// #include <stdio.h>
#include "CpuArch.h"
#include "LzFind.h"
#include "LzHash.h"
#define kBlockMoveAlign (1 << 7) // alignment for memmove()
#define kBlockSizeAlign (1 << 16) // alignment for block allocation
#define kBlockSizeReserveMin (1 << 24) // it's 1/256 of a 4 GB dictionary
#define kEmptyHashValue 0
#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
#define kNormalizeMask (~(kNormalizeStepMin - 1))
#define kMaxHistorySize ((UInt32)3 << 30)
#define kMaxValForNormalize ((UInt32)0)
// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug
#define kStartMaxLen 3
// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
#define GET_AVAIL_BYTES(p) \
Inline_MatchFinder_GetNumAvailableBytes(p)
// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
#define kFix5HashSize kFix4HashSize
/*
HASH2_CALC:
if (hv) match, then cur[0] and cur[1] also match
*/
#define HASH2_CALC hv = GetUi16(cur);
// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255]
/*
HASH3_CALC:
if (cur[0]) and (h2) match, then cur[1] also match
if (cur[0]) and (hv) match, then cur[1] and cur[2] also match
*/
#define HASH3_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
#define HASH4_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \
h3 = temp & (kHash3Size - 1); \
hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; }
#define HASH5_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \
h3 = temp & (kHash3Size - 1); \
temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \
/* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \
hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; }
#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
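/* Hash layout sketch (as allocated below): p->hash starts with the small
   fixed-size sub-tables for the 2-byte and 3-byte hashes (p->fixedHashSize
   slots in total), followed by the main table of (p->hashMask + 1) slots
   indexed by (hv). HASH_ZIP_CALC produces a 16-bit value (mask 0xFFFF). */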
static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc)
{
// if (!p->directInput)
if (!p->directInput)
{
ISzAlloc_Free(alloc, p->bufBase);
p->bufBase = NULL;
alloc->Free(alloc, p->bufferBase);
p->bufferBase = 0;
}
}
/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G */
static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc)
static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc)
{
if (blockSize == 0)
return 0;
if (!p->bufBase || p->blockSize != blockSize)
UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
if (p->directInput)
{
p->blockSize = blockSize;
return 1;
}
if (p->bufferBase == 0 || p->blockSize != blockSize)
{
// size_t blockSizeT;
LzInWindow_Free(p, alloc);
p->blockSize = blockSize;
// blockSizeT = blockSize;
// printf("\nblockSize = 0x%x\n", blockSize);
/*
#if defined _WIN64
// we can allocate 4GiB, but still use UInt32 for (p->blockSize)
// we use UInt32 type for (p->blockSize), because
// we don't want to wrap over 4 GiB,
// when we use (p->streamPos - p->pos) that is UInt32.
if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign)
{
blockSizeT = ((size_t)1 << 32);
printf("\nchanged to blockSizeT = 4GiB\n");
}
#endif
*/
p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
// printf("\nbufferBase = %p\n", p->bufBase);
// return 0; // for debug
p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)blockSize);
}
return (p->bufBase != NULL);
return (p->bufferBase != 0);
}
static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
Byte MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index) { return p->buffer[index]; }
static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); }
UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
{
p->posLimit -= subValue;
p->pos -= subValue;
p->streamPos -= subValue;
}
Z7_NO_INLINE
static void MatchFinder_ReadBlock(CMatchFinder *p)
{
if (p->streamEndWasReached || p->result != SZ_OK)
return;
/* We use (p->streamPos - p->pos) value.
(p->streamPos < p->pos) is allowed. */
if (p->directInput)
{
UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
if (curSize > p->directInputRem)
curSize = (UInt32)p->directInputRem;
p->streamPos += curSize;
p->directInputRem -= curSize;
if (p->directInputRem == 0)
p->streamEndWasReached = 1;
return;
}
for (;;)
{
const Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
size_t size = (size_t)(p->bufBase + p->blockSize - dest);
Byte *dest = p->buffer + (p->streamPos - p->pos);
size_t size = (p->bufferBase + p->blockSize - dest);
if (size == 0)
{
/* we call ReadBlock() after NeedMove() and MoveBlock().
NeedMove() and MoveBlock() provide more than (keepSizeAfter)
to the end of (blockSize).
So we don't execute this branch in normal code flow.
We can only get here if ReadBlock() is called before NeedMove() / MoveBlock().
*/
// p->result = SZ_ERROR_FAIL; // we can show error here
return;
}
// #define kRead 3
// if (size > kRead) size = kRead; // for debug
/*
// we need cast (Byte *)dest.
#ifdef __clang__
#pragma GCC diagnostic ignored "-Wcast-qual"
#endif
*/
p->result = ISeqInStream_Read(p->stream,
p->bufBase + (dest - p->bufBase), &size);
p->result = p->stream->Read(p->stream, dest, &size);
if (p->result != SZ_OK)
return;
if (size == 0)
@@ -169,60 +73,47 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
return;
}
p->streamPos += (UInt32)size;
if (GET_AVAIL_BYTES(p) > p->keepSizeAfter)
if (p->streamPos - p->pos > p->keepSizeAfter)
return;
/* here and in other (p->keepSizeAfter) checks we keep 1 byte more than was requested by the Create() function;
(GET_AVAIL_BYTES(p) >= p->keepSizeAfter) is the minimal required size */
}
// on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter)
}
Z7_NO_INLINE
void MatchFinder_MoveBlock(CMatchFinder *p)
{
const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore;
const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
p->buffer = p->bufBase + keepBefore;
memmove(p->bufBase,
p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
keepBefore + (size_t)GET_AVAIL_BYTES(p));
memmove(p->bufferBase,
p->buffer - p->keepSizeBefore,
(size_t)(p->streamPos - p->pos + p->keepSizeBefore));
p->buffer = p->bufferBase + p->keepSizeBefore;
}
/* We call MoveBlock() before ReadBlock().
So MoveBlock() can be a wasteful operation, if the whole input data
can fit in the current block even without calling MoveBlock().
In the important case where (dataSize <= historySize), the
condition (p->blockSize > dataSize + p->keepSizeAfter) is met,
so there is no MoveBlock() in that case.
*/
int MatchFinder_NeedMove(CMatchFinder *p)
{
if (p->directInput)
return 0;
if (p->streamEndWasReached || p->result != SZ_OK)
return 0;
return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
/* if (p->streamEndWasReached) return 0; */
return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
}
void MatchFinder_ReadIfRequired(CMatchFinder *p)
{
if (p->keepSizeAfter >= GET_AVAIL_BYTES(p))
if (p->streamEndWasReached)
return;
if (p->keepSizeAfter >= p->streamPos - p->pos)
MatchFinder_ReadBlock(p);
}
static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
{
if (MatchFinder_NeedMove(p))
MatchFinder_MoveBlock(p);
MatchFinder_ReadBlock(p);
}
static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
{
p->cutValue = 32;
p->btMode = 1;
p->numHashBytes = 4;
p->numHashBytes_Min = 2;
p->numHashOutBits = 0;
/* p->skipModeBits = 0; */
p->directInput = 0;
p->bigHash = 0;
}
@@ -230,657 +121,204 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
void MatchFinder_Construct(CMatchFinder *p)
{
unsigned i;
p->buffer = NULL;
p->bufBase = NULL;
UInt32 i;
p->bufferBase = 0;
p->directInput = 0;
p->stream = NULL;
p->hash = NULL;
p->expectedDataSize = (UInt64)(Int64)-1;
p->hash = 0;
MatchFinder_SetDefaultSettings(p);
for (i = 0; i < 256; i++)
{
UInt32 r = (UInt32)i;
unsigned j;
UInt32 r = i;
int j;
for (j = 0; j < 8; j++)
r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1));
p->crc[i] = r;
}
}
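/* The loop above builds the standard reflected CRC-32 table
   (kCrcPoly == 0xEDB88320); e.g. p->crc[1] == 0x77073096. */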
#undef kCrcPoly
static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc)
{
ISzAlloc_Free(alloc, p->hash);
p->hash = NULL;
alloc->Free(alloc, p->hash);
p->hash = 0;
}
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc)
{
MatchFinder_FreeThisClassMemory(p, alloc);
LzInWindow_Free(p, alloc);
}
static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
static CLzRef* AllocRefs(UInt32 num, ISzAlloc *alloc)
{
const size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
if (sizeInBytes / sizeof(CLzRef) != num)
return NULL;
return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
}
#if (kBlockSizeReserveMin < kBlockSizeAlign * 2)
#error Stop_Compiling_Bad_Reserve
#endif
static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
{
UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter);
/*
if (historySize > kMaxHistorySize)
return 0;
*/
// printf("\nhistorySize == 0x%x\n", historySize);
if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow
return 0;
{
const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign;
const UInt32 rem = kBlockSizeMax - blockSize;
const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2))
+ (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here
if (blockSize >= kBlockSizeMax
|| rem < kBlockSizeReserveMin) // we reject settings that will be slow
return 0;
if (reserve >= rem)
blockSize = kBlockSizeMax;
else
{
blockSize += reserve;
blockSize &= ~(UInt32)(kBlockSizeAlign - 1);
}
}
// printf("\n LzFind_blockSize = %x\n", blockSize);
// printf("\n LzFind_blockSize = %d\n", blockSize >> 20);
return blockSize;
return (CLzRef *)alloc->Alloc(alloc, sizeInBytes);
}
// input is historySize
static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs)
{
if (p->numHashBytes == 2)
return (1 << 16) - 1;
if (hs != 0)
hs--;
hs |= (hs >> 1);
hs |= (hs >> 2);
hs |= (hs >> 4);
hs |= (hs >> 8);
// we propagated 16 bits in (hs). Low 16 bits must be set later
if (hs >= (1 << 24))
{
if (p->numHashBytes == 3)
hs = (1 << 24) - 1;
/* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1)) */
}
// (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
hs |= (1 << 16) - 1; /* don't change it! */
// bt5: we adjust the size with recommended minimum size
if (p->numHashBytes >= 5)
hs |= (256 << kLzHash_CrcShift_2) - 1;
return hs;
}
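/* Worked example: with (numHashBytes == 4) and (historySize == (1 << 26)),
   hs rounds down to 0x3FFFFFF, so the main hash gets (1 << 26) slots. */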
// input is historySize
static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs)
{
if (p->numHashBytes == 2)
return (1 << 16) - 1;
if (hs != 0)
hs--;
hs |= (hs >> 1);
hs |= (hs >> 2);
hs |= (hs >> 4);
hs |= (hs >> 8);
// we propagated 16 bits in (hs). Low 16 bits must be set later
hs >>= 1;
if (hs >= (1 << 24))
{
if (p->numHashBytes == 3)
hs = (1 << 24) - 1;
else
hs >>= 1;
/* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1)) */
}
// (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
hs |= (1 << 16) - 1; /* don't change it! */
// bt5: we adjust the size with recommended minimum size
if (p->numHashBytes >= 5)
hs |= (256 << kLzHash_CrcShift_2) - 1;
return hs;
}
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
ISzAllocPtr alloc)
ISzAlloc *alloc)
{
/* we need one additional byte in (p->keepSizeBefore),
since we use MoveBlock() after (p->pos++) and before the dictionary is used */
// keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug
p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
keepAddBufferAfter += matchMaxLen;
/* we need (p->keepSizeAfter >= p->numHashBytes) */
if (keepAddBufferAfter < p->numHashBytes)
keepAddBufferAfter = p->numHashBytes;
// keepAddBufferAfter -= 2; // for debug
p->keepSizeAfter = keepAddBufferAfter;
if (p->directInput)
p->blockSize = 0;
if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
UInt32 sizeReserv;
if (historySize > kMaxHistorySize)
{
size_t hashSizeSum;
MatchFinder_Free(p, alloc);
return 0;
}
sizeReserv = historySize >> 1;
if (historySize > ((UInt32)2 << 30))
sizeReserv = historySize >> 2;
sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
/* we need one additional byte, since we use MoveBlock after pos++ and before the dictionary is used */
if (LzInWindow_Create(p, sizeReserv, alloc))
{
UInt32 newCyclicBufferSize = (historySize /* >> p->skipModeBits */) + 1;
UInt32 hs;
p->matchMaxLen = matchMaxLen;
{
UInt32 hs;
UInt32 hsCur;
if (p->numHashOutBits != 0)
{
unsigned numBits = p->numHashOutBits;
const unsigned nbMax =
(p->numHashBytes == 2 ? 16 :
(p->numHashBytes == 3 ? 24 : 32));
if (numBits > nbMax)
numBits = nbMax;
if (numBits >= 32)
hs = (UInt32)0 - 1;
else
hs = ((UInt32)1 << numBits) - 1;
// (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
hs |= (1 << 16) - 1; /* don't change it! */
if (p->numHashBytes >= 5)
hs |= (256 << kLzHash_CrcShift_2) - 1;
{
const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize);
if (hs > hs2)
hs = hs2;
}
hsCur = hs;
if (p->expectedDataSize < historySize)
{
const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize);
if (hsCur > hs2)
hsCur = hs2;
}
}
p->fixedHashSize = 0;
if (p->numHashBytes == 2)
hs = (1 << 16) - 1;
else
{
hs = MatchFinder_GetHashMask(p, historySize);
hsCur = hs;
if (p->expectedDataSize < historySize)
hs = historySize - 1;
hs |= (hs >> 1);
hs |= (hs >> 2);
hs |= (hs >> 4);
hs |= (hs >> 8);
hs >>= 1;
/* hs >>= p->skipModeBits; */
hs |= 0xFFFF; /* don't change it! It's required for Deflate */
if (hs > (1 << 24))
{
hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize);
if (hsCur > hs) // is it possible?
hsCur = hs;
if (p->numHashBytes == 3)
hs = (1 << 24) - 1;
else
hs >>= 1;
}
}
p->hashMask = hsCur;
hashSizeSum = hs;
hashSizeSum++;
if (hashSizeSum < hs)
return 0;
{
UInt32 fixedHashSize = 0;
if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size;
if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size;
// if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
hashSizeSum += fixedHashSize;
p->fixedHashSize = fixedHashSize;
}
p->hashMask = hs;
hs++;
if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
hs += p->fixedHashSize;
}
p->matchMaxLen = matchMaxLen;
{
size_t newSize;
size_t numSons;
const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
UInt32 prevSize = p->hashSizeSum + p->numSons;
UInt32 newSize;
p->historySize = historySize;
p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)
numSons = newCyclicBufferSize;
if (p->btMode)
numSons <<= 1;
newSize = hashSizeSum + numSons;
if (numSons < newCyclicBufferSize || newSize < numSons)
return 0;
// aligned size is not required here, but it can be better for some loops
#define NUM_REFS_ALIGN_MASK 0xF
newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
// 22.02: we don't reallocate buffer, if old size is enough
if (p->hash && p->numRefs >= newSize)
p->hashSizeSum = hs;
p->cyclicBufferSize = newCyclicBufferSize;
p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize);
newSize = p->hashSizeSum + p->numSons;
if (p->hash != 0 && prevSize == newSize)
return 1;
MatchFinder_FreeThisClassMemory(p, alloc);
p->numRefs = newSize;
p->hash = AllocRefs(newSize, alloc);
if (p->hash)
if (p->hash != 0)
{
p->son = p->hash + hashSizeSum;
p->son = p->hash + p->hashSizeSum;
return 1;
}
}
}
MatchFinder_Free(p, alloc);
return 0;
}
static void MatchFinder_SetLimits(CMatchFinder *p)
{
UInt32 k;
UInt32 n = kMaxValForNormalize - p->pos;
if (n == 0)
n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0)
k = p->cyclicBufferSize - p->cyclicBufferPos;
if (k < n)
n = k;
k = GET_AVAIL_BYTES(p);
UInt32 limit = kMaxValForNormalize - p->pos;
UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
if (limit2 < limit)
limit = limit2;
limit2 = p->streamPos - p->pos;
if (limit2 <= p->keepSizeAfter)
{
const UInt32 ksa = p->keepSizeAfter;
UInt32 mm = p->matchMaxLen;
if (k > ksa)
k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock
else if (k >= mm)
{
// the limitation for (p->lenLimit) update
k -= mm; // optimization : to reduce the number of checks
k++;
// k = 1; // non-optimized version : for debug
}
else
{
mm = k;
if (k != 0)
k = 1;
}
p->lenLimit = mm;
if (limit2 > 0)
limit2 = 1;
}
if (k < n)
n = k;
p->posLimit = p->pos + n;
}
void MatchFinder_Init_LowHash(CMatchFinder *p)
{
size_t i;
CLzRef *items = p->hash;
const size_t numItems = p->fixedHashSize;
for (i = 0; i < numItems; i++)
items[i] = kEmptyHashValue;
}
void MatchFinder_Init_HighHash(CMatchFinder *p)
{
size_t i;
CLzRef *items = p->hash + p->fixedHashSize;
const size_t numItems = (size_t)p->hashMask + 1;
for (i = 0; i < numItems; i++)
items[i] = kEmptyHashValue;
}
void MatchFinder_Init_4(CMatchFinder *p)
{
if (!p->directInput)
p->buffer = p->bufBase;
else
limit2 -= p->keepSizeAfter;
if (limit2 < limit)
limit = limit2;
{
/* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.
The code in CMatchFinderMt expects (pos = 1) */
p->pos =
p->streamPos =
1; // it's smallest optimal value. do not change it
// 0; // for debug
UInt32 lenLimit = p->streamPos - p->pos;
if (lenLimit > p->matchMaxLen)
lenLimit = p->matchMaxLen;
p->lenLimit = lenLimit;
}
p->result = SZ_OK;
p->streamEndWasReached = 0;
p->posLimit = p->pos + limit;
}
// (CYC_TO_POS_OFFSET == 0) is expected by some optimized code
#define CYC_TO_POS_OFFSET 0
// #define CYC_TO_POS_OFFSET 1 // for debug
void MatchFinder_Init(CMatchFinder *p)
{
MatchFinder_Init_HighHash(p);
MatchFinder_Init_LowHash(p);
MatchFinder_Init_4(p);
// if (readData)
UInt32 i;
for (i = 0; i < p->hashSizeSum; i++)
p->hash[i] = kEmptyHashValue;
p->cyclicBufferPos = 0;
p->buffer = p->bufferBase;
p->pos = p->streamPos = p->cyclicBufferSize;
p->result = SZ_OK;
p->streamEndWasReached = 0;
MatchFinder_ReadBlock(p);
/* if we init (cyclicBufferPos = pos), then we can use one variable
instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */
p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos)
// p->cyclicBufferPos = 0; // smallest value
// p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses.
MatchFinder_SetLimits(p);
}
#ifdef MY_CPU_X86_OR_AMD64
#if defined(__clang__) && (__clang_major__ >= 4) \
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701)
// || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
#define USE_LZFIND_SATUR_SUB_128
#define USE_LZFIND_SATUR_SUB_256
#define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
#define LZFIND_ATTRIB_AVX2 __attribute__((__target__("avx2")))
#elif defined(_MSC_VER)
#if (_MSC_VER >= 1600)
#define USE_LZFIND_SATUR_SUB_128
#endif
#if (_MSC_VER >= 1900)
#define USE_LZFIND_SATUR_SUB_256
#endif
#endif
// #elif defined(MY_CPU_ARM_OR_ARM64)
#elif defined(MY_CPU_ARM64)
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 8)
#define USE_LZFIND_SATUR_SUB_128
#ifdef MY_CPU_ARM64
// #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("")))
#else
// #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif
#elif defined(_MSC_VER)
#if (_MSC_VER >= 1910)
#define USE_LZFIND_SATUR_SUB_128
#endif
#endif
#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
#endif
#ifdef USE_LZFIND_SATUR_SUB_128
// #define Z7_SHOW_HW_STATUS
#ifdef Z7_SHOW_HW_STATUS
#include <stdio.h>
#define PRF(x) x
PRF(;)
#else
#define PRF(x)
#endif
#ifdef MY_CPU_ARM_OR_ARM64
#ifdef MY_CPU_ARM64
// #define FORCE_LZFIND_SATUR_SUB_128
#endif
typedef uint32x4_t LzFind_v128;
#define SASUB_128_V(v, s) \
vsubq_u32(vmaxq_u32(v, s), s)
#else // MY_CPU_ARM_OR_ARM64
#include <smmintrin.h> // sse4.1
typedef __m128i LzFind_v128;
// SSE 4.1
#define SASUB_128_V(v, s) \
_mm_sub_epi32(_mm_max_epu32(v, s), s)
#endif // MY_CPU_ARM_OR_ARM64
#define SASUB_128(i) \
*( LzFind_v128 *)( void *)(items + (i) * 4) = SASUB_128_V( \
*(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2);
Z7_NO_INLINE
static
#ifdef LZFIND_ATTRIB_SSE41
LZFIND_ATTRIB_SSE41
#endif
void
Z7_FASTCALL
LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
{
const LzFind_v128 sub2 =
#ifdef MY_CPU_ARM_OR_ARM64
vdupq_n_u32(subValue);
#else
_mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
#endif
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do
{
SASUB_128(0) SASUB_128(1) items += 2 * 4;
SASUB_128(0) SASUB_128(1) items += 2 * 4;
}
while (items != lim);
return (p->pos - p->historySize - 1) & kNormalizeMask;
}
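/* GetSubValue() rounds the normalization offset down to a multiple of
   kNormalizeStepMin, since kNormalizeMask == ~(kNormalizeStepMin - 1). */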
#ifdef USE_LZFIND_SATUR_SUB_256
#include <immintrin.h> // avx
/*
clang: immintrin.h uses
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX2__)
#include <avx2intrin.h>
#endif
so we need <avxintrin.h> for clang-cl */
#if defined(__clang__)
#include <avxintrin.h>
#include <avx2intrin.h>
#endif
// AVX2:
#define SASUB_256(i) \
*( __m256i *)( void *)(items + (i) * 8) = \
_mm256_sub_epi32(_mm256_max_epu32( \
*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2);
Z7_NO_INLINE
static
#ifdef LZFIND_ATTRIB_AVX2
LZFIND_ATTRIB_AVX2
#endif
void
Z7_FASTCALL
LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems)
{
const __m256i sub2 = _mm256_set_epi32(
(Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
(Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do
UInt32 i;
for (i = 0; i < numItems; i++)
{
SASUB_256(0) SASUB_256(1) items += 2 * 8;
SASUB_256(0) SASUB_256(1) items += 2 * 8;
}
while (items != lim);
}
#endif // USE_LZFIND_SATUR_SUB_256
#ifndef FORCE_LZFIND_SATUR_SUB_128
typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)(
UInt32 subValue, CLzRef *items, const CLzRef *lim);
static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
#endif // FORCE_LZFIND_SATUR_SUB_128
#endif // USE_LZFIND_SATUR_SUB_128
// kEmptyHashValue must be zero
// #define SASUB_32(i) { UInt32 v = items[i]; UInt32 m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; }
#define SASUB_32(i) { UInt32 v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; }
#ifdef FORCE_LZFIND_SATUR_SUB_128
#define DEFAULT_SaturSub LzFind_SaturSub_128
#else
#define DEFAULT_SaturSub LzFind_SaturSub_32
Z7_NO_INLINE
static
void
Z7_FASTCALL
LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do
{
SASUB_32(0) SASUB_32(1) items += 2;
SASUB_32(0) SASUB_32(1) items += 2;
SASUB_32(0) SASUB_32(1) items += 2;
SASUB_32(0) SASUB_32(1) items += 2;
}
while (items != lim);
}
#endif
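/* SASUB_32 is a saturating subtract: items[i] becomes
   (v < subValue) ? 0 : (v - subValue); e.g. with subValue == 9,
   v == 5 gives 0 (kEmptyHashValue) and v == 12 gives 3. */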
Z7_NO_INLINE
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
{
#define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7)
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
{
SASUB_32(0)
items++;
}
{
const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1);
CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask);
numItems &= k_Align_Mask;
if (items != lim)
{
#if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128)
if (g_LzFind_SaturSub)
g_LzFind_SaturSub(subValue, items, lim);
else
#endif
DEFAULT_SaturSub(subValue, items, lim);
}
items = lim;
}
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
for (; numItems != 0; numItems--)
{
SASUB_32(0)
items++;
UInt32 value = items[i];
if (value <= subValue)
value = kEmptyHashValue;
else
value -= subValue;
items[i] = value;
}
}
static void MatchFinder_Normalize(CMatchFinder *p)
{
UInt32 subValue = MatchFinder_GetSubValue(p);
MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons);
MatchFinder_ReduceOffsets(p, subValue);
}
// call MatchFinder_CheckLimits() only after (p->pos++) update
Z7_NO_INLINE
static void MatchFinder_CheckLimits(CMatchFinder *p)
{
if (// !p->streamEndWasReached && p->result == SZ_OK &&
p->keepSizeAfter == GET_AVAIL_BYTES(p))
{
// we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p))
if (MatchFinder_NeedMove(p))
MatchFinder_MoveBlock(p);
MatchFinder_ReadBlock(p);
}
if (p->pos == kMaxValForNormalize)
if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data.
/*
if we disable normalization for the last bytes of data, and
if (data_size == 4 GiB), we avoid a wasteful normalization,
but (pos) will wrap over zero (0) in that case,
and we cannot later resume normal operation
*/
{
// MatchFinder_Normalize(p);
/* after normalization we need (p->pos >= p->historySize + 1); */
/* we can reduce subValue to aligned value, if want to keep alignment
of (p->pos) and (p->buffer) for speculated accesses. */
const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
// const UInt32 subValue = (1 << 15); // for debug
// printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
MatchFinder_REDUCE_OFFSETS(p, subValue)
MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize);
{
size_t numSonRefs = p->cyclicBufferSize;
if (p->btMode)
numSonRefs <<= 1;
MatchFinder_Normalize3(subValue, p->son, numSonRefs);
}
}
MatchFinder_Normalize(p);
if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
MatchFinder_CheckAndMoveAndRead(p);
if (p->cyclicBufferPos == p->cyclicBufferSize)
p->cyclicBufferPos = 0;
MatchFinder_SetLimits(p);
}
/*
(lenLimit > maxLen)
*/
Z7_FORCE_INLINE
static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
UInt32 *d, unsigned maxLen)
static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
UInt32 *distances, UInt32 maxLen)
{
/*
son[_cyclicBufferPos] = curMatch;
for (;;)
{
UInt32 delta = pos - curMatch;
if (cutValue-- == 0 || delta >= _cyclicBufferSize)
return d;
return distances;
{
const Byte *pb = cur - delta;
curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
@@ -892,91 +330,35 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos,
break;
if (maxLen < len)
{
maxLen = len;
*d++ = len;
*d++ = delta - 1;
*distances++ = maxLen = len;
*distances++ = delta - 1;
if (len == lenLimit)
return d;
return distances;
}
}
}
}
*/
const Byte *lim = cur + lenLimit;
son[_cyclicBufferPos] = curMatch;
do
{
UInt32 delta;
if (curMatch == 0)
break;
// if (curMatch2 >= curMatch) return NULL;
delta = pos - curMatch;
if (delta >= _cyclicBufferSize)
break;
{
ptrdiff_t diff;
curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff])
{
const Byte *c = cur;
while (*c == c[diff])
{
if (++c == lim)
{
d[0] = (UInt32)(lim - cur);
d[1] = delta - 1;
return d + 2;
}
}
{
const unsigned len = (unsigned)(c - cur);
if (maxLen < len)
{
maxLen = len;
d[0] = (UInt32)len;
d[1] = delta - 1;
d += 2;
}
}
}
}
}
while (--cutValue);
return d;
}
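/* Cyclic-index note: the (son) index
   _cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)
   wraps within the cyclic buffer; e.g. _cyclicBufferPos == 3, delta == 5,
   _cyclicBufferSize == 16 gives index 14. */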
Z7_FORCE_INLINE
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
UInt32 *d, UInt32 maxLen)
UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
UInt32 *distances, UInt32 maxLen)
{
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
unsigned len0 = 0, len1 = 0;
UInt32 cmCheck;
// if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
cmCheck = (UInt32)(pos - _cyclicBufferSize);
if ((UInt32)pos <= _cyclicBufferSize)
cmCheck = 0;
if (cmCheck < curMatch)
do
CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
UInt32 len0 = 0, len1 = 0;
for (;;)
{
const UInt32 delta = pos - curMatch;
UInt32 delta = pos - curMatch;
if (cutValue-- == 0 || delta >= _cyclicBufferSize)
{
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
*ptr0 = *ptr1 = kEmptyHashValue;
return distances;
}
{
CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
const Byte *pb = cur - delta;
unsigned len = (len0 < len1 ? len0 : len1);
const UInt32 pair0 = pair[0];
UInt32 len = (len0 < len1 ? len0 : len1);
if (pb[len] == cur[len])
{
if (++len != lenLimit && pb[len] == cur[len])
@@ -985,65 +367,52 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt
break;
if (maxLen < len)
{
maxLen = (UInt32)len;
*d++ = (UInt32)len;
*d++ = delta - 1;
*distances++ = maxLen = len;
*distances++ = delta - 1;
if (len == lenLimit)
{
*ptr1 = pair0;
*ptr1 = pair[0];
*ptr0 = pair[1];
return d;
return distances;
}
}
}
if (pb[len] < cur[len])
{
*ptr1 = curMatch;
// const UInt32 curMatch2 = pair[1];
// if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
// curMatch = curMatch2;
curMatch = pair[1];
ptr1 = pair + 1;
curMatch = *ptr1;
len1 = len;
}
else
{
*ptr0 = curMatch;
curMatch = pair[0];
ptr0 = pair;
curMatch = *ptr0;
len0 = len;
}
}
}
while(--cutValue && cmCheck < curMatch);
*ptr0 = *ptr1 = kEmptyHashValue;
return d;
}
static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
{
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
unsigned len0 = 0, len1 = 0;
UInt32 cmCheck;
cmCheck = (UInt32)(pos - _cyclicBufferSize);
if ((UInt32)pos <= _cyclicBufferSize)
cmCheck = 0;
if (// curMatch >= pos || // failure
cmCheck < curMatch)
do
CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
UInt32 len0 = 0, len1 = 0;
for (;;)
{
const UInt32 delta = pos - curMatch;
UInt32 delta = pos - curMatch;
if (cutValue-- == 0 || delta >= _cyclicBufferSize)
{
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
*ptr0 = *ptr1 = kEmptyHashValue;
return;
}
{
CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
const Byte *pb = cur - delta;
unsigned len = (len0 < len1 ? len0 : len1);
UInt32 len = (len0 < len1 ? len0 : len1);
if (pb[len] == cur[len])
{
while (++len != lenLimit)
@@ -1061,594 +430,308 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
if (pb[len] < cur[len])
{
*ptr1 = curMatch;
curMatch = pair[1];
ptr1 = pair + 1;
curMatch = *ptr1;
len1 = len;
}
else
{
*ptr0 = curMatch;
curMatch = pair[0];
ptr0 = pair;
curMatch = *ptr0;
len0 = len;
}
}
}
while(--cutValue && cmCheck < curMatch);
*ptr0 = *ptr1 = kEmptyHashValue;
return;
}
#define MOVE_POS \
++p->cyclicBufferPos; \
p->buffer++; \
{ const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
#define MOVE_POS_RET MOVE_POS return distances;
#define MOVE_POS_RET MOVE_POS return offset;
Z7_NO_INLINE
static void MatchFinder_MovePos(CMatchFinder *p)
{
/* we go here at the end of stream data, when (avail < num_hash_bytes).
We don't update sons[cyclicBufferPos << btMode],
so the (sons) record will contain junk, and we cannot resume match searching
in normal operation, even if more input data is provided in the buffer.
p->sons[p->cyclicBufferPos << p->btMode] = 0; // kEmptyHashValue
if (p->btMode)
p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue
*/
MOVE_POS
}
static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
#define GET_MATCHES_HEADER2(minLen, ret_op) \
unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \
lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
cur = p->buffer;
#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances)
#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue)
#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS } while (--num);
#define GET_MATCHES_FOOTER(offset, maxLen) \
offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
distances + offset, maxLen) - distances); MOVE_POS_RET;
#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
distances = func(MF_PARAMS(p), \
distances, (UInt32)_maxLen_); MOVE_POS_RET
#define SKIP_FOOTER \
SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
#define GET_MATCHES_FOOTER_BT(_maxLen_) \
GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
#define GET_MATCHES_FOOTER_HC(_maxLen_) \
GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec)
#define UPDATE_maxLen { \
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \
const Byte *c = cur + maxLen; \
const Byte *lim = cur + lenLimit; \
for (; c != lim; c++) if (*(c + diff) != *c) break; \
maxLen = (unsigned)(c - cur); }
static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 offset;
GET_MATCHES_HEADER(2)
HASH2_CALC
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
GET_MATCHES_FOOTER_BT(1)
HASH2_CALC;
curMatch = p->hash[hashValue];
p->hash[hashValue] = p->pos;
offset = 0;
GET_MATCHES_FOOTER(offset, 1)
}
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 offset;
GET_MATCHES_HEADER(3)
HASH_ZIP_CALC
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
GET_MATCHES_FOOTER_BT(2)
HASH_ZIP_CALC;
curMatch = p->hash[hashValue];
p->hash[hashValue] = p->pos;
offset = 0;
GET_MATCHES_FOOTER(offset, 2)
}
#define SET_mmm \
mmm = p->cyclicBufferSize; \
if (pos < mmm) \
mmm = pos;
static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 mmm;
UInt32 h2, d2, pos;
unsigned maxLen;
UInt32 *hash;
UInt32 hash2Value, delta2, maxLen, offset;
GET_MATCHES_HEADER(3)
HASH3_CALC
HASH3_CALC;
hash = p->hash;
pos = p->pos;
d2 = pos - hash[h2];
curMatch = (hash + kFix3HashSize)[hv];
delta2 = p->pos - p->hash[hash2Value];
curMatch = p->hash[kFix3HashSize + hashValue];
hash[h2] = pos;
(hash + kFix3HashSize)[hv] = pos;
p->hash[hash2Value] =
p->hash[kFix3HashSize + hashValue] = p->pos;
SET_mmm
maxLen = 2;
if (d2 < mmm && *(cur - d2) == *cur)
offset = 0;
if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
{
UPDATE_maxLen
distances[0] = (UInt32)maxLen;
distances[1] = d2 - 1;
distances += 2;
for (; maxLen != lenLimit; maxLen++)
if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
break;
distances[0] = maxLen;
distances[1] = delta2 - 1;
offset = 2;
if (maxLen == lenLimit)
{
SkipMatchesSpec(MF_PARAMS(p));
MOVE_POS_RET
SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
MOVE_POS_RET;
}
}
GET_MATCHES_FOOTER_BT(maxLen)
GET_MATCHES_FOOTER(offset, maxLen)
}
static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 mmm;
UInt32 h2, h3, d2, d3, pos;
unsigned maxLen;
UInt32 *hash;
UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
GET_MATCHES_HEADER(4)
HASH4_CALC
HASH4_CALC;
hash = p->hash;
pos = p->pos;
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
curMatch = (hash + kFix4HashSize)[hv];
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[hv] = pos;
SET_mmm
maxLen = 3;
delta2 = p->pos - p->hash[ hash2Value];
delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
curMatch = p->hash[kFix4HashSize + hashValue];
for (;;)
p->hash[ hash2Value] =
p->hash[kFix3HashSize + hash3Value] =
p->hash[kFix4HashSize + hashValue] = p->pos;
maxLen = 1;
offset = 0;
if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
{
if (d2 < mmm && *(cur - d2) == *cur)
{
distances[0] = 2;
distances[1] = d2 - 1;
distances += 2;
if (*(cur - d2 + 2) == cur[2])
{
// distances[-2] = 3;
}
else if (d3 < mmm && *(cur - d3) == *cur)
{
d2 = d3;
distances[1] = d3 - 1;
distances += 2;
}
else
distances[0] = maxLen = 2;
distances[1] = delta2 - 1;
offset = 2;
}
if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
{
maxLen = 3;
distances[offset + 1] = delta3 - 1;
offset += 2;
delta2 = delta3;
}
if (offset != 0)
{
for (; maxLen != lenLimit; maxLen++)
if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
break;
}
else if (d3 < mmm && *(cur - d3) == *cur)
{
d2 = d3;
distances[1] = d3 - 1;
distances += 2;
}
else
break;
UPDATE_maxLen
distances[-2] = (UInt32)maxLen;
distances[offset - 2] = maxLen;
if (maxLen == lenLimit)
{
SkipMatchesSpec(MF_PARAMS(p));
MOVE_POS_RET
SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
MOVE_POS_RET;
}
break;
}
GET_MATCHES_FOOTER_BT(maxLen)
if (maxLen < 3)
maxLen = 3;
GET_MATCHES_FOOTER(offset, maxLen)
}
static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 mmm;
UInt32 h2, h3, d2, d3, maxLen, pos;
UInt32 *hash;
GET_MATCHES_HEADER(5)
HASH5_CALC
hash = p->hash;
pos = p->pos;
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
// d4 = pos - (hash + kFix4HashSize)[h4];
curMatch = (hash + kFix5HashSize)[hv];
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
// (hash + kFix4HashSize)[h4] = pos;
(hash + kFix5HashSize)[hv] = pos;
SET_mmm
maxLen = 4;
for (;;)
{
if (d2 < mmm && *(cur - d2) == *cur)
{
distances[0] = 2;
distances[1] = d2 - 1;
distances += 2;
if (*(cur - d2 + 2) == cur[2])
{
}
else if (d3 < mmm && *(cur - d3) == *cur)
{
distances[1] = d3 - 1;
distances += 2;
d2 = d3;
}
else
break;
}
else if (d3 < mmm && *(cur - d3) == *cur)
{
distances[1] = d3 - 1;
distances += 2;
d2 = d3;
}
else
break;
distances[-2] = 3;
if (*(cur - d2 + 3) != cur[3])
break;
UPDATE_maxLen
distances[-2] = (UInt32)maxLen;
if (maxLen == lenLimit)
{
SkipMatchesSpec(MF_PARAMS(p));
MOVE_POS_RET
}
break;
}
GET_MATCHES_FOOTER_BT(maxLen)
}
static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 mmm;
UInt32 h2, h3, d2, d3, pos;
unsigned maxLen;
UInt32 *hash;
UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
GET_MATCHES_HEADER(4)
HASH4_CALC
HASH4_CALC;
hash = p->hash;
pos = p->pos;
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
curMatch = (hash + kFix4HashSize)[hv];
delta2 = p->pos - p->hash[ hash2Value];
delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
curMatch = p->hash[kFix4HashSize + hashValue];
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[hv] = pos;
p->hash[ hash2Value] =
p->hash[kFix3HashSize + hash3Value] =
p->hash[kFix4HashSize + hashValue] = p->pos;
SET_mmm
maxLen = 3;
for (;;)
maxLen = 1;
offset = 0;
if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
{
if (d2 < mmm && *(cur - d2) == *cur)
{
distances[0] = 2;
distances[1] = d2 - 1;
distances += 2;
if (*(cur - d2 + 2) == cur[2])
{
// distances[-2] = 3;
}
else if (d3 < mmm && *(cur - d3) == *cur)
{
d2 = d3;
distances[1] = d3 - 1;
distances += 2;
}
else
distances[0] = maxLen = 2;
distances[1] = delta2 - 1;
offset = 2;
}
if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
{
maxLen = 3;
distances[offset + 1] = delta3 - 1;
offset += 2;
delta2 = delta3;
}
if (offset != 0)
{
for (; maxLen != lenLimit; maxLen++)
if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
break;
}
else if (d3 < mmm && *(cur - d3) == *cur)
{
d2 = d3;
distances[1] = d3 - 1;
distances += 2;
}
else
break;
UPDATE_maxLen
distances[-2] = (UInt32)maxLen;
distances[offset - 2] = maxLen;
if (maxLen == lenLimit)
{
p->son[p->cyclicBufferPos] = curMatch;
MOVE_POS_RET
MOVE_POS_RET;
}
break;
}
GET_MATCHES_FOOTER_HC(maxLen)
if (maxLen < 3)
maxLen = 3;
offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
distances + offset, maxLen) - (distances));
MOVE_POS_RET
}
static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 mmm;
UInt32 h2, h3, d2, d3, maxLen, pos;
UInt32 *hash;
GET_MATCHES_HEADER(5)
HASH5_CALC
hash = p->hash;
pos = p->pos;
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
// d4 = pos - (hash + kFix4HashSize)[h4];
curMatch = (hash + kFix5HashSize)[hv];
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
// (hash + kFix4HashSize)[h4] = pos;
(hash + kFix5HashSize)[hv] = pos;
SET_mmm
maxLen = 4;
for (;;)
{
if (d2 < mmm && *(cur - d2) == *cur)
{
distances[0] = 2;
distances[1] = d2 - 1;
distances += 2;
if (*(cur - d2 + 2) == cur[2])
{
}
else if (d3 < mmm && *(cur - d3) == *cur)
{
distances[1] = d3 - 1;
distances += 2;
d2 = d3;
}
else
break;
}
else if (d3 < mmm && *(cur - d3) == *cur)
{
distances[1] = d3 - 1;
distances += 2;
d2 = d3;
}
else
break;
distances[-2] = 3;
if (*(cur - d2 + 3) != cur[3])
break;
UPDATE_maxLen
distances[-2] = maxLen;
if (maxLen == lenLimit)
{
p->son[p->cyclicBufferPos] = curMatch;
MOVE_POS_RET
}
break;
}
GET_MATCHES_FOOTER_HC(maxLen)
}
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 offset;
GET_MATCHES_HEADER(3)
HASH_ZIP_CALC
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
GET_MATCHES_FOOTER_HC(2)
HASH_ZIP_CALC;
curMatch = p->hash[hashValue];
p->hash[hashValue] = p->pos;
offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
distances, 2) - (distances));
MOVE_POS_RET
}
static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
SKIP_HEADER(2)
do
{
HASH2_CALC
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
SKIP_HEADER(2)
HASH2_CALC;
curMatch = p->hash[hashValue];
p->hash[hashValue] = p->pos;
SKIP_FOOTER
}
SKIP_FOOTER
while (--num != 0);
}
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
SKIP_HEADER(3)
do
{
HASH_ZIP_CALC
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
SKIP_HEADER(3)
HASH_ZIP_CALC;
curMatch = p->hash[hashValue];
p->hash[hashValue] = p->pos;
SKIP_FOOTER
}
SKIP_FOOTER
while (--num != 0);
}
static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
SKIP_HEADER(3)
do
{
UInt32 h2;
UInt32 *hash;
HASH3_CALC
hash = p->hash;
curMatch = (hash + kFix3HashSize)[hv];
hash[h2] =
(hash + kFix3HashSize)[hv] = p->pos;
UInt32 hash2Value;
SKIP_HEADER(3)
HASH3_CALC;
curMatch = p->hash[kFix3HashSize + hashValue];
p->hash[hash2Value] =
p->hash[kFix3HashSize + hashValue] = p->pos;
SKIP_FOOTER
}
SKIP_FOOTER
while (--num != 0);
}
static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
SKIP_HEADER(4)
{
UInt32 h2, h3;
UInt32 *hash;
HASH4_CALC
hash = p->hash;
curMatch = (hash + kFix4HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
(hash + kFix4HashSize)[hv] = p->pos;
}
SKIP_FOOTER
}
static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
SKIP_HEADER(5)
{
UInt32 h2, h3;
UInt32 *hash;
HASH5_CALC
hash = p->hash;
curMatch = (hash + kFix5HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
// (hash + kFix4HashSize)[h4] =
(hash + kFix5HashSize)[hv] = p->pos;
}
SKIP_FOOTER
}
#define HC_SKIP_HEADER(minLen) \
do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
const Byte *cur; \
UInt32 *hash; \
UInt32 *son; \
UInt32 pos = p->pos; \
UInt32 num2 = num; \
/* (p->pos == p->posLimit) is not allowed here !!! */ \
{ const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \
num -= num2; \
{ const UInt32 cycPos = p->cyclicBufferPos; \
son = p->son + cycPos; \
p->cyclicBufferPos = cycPos + num2; } \
cur = p->buffer; \
hash = p->hash; \
do { \
UInt32 curMatch; \
UInt32 hv;
#define HC_SKIP_FOOTER \
cur++; pos++; *son++ = curMatch; \
} while (--num2); \
p->buffer = cur; \
p->pos = pos; \
if (pos == p->posLimit) MatchFinder_CheckLimits(p); \
}} while(num); \
static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
HC_SKIP_HEADER(4)
UInt32 h2, h3;
HASH4_CALC
curMatch = (hash + kFix4HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
(hash + kFix4HashSize)[hv] = pos;
HC_SKIP_FOOTER
}
static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
HC_SKIP_HEADER(5)
UInt32 h2, h3;
HASH5_CALC
curMatch = (hash + kFix5HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
// (hash + kFix4HashSize)[h4] =
(hash + kFix5HashSize)[hv] = pos;
HC_SKIP_FOOTER
}
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
HC_SKIP_HEADER(3)
HASH_ZIP_CALC
curMatch = hash[hv];
hash[hv] = pos;
HC_SKIP_FOOTER
}
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
{
vTable->Init = (Mf_Init_Func)MatchFinder_Init;
vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte;
vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
if (!p->btMode)
{
if (p->numHashBytes <= 4)
{
vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
}
else
{
vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
}
}
else if (p->numHashBytes == 2)
{
@@ -1660,58 +743,9 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
}
else if (p->numHashBytes == 4)
{
vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
}
else
{
vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
}
}
void LzFindPrepare(void)
{
#ifndef FORCE_LZFIND_SATUR_SUB_128
#ifdef USE_LZFIND_SATUR_SUB_128
LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
#ifdef MY_CPU_ARM_OR_ARM64
{
if (CPU_IsSupported_NEON())
{
// #pragma message ("=== LzFind NEON")
PRF(printf("\n=== LzFind NEON\n"));
f = LzFind_SaturSub_128;
}
// f = 0; // for debug
}
#else // MY_CPU_ARM_OR_ARM64
if (CPU_IsSupported_SSE41())
{
// #pragma message ("=== LzFind SSE41")
PRF(printf("\n=== LzFind SSE41\n"));
f = LzFind_SaturSub_128;
#ifdef USE_LZFIND_SATUR_SUB_256
if (CPU_IsSupported_AVX2())
{
// #pragma message ("=== LzFind AVX2")
PRF(printf("\n=== LzFind AVX2\n"));
f = LzFind_SaturSub_256;
}
#endif
}
#endif // MY_CPU_ARM_OR_ARM64
g_LzFind_SaturSub = f;
#endif // USE_LZFIND_SATUR_SUB_128
#endif // FORCE_LZFIND_SATUR_SUB_128
}
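/* Illustrative usage sketch -- not part of the 7-Zip sources. It wires the
   pieces above together under stated assumptions: g_Alloc (declared in the
   SDK's Alloc.h) is used as the allocator, Construct()'s defaults for btMode
   and numHashBytes are kept, and error handling is reduced to early returns. */
extern const ISzAlloc g_Alloc; /* assumed available from Alloc.h */
static void MatchFinder_Usage_Sketch(const Byte *src, size_t srcLen)
{
  CMatchFinder mf;
  IMatchFinder2 vt;
  UInt32 d[128]; /* receives (len, dist - 1) pairs; the longest match is last */
  MatchFinder_Construct(&mf);
  MatchFinder_SET_DIRECT_INPUT_BUF(&mf, src, srcLen) /* before Create() */
  if (!MatchFinder_Create(&mf, 1 << 16, 0, 32, 0, &g_Alloc))
    return; /* allocation failed */
  MatchFinder_CreateVTable(&mf, &vt);
  vt.Init(&mf);
  while (vt.GetNumAvailableBytes(&mf) != 0)
  {
    const UInt32 *end = vt.GetMatches(&mf, d);
    const UInt32 numPairs = (UInt32)(end - d) / 2;
    (void)numPairs; /* inspect the (len, dist - 1) pairs here */
  }
  MatchFinder_Free(&mf, &g_Alloc);
}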
#undef MOVE_POS
#undef MOVE_POS_RET
#undef PRF

126
extern/lzma/LzFind.h vendored

@@ -1,121 +1,76 @@
/* LzFind.h -- Match finder for LZ algorithms
2023-03-04 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_LZ_FIND_H
#define ZIP7_INC_LZ_FIND_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#include "Types.h"
typedef UInt32 CLzRef;
typedef struct
{
const Byte *buffer;
UInt32 pos;
UInt32 posLimit;
UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
UInt32 lenLimit;
UInt32 cyclicBufferPos;
UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
Byte streamEndWasReached;
Byte btMode;
Byte bigHash;
Byte directInput;
UInt32 matchMaxLen;
CLzRef *hash;
CLzRef *son;
UInt32 hashMask;
UInt32 cutValue;
Byte *bufBase;
ISeqInStreamPtr stream;
UInt32 blockSize;
UInt32 keepSizeBefore;
UInt32 keepSizeAfter;
UInt32 numHashBytes;
size_t directInputRem;
UInt32 historySize;
UInt32 fixedHashSize;
Byte numHashBytes_Min;
Byte numHashOutBits;
Byte _pad2_[2];
UInt32 hashSizeSum;
SRes result;
UInt32 crc[256];
size_t numRefs;
UInt64 expectedDataSize;
} CMatchFinder;
#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer)
#define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)])
#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos))
/*
#define Inline_MatchFinder_IsFinishedOK(p) \
((p)->streamEndWasReached \
&& (p)->streamPos == (p)->pos \
&& (!(p)->directInput || (p)->directInputRem == 0))
*/
int MatchFinder_NeedMove(CMatchFinder *p);
/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */
void MatchFinder_MoveBlock(CMatchFinder *p);
void MatchFinder_ReadIfRequired(CMatchFinder *p);
void MatchFinder_Construct(CMatchFinder *p);
/* (directInput = 0) is default value.
It's required to provide correct (directInput) value
before calling MatchFinder_Create().
You can set (directInput) by any of the following calls:
- MatchFinder_SET_DIRECT_INPUT_BUF()
- MatchFinder_SET_STREAM()
- MatchFinder_SET_STREAM_MODE()
*/
#define MatchFinder_SET_DIRECT_INPUT_BUF(p, _src_, _srcLen_) { \
(p)->stream = NULL; \
(p)->directInput = 1; \
(p)->buffer = (_src_); \
(p)->directInputRem = (_srcLen_); }
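/* Hedged usage sketch, not part of the original header: per the comment
   above, direct input must be configured before MatchFinder_Create();
   srcBuf / srcLen here are caller-provided names.

  CMatchFinder mf;
  MatchFinder_Construct(&mf);
  MatchFinder_SET_DIRECT_INPUT_BUF(&mf, srcBuf, srcLen)
  // ... then MatchFinder_Create(), MatchFinder_CreateVTable(), Init ...
*/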
/*
#define MatchFinder_SET_STREAM_MODE(p) { \
(p)->directInput = 0; }
*/
#define MatchFinder_SET_STREAM(p, _stream_) { \
(p)->stream = _stream_; \
(p)->directInput = 0; }
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
ISzAllocPtr alloc);
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
/*
#define MatchFinder_INIT_POS(p, val) \
(p)->pos = (val); \
(p)->streamPos = (val);
*/
// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
#define MatchFinder_REDUCE_OFFSETS(p, subValue) \
(p)->pos -= (subValue); \
(p)->streamPos -= (subValue);
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
UInt32 *distances, UInt32 maxLen);
/*
@@ -125,35 +80,28 @@ Conditions:
*/
typedef void (*Mf_Init_Func)(void *object);
typedef Byte (*Mf_GetIndexByte_Func)(void *object, Int32 index);
typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
typedef void (*Mf_Skip_Func)(void *object, UInt32);
typedef struct
{
Mf_Init_Func Init;
Mf_GetIndexByte_Func GetIndexByte;
Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
Mf_GetMatches_Func GetMatches;
Mf_Skip_Func Skip;
} IMatchFinder2;
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
void MatchFinder_Init_LowHash(CMatchFinder *p);
void MatchFinder_Init_HighHash(CMatchFinder *p);
void MatchFinder_Init_4(CMatchFinder *p);
void MatchFinder_Init(CMatchFinder *p);
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
void LzFindPrepare(void);
EXTERN_C_END
#endif

1406
extern/lzma/LzFindMt.c vendored

@@ -1,1406 +0,0 @@
/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
// #include <stdio.h>
#include "CpuArch.h"
#include "LzHash.h"
#include "LzFindMt.h"
// #define LOG_ITERS
// #define LOG_THREAD
#ifdef LOG_THREAD
#include <stdio.h>
#define PRF(x) x
#else
#define PRF(x)
#endif
#ifdef LOG_ITERS
#include <stdio.h>
extern UInt64 g_NumIters_Tree;
extern UInt64 g_NumIters_Loop;
extern UInt64 g_NumIters_Bytes;
#define LOG_ITER(x) x
#else
#define LOG_ITER(x)
#endif
#define kMtHashBlockSize ((UInt32)1 << 17)
#define kMtHashNumBlocks (1 << 1)
#define GET_HASH_BLOCK_OFFSET(i) (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize)
#define kMtBtBlockSize ((UInt32)1 << 16)
#define kMtBtNumBlocks (1 << 4)
#define GET_BT_BLOCK_OFFSET(i) (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize)
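/* Illustrative sketch, not part of the sources: kMtBtNumBlocks is a power of
   two, so masking an ever-increasing block index with (kMtBtNumBlocks - 1)
   selects a slot on a fixed ring of kMtBtBlockSize-sized block buffers. */
static size_t BtBlockOffset_Sketch(UInt32 blockIndex)
{
  /* same arithmetic as GET_BT_BLOCK_OFFSET(blockIndex) */
  return ((blockIndex & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize);
}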
/*
HASH functions:
We use raw 8/16 bits from a[1] and a[2],
xored with crc(a[0]) and crc(a[3]).
We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches.
our crc() function provides one-to-one correspondence for low 8-bit values:
(crc[0...0xFF] & 0xFF) <-> [0...0xFF]
*/
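/* Illustrative sketch, not part of the sources: the 2-byte and 3-byte hash
   values that MT_HASH2_CALC / MT_HASH3_CALC below compute for a cursor cur,
   given the match finder's crc table. */
static void MtHash23_Sketch(const UInt32 *crc, const Byte *cur,
    UInt32 *h2, UInt32 *h3)
{
  const UInt32 temp = crc[cur[0]] ^ cur[1];
  *h2 = temp & (kHash2Size - 1);                           /* uses cur[0..1] */
  *h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); /* uses cur[0..2] */
}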
#define MF(mt) ((mt)->MatchFinder)
#define MF_CRC (p->crc)
// #define MF(mt) (&(mt)->MatchFinder)
// #define MF_CRC (p->MatchFinder.crc)
#define MT_HASH2_CALC \
h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1);
#define MT_HASH3_CALC { \
UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
/*
#define MT_HASH3_CALC__NO_2 { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
#define MT_HASH4_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \
h3 = temp & (kHash3Size - 1); \
h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }
// (kHash4Size - 1);
*/
Z7_NO_INLINE
static void MtSync_Construct(CMtSync *p)
{
p->affinity = 0;
p->wasCreated = False;
p->csWasInitialized = False;
p->csWasEntered = False;
Thread_CONSTRUCT(&p->thread)
Event_Construct(&p->canStart);
Event_Construct(&p->wasStopped);
Semaphore_Construct(&p->freeSemaphore);
Semaphore_Construct(&p->filledSemaphore);
}
// #define DEBUG_BUFFER_LOCK // define it to debug lock state
#ifdef DEBUG_BUFFER_LOCK
#include <stdlib.h>
#define BUFFER_MUST_BE_LOCKED(p) if (!(p)->csWasEntered) exit(1);
#define BUFFER_MUST_BE_UNLOCKED(p) if ( (p)->csWasEntered) exit(1);
#else
#define BUFFER_MUST_BE_LOCKED(p)
#define BUFFER_MUST_BE_UNLOCKED(p)
#endif
#define LOCK_BUFFER(p) { \
BUFFER_MUST_BE_UNLOCKED(p); \
CriticalSection_Enter(&(p)->cs); \
(p)->csWasEntered = True; }
#define UNLOCK_BUFFER(p) { \
BUFFER_MUST_BE_LOCKED(p); \
CriticalSection_Leave(&(p)->cs); \
(p)->csWasEntered = False; }
Z7_NO_INLINE
static UInt32 MtSync_GetNextBlock(CMtSync *p)
{
UInt32 numBlocks = 0;
if (p->needStart)
{
BUFFER_MUST_BE_UNLOCKED(p)
p->numProcessedBlocks = 1;
p->needStart = False;
p->stopWriting = False;
p->exit = False;
Event_Reset(&p->wasStopped);
Event_Set(&p->canStart);
}
else
{
UNLOCK_BUFFER(p)
// we free current block
numBlocks = p->numProcessedBlocks++;
Semaphore_Release1(&p->freeSemaphore);
}
// buffer is UNLOCKED here
Semaphore_Wait(&p->filledSemaphore);
LOCK_BUFFER(p)
return numBlocks;
}
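/* Illustrative sketch, not part of the sources: the writer-side counterpart
   of the bounded-buffer protocol above. The writer waits on freeSemaphore for
   a reusable block, fills it, and releases filledSemaphore; the reader
   (MtSync_GetNextBlock) waits on filledSemaphore and returns finished blocks
   through freeSemaphore. FillBlock is a hypothetical callback. */
static void MtSync_WriterStep_Sketch(CMtSync *p, UInt32 *blockIndex,
    void (*FillBlock)(UInt32 blockIndex))
{
  Semaphore_Wait(&p->freeSemaphore);       /* blocks while the ring is full */
  FillBlock(*blockIndex);
  (*blockIndex)++;
  Semaphore_Release1(&p->filledSemaphore); /* hand the block to the reader */
}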
/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */
Z7_NO_INLINE
static void MtSync_StopWriting(CMtSync *p)
{
if (!Thread_WasCreated(&p->thread) || p->needStart)
return;
PRF(printf("\nMtSync_StopWriting %p\n", p));
if (p->csWasEntered)
{
/* we don't use buffer in this thread after StopWriting().
So we UNLOCK buffer.
And we restore default UNLOCKED state for stopped thread */
UNLOCK_BUFFER(p)
}
/* We send (p->stopWriting) message and release freeSemaphore
to free current block.
So the thread will see (p->stopWriting) at some
iteration after Wait(freeSemaphore).
The thread doesn't need to fill all avail free blocks,
so we can get fast thread stop.
*/
p->stopWriting = True;
Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!!
PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p));
Event_Wait(&p->wasStopped);
PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finsihed\n", p));
/* 21.03 : we don't restore semaphore counters here.
We will recreate and reinit semaphores in next start */
p->needStart = True;
}
Z7_NO_INLINE
static void MtSync_Destruct(CMtSync *p)
{
PRF(printf("\nMtSync_Destruct %p\n", p));
if (Thread_WasCreated(&p->thread))
{
/* we want thread to be in Stopped state before sending EXIT command.
note: stop(btSync) will stop (htSync) also */
MtSync_StopWriting(p);
/* thread in Stopped state here : (p->needStart == true) */
p->exit = True;
// if (p->needStart) // it's (true)
Event_Set(&p->canStart); // we send EXIT command to thread
Thread_Wait_Close(&p->thread); // we wait thread finishing
}
if (p->csWasInitialized)
{
CriticalSection_Delete(&p->cs);
p->csWasInitialized = False;
}
p->csWasEntered = False;
Event_Close(&p->canStart);
Event_Close(&p->wasStopped);
Semaphore_Close(&p->freeSemaphore);
Semaphore_Close(&p->filledSemaphore);
p->wasCreated = False;
}
// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
// we want to get real system error codes here instead of SZ_ERROR_THREAD
#define RINOK_THREAD(x) RINOK_WRes(x)
// call it before each new file (when new starting is required):
Z7_NO_INLINE
static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
{
WRes wres;
// BUFFER_MUST_BE_UNLOCKED(p)
if (!p->needStart || p->csWasEntered)
return SZ_ERROR_FAIL;
wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks);
if (wres == 0)
wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks);
return MY_SRes_HRESULT_FROM_WRes(wres);
}
static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
WRes wres;
if (p->wasCreated)
return SZ_OK;
RINOK_THREAD(CriticalSection_Init(&p->cs))
p->csWasInitialized = True;
p->csWasEntered = False;
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart))
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped))
p->needStart = True;
p->exit = True; /* p->exit is unused before (canStart) Event.
But in case of some unexpected code failure we will get fast exit from thread */
// return ERROR_TOO_MANY_POSTS; // for debug
// return EINVAL; // for debug
if (p->affinity != 0)
wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
else
wres = Thread_Create(&p->thread, startAddress, obj);
RINOK_THREAD(wres)
p->wasCreated = True;
return SZ_OK;
}
Z7_NO_INLINE
static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
if (wres == 0)
return 0;
MtSync_Destruct(p);
return MY_SRes_HRESULT_FROM_WRes(wres);
}
// ---------- HASH THREAD ----------
#define kMtMaxValForNormalize 0xFFFFFFFF
// #define kMtMaxValForNormalize ((1 << 21)) // for debug
// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
#ifdef MY_CPU_LE_UNALIGN
#define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)
#else
#define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))
#endif
#define GetHeads_DECL(name) \
static void GetHeads ## name(const Byte *p, UInt32 pos, \
UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)
#define GetHeads_LOOP(v) \
for (; numHeads != 0; numHeads--) { \
const UInt32 value = (v); \
p++; \
*heads++ = pos - hash[value]; \
hash[value] = pos++; }
#define DEF_GetHeads2(name, v, action) \
GetHeads_DECL(name) { action \
GetHeads_LOOP(v) }
#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)
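/* Illustrative sketch, not part of the sources: what DEF_GetHeads2(2, ...)
   below expands to. For each of numHeads positions it records the distance
   to the previous occurrence of the same 16-bit value and updates that
   value's head to the current position. */
static void GetHeads2_Expanded_Sketch(const Byte *p, UInt32 pos,
    UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)
{
  UNUSED_VAR(hashMask)
  UNUSED_VAR(crc)
  for (; numHeads != 0; numHeads--)
  {
    const UInt32 value = GetUi16(p);
    p++;
    *heads++ = pos - hash[value];
    hash[value] = pos++;
  }
}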
DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
DEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)
DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
// BT3 is not good for crc collisions for big hashMask values.
/*
GetHeads_DECL(3b)
{
UNUSED_VAR(hashMask);
UNUSED_VAR(crc);
{
const Byte *pLim = p + numHeads;
if (numHeads == 0)
return;
pLim--;
while (p < pLim)
{
UInt32 v1 = GetUi32(p);
UInt32 v0 = v1 & 0xFFFFFF;
UInt32 h0, h1;
p += 2;
v1 >>= 8;
h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++;
h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++;
heads += 2;
}
if (p == pLim)
{
UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16);
*heads = pos - hash[v0];
hash[v0] = pos;
}
}
}
*/
/*
GetHeads_DECL(4)
{
unsigned sh = 0;
UNUSED_VAR(crc)
while ((hashMask & 0x80000000) == 0)
{
hashMask <<= 1;
sh++;
}
GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh)
}
#define GetHeads4b GetHeads4
*/
#define USE_GetHeads_LOCAL_CRC
#ifdef USE_GetHeads_LOCAL_CRC
GetHeads_DECL(4)
{
UInt32 crc0[256];
UInt32 crc1[256];
{
unsigned i;
for (i = 0; i < 256; i++)
{
UInt32 v = crc[i];
crc0[i] = v & hashMask;
crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
// crc1[i] = rotlFixed(v, 8) & hashMask;
}
}
GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1))
}
GetHeads_DECL(4b)
{
UInt32 crc0[256];
{
unsigned i;
for (i = 0; i < 256; i++)
crc0[i] = crc[i] & hashMask;
}
GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p))
}
GetHeads_DECL(5)
{
UInt32 crc0[256];
UInt32 crc1[256];
UInt32 crc2[256];
{
unsigned i;
for (i = 0; i < 256; i++)
{
UInt32 v = crc[i];
crc0[i] = v & hashMask;
crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
crc2[i] = (v << kLzHash_CrcShift_2) & hashMask;
}
}
GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1))
}
GetHeads_DECL(5b)
{
UInt32 crc0[256];
UInt32 crc1[256];
{
unsigned i;
for (i = 0; i < 256; i++)
{
UInt32 v = crc[i];
crc0[i] = v & hashMask;
crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
}
}
GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p))
}
#else
DEF_GetHeads(4, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask)
DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask)
DEF_GetHeads(5, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask)
DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask)
#endif
static void HashThreadFunc(CMatchFinderMt *mt)
{
CMtSync *p = &mt->hashSync;
PRF(printf("\nHashThreadFunc\n"));
for (;;)
{
UInt32 blockIndex = 0;
PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n"));
Event_Wait(&p->canStart);
PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n"));
if (p->exit)
{
PRF(printf("\nHashThreadFunc : exit \n"));
return;
}
MatchFinder_Init_HighHash(MF(mt));
for (;;)
{
PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos));
{
CMatchFinder *mf = MF(mt);
if (MatchFinder_NeedMove(mf))
{
CriticalSection_Enter(&mt->btSync.cs);
CriticalSection_Enter(&mt->hashSync.cs);
{
const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf);
ptrdiff_t offset;
MatchFinder_MoveBlock(mf);
offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf);
mt->pointerToCurPos -= offset;
mt->buffer -= offset;
}
CriticalSection_Leave(&mt->hashSync.cs);
CriticalSection_Leave(&mt->btSync.cs);
continue;
}
Semaphore_Wait(&p->freeSemaphore);
if (p->exit) // exit is unexpected here. But we check it here for some failure case
return;
// for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
if (p->stopWriting)
break;
MatchFinder_ReadIfRequired(mf);
{
UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++);
UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf);
heads[0] = 2;
heads[1] = num;
/* heads[1] contains the number of avail bytes:
if (avail < mf->numHashBytes) :
{
it means that stream was finished
HASH_THREAD and BT_THREAD must move position for heads[1] (avail) bytes.
HASH_THREAD doesn't stop,
HASH_THREAD fills only the header (2 numbers) for all next blocks:
{2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0}
}
else
{
HASH_THREAD and BT_THREAD must move position for (heads[0] - 2) bytes;
}
*/
if (num >= mf->numHashBytes)
{
num = num - mf->numHashBytes + 1;
if (num > kMtHashBlockSize - 2)
num = kMtHashBlockSize - 2;
if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
{
const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
MatchFinder_REDUCE_OFFSETS(mf, subValue)
MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
}
heads[0] = 2 + num;
mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
}
mf->pos += num; // wrap over zero is allowed at the end of stream
mf->buffer += num;
}
}
Semaphore_Release1(&p->filledSemaphore);
} // for() processing end
// p->numBlocks_Sent = blockIndex;
Event_Set(&p->wasStopped);
} // for() thread end
}
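/* Illustrative sketch, not part of the sources: how a consumer walks one hash
   block written above. heads[0] is the number of UInt32s used in the block
   (2-word header included), heads[1] the byte count that was available, and
   the head entries start at heads + 2. */
static void ReadHashBlock_Sketch(const UInt32 *heads)
{
  const UInt32 numUsed = heads[0]; /* == 2 + number of head entries */
  const UInt32 avail = heads[1];
  const UInt32 *h = heads + 2;
  const UInt32 *end = heads + numUsed;
  (void)avail;
  while (h != end)
  {
    /* *h is (pos - previousPos) for the hashed bytes at this position */
    h++;
  }
}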
// ---------- BT THREAD ----------
/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap.
here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */
#define CYC_TO_POS_OFFSET 0
// #define CYC_TO_POS_OFFSET 1 // for debug
#define MFMT_GM_INLINE
#ifdef MFMT_GM_INLINE
/*
we use size_t for (pos) instead of UInt32
to eliminate "movsx" BUG in old MSVC x64 compiler.
*/
UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes);
#endif
static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
{
UInt32 numProcessed = 0;
UInt32 curPos = 2;
/* GetMatchesSpec() functions don't create (len = 1)
in [len, dist] match pairs, if (p->numHashBytes >= 2)
Also we suppose here that (matchMaxLen >= 2).
So the following code for (reserve) is not required
UInt32 reserve = (p->matchMaxLen * 2);
const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX
if (reserve < kNumHashBytes_Max - 1)
reserve = kNumHashBytes_Max - 1;
const UInt32 limit = kMtBtBlockSize - (reserve);
*/
const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);
d[1] = p->hashNumAvail;
if (p->failure_BT)
{
// printf("\n == 1 BtGetMatches() p->failure_BT\n");
d[0] = 0;
// d[1] = 0;
return;
}
while (curPos < limit)
{
if (p->hashBufPos == p->hashBufPosLimit)
{
// MatchFinderMt_GetNextBlock_Hash(p);
UInt32 avail;
{
const UInt32 bi = MtSync_GetNextBlock(&p->hashSync);
const UInt32 k = GET_HASH_BLOCK_OFFSET(bi);
const UInt32 *h = p->hashBuf + k;
avail = h[1];
p->hashBufPosLimit = k + h[0];
p->hashNumAvail = avail;
p->hashBufPos = k + 2;
}
{
/* we must prevent UInt32 overflow for avail total value,
if avail was increased with new hash block */
UInt32 availSum = numProcessed + avail;
if (availSum < numProcessed)
availSum = (UInt32)(Int32)-1;
d[1] = availSum;
}
if (avail >= p->numHashBytes)
continue;
// if (p->hashBufPos != p->hashBufPosLimit) exit(1);
/* (avail < p->numHashBytes)
It means that stream was finished.
And (avail) - is a number of remaining bytes,
we fill (d) for (avail) bytes for LZ_THREAD (receiver).
but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */
/* here we suppose that we have space enough:
(kMtBtBlockSize - curPos >= p->hashNumAvail) */
p->hashNumAvail = 0;
d[0] = curPos + avail;
d += curPos;
for (; avail != 0; avail--)
*d++ = 0;
return;
}
{
UInt32 size = p->hashBufPosLimit - p->hashBufPos;
UInt32 pos = p->pos;
UInt32 cyclicBufferPos = p->cyclicBufferPos;
UInt32 lenLimit = p->matchMaxLen;
if (lenLimit >= p->hashNumAvail)
lenLimit = p->hashNumAvail;
{
UInt32 size2 = p->hashNumAvail - lenLimit + 1;
if (size2 < size)
size = size2;
size2 = p->cyclicBufferSize - cyclicBufferPos;
if (size2 < size)
size = size2;
}
if (pos > (UInt32)kMtMaxValForNormalize - size)
{
const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1);
pos -= subValue;
p->pos = pos;
MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
}
#ifndef MFMT_GM_INLINE
while (curPos < limit && size-- != 0)
{
UInt32 *startDistances = d + curPos;
UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
startDistances + 1, p->numHashBytes - 1) - startDistances);
*startDistances = num - 1;
curPos += num;
cyclicBufferPos++;
pos++;
p->buffer++;
}
#else
{
UInt32 posRes = pos;
const UInt32 *d_end;
{
d_end = GetMatchesSpecN_2(
p->buffer + lenLimit - 1,
pos, p->buffer, p->son, p->cutValue, d + curPos,
p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
d + limit, p->hashBuf + p->hashBufPos + size,
cyclicBufferPos, p->cyclicBufferSize,
&posRes);
}
{
if (!d_end)
{
// printf("\n == 2 BtGetMatches() p->failure_BT\n");
// internal data failure
p->failure_BT = True;
d[0] = 0;
// d[1] = 0;
return;
}
}
curPos = (UInt32)(d_end - d);
{
const UInt32 processed = posRes - pos;
pos = posRes;
p->hashBufPos += processed;
cyclicBufferPos += processed;
p->buffer += processed;
}
}
#endif
{
const UInt32 processed = pos - p->pos;
numProcessed += processed;
p->hashNumAvail -= processed;
p->pos = pos;
}
if (cyclicBufferPos == p->cyclicBufferSize)
cyclicBufferPos = 0;
p->cyclicBufferPos = cyclicBufferPos;
}
}
d[0] = curPos;
}
static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
{
CMtSync *sync = &p->hashSync;
BUFFER_MUST_BE_UNLOCKED(sync)
if (!sync->needStart)
{
LOCK_BUFFER(sync)
}
BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex));
/* We suppose that we have called GetNextBlock() from start.
So buffer is LOCKED */
UNLOCK_BUFFER(sync)
}
Z7_NO_INLINE
static void BtThreadFunc(CMatchFinderMt *mt)
{
CMtSync *p = &mt->btSync;
for (;;)
{
UInt32 blockIndex = 0;
Event_Wait(&p->canStart);
for (;;)
{
PRF(printf(" BT thread block = %d pos = %d\n", (unsigned)blockIndex, mt->pos));
/* (p->exit == true) is possible after (p->canStart) at first loop iteration
and is unexpected after more Wait(freeSemaphore) iterations */
if (p->exit)
return;
Semaphore_Wait(&p->freeSemaphore);
// for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
if (p->stopWriting)
break;
BtFillBlock(mt, blockIndex++);
Semaphore_Release1(&p->filledSemaphore);
}
// we stop HASH_THREAD here
MtSync_StopWriting(&mt->hashSync);
// p->numBlocks_Sent = blockIndex;
Event_Set(&p->wasStopped);
}
}
void MatchFinderMt_Construct(CMatchFinderMt *p)
{
p->hashBuf = NULL;
MtSync_Construct(&p->hashSync);
MtSync_Construct(&p->btSync);
}
static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->hashBuf);
p->hashBuf = NULL;
}
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
{
/*
HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs.
So we must be sure that HASH_THREAD will not use CriticalSection(s)
after deleting CriticalSection here.
we call ReleaseStream(p)
that calls StopWriting(btSync)
that calls StopWriting(hashSync), if it's required to stop HASH_THREAD.
after StopWriting() it's safe to destruct MtSync(s) in any order */
MatchFinderMt_ReleaseStream(p);
MtSync_Destruct(&p->btSync);
MtSync_Destruct(&p->hashSync);
LOG_ITER(
printf("\nTree %9d * %7d iter = %9d = sum : bytes = %9d\n",
(UInt32)(g_NumIters_Tree / 1000),
(UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),
(UInt32)(g_NumIters_Loop / 1000),
(UInt32)(g_NumIters_Bytes / 1000)
));
MatchFinderMt_FreeMem(p, alloc);
}
#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)
#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)
static THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; }
static THREAD_FUNC_DECL BtThreadFunc2(void *p)
{
Byte allocaDummy[0x180];
unsigned i = 0;
for (i = 0; i < 16; i++)
allocaDummy[i] = (Byte)0;
if (allocaDummy[0] == 0)
BtThreadFunc((CMatchFinderMt *)p);
return 0;
}
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
{
CMatchFinder *mf = MF(p);
p->historySize = historySize;
if (kMtBtBlockSize <= matchMaxLen * 4)
return SZ_ERROR_PARAM;
if (!p->hashBuf)
{
p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32));
if (!p->hashBuf)
return SZ_ERROR_MEM;
p->btBuf = p->hashBuf + kHashBufferSize;
}
keepAddBufferBefore += (kHashBufferSize + kBtBufferSize);
keepAddBufferAfter += kMtHashBlockSize;
if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
return SZ_ERROR_MEM;
RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p))
RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p))
return SZ_OK;
}
SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
{
RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks))
return MtSync_Init(&p->btSync, kMtBtNumBlocks);
}
static void MatchFinderMt_Init(CMatchFinderMt *p)
{
CMatchFinder *mf = MF(p);
p->btBufPos =
p->btBufPosLimit = NULL;
p->hashBufPos =
p->hashBufPosLimit = 0;
p->hashNumAvail = 0; // 21.03
p->failure_BT = False;
/* Init without data reading. We don't want to read data in this thread */
MatchFinder_Init_4(mf);
MatchFinder_Init_LowHash(mf);
p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);
p->btNumAvailBytes = 0;
p->failure_LZ_BT = False;
// p->failure_LZ_LZ = False;
p->lzPos =
1; // optimal smallest value
// 0; // for debug: ignores match to start
// kNormalizeAlign; // for debug
p->hash = mf->hash;
p->fixedHashSize = mf->fixedHashSize;
// p->hash4Mask = mf->hash4Mask;
p->crc = mf->crc;
// memcpy(p->crc, mf->crc, sizeof(mf->crc));
p->son = mf->son;
p->matchMaxLen = mf->matchMaxLen;
p->numHashBytes = mf->numHashBytes;
/* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */
// mf->streamPos = mf->pos = 1; // optimal smallest value
// 0; // for debug: ignores match to start
// kNormalizeAlign; // for debug
/* we must init (p->pos = mf->pos) for BT, because
BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */
p->pos = mf->pos; // do not change it
p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET);
p->cyclicBufferSize = mf->cyclicBufferSize;
p->buffer = mf->buffer;
p->cutValue = mf->cutValue;
// p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses.
}
/* ReleaseStream is required to finish multithreading */
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
{
// Sleep(1); // for debug
MtSync_StopWriting(&p->btSync);
// Sleep(200); // for debug
/* p->MatchFinder->ReleaseStream(); */
}
Z7_NO_INLINE
static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
{
if (p->failure_LZ_BT)
p->btBufPos = p->failureBuf;
else
{
const UInt32 bi = MtSync_GetNextBlock(&p->btSync);
const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi);
{
const UInt32 numItems = bt[0];
p->btBufPosLimit = bt + numItems;
p->btNumAvailBytes = bt[1];
p->btBufPos = bt + 2;
if (numItems < 2 || numItems > kMtBtBlockSize)
{
p->failureBuf[0] = 0;
p->btBufPos = p->failureBuf;
p->btBufPosLimit = p->failureBuf + 1;
p->failure_LZ_BT = True;
// p->btNumAvailBytes = 0;
/* we don't want to decrease AvailBytes that was loaded before.
That can be unexpected for the code that has loaded another value before */
}
}
if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize)
{
/* we don't check (lzPos) over exact avail bytes in (btBuf).
(fixedHashSize) is small, so normalization is fast */
const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
p->lzPos -= subValue;
MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize);
}
}
return p->btNumAvailBytes;
}
static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
{
return p->pointerToCurPos;
}
#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);
static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
{
if (p->btBufPos != p->btBufPosLimit)
return p->btNumAvailBytes;
return MatchFinderMt_GetNextBlock_Bt(p);
}
// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True; return d; }
#define CHECK_FAILURE_LZ(_match_, _pos_)
static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
UInt32 h2, c2;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
const UInt32 m = p->lzPos;
MT_HASH2_CALC
c2 = hash[h2];
hash[h2] = m;
if (c2 >= matchMinPos)
{
CHECK_FAILURE_LZ(c2, m)
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{
*d++ = 2;
*d++ = m - c2 - 1;
}
}
return d;
}
static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
UInt32 h2, h3, c2, c3;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
const UInt32 m = p->lzPos;
MT_HASH3_CALC
c2 = hash[h2];
c3 = (hash + kFix3HashSize)[h3];
hash[h2] = m;
(hash + kFix3HashSize)[h3] = m;
if (c2 >= matchMinPos)
{
CHECK_FAILURE_LZ(c2, m)
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{
d[1] = m - c2 - 1;
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
{
d[0] = 3;
return d + 2;
}
d[0] = 2;
d += 2;
}
}
if (c3 >= matchMinPos)
{
CHECK_FAILURE_LZ(c3, m)
if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
{
*d++ = 3;
*d++ = m - c3 - 1;
}
}
return d;
}
#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;
/*
static
UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
{
const UInt32 *bt = p->btBufPos;
const UInt32 len = *bt++;
const UInt32 *btLim = bt + len;
UInt32 matchMinPos;
UInt32 avail = p->btNumAvailBytes - 1;
p->btBufPos = btLim;
{
p->btNumAvailBytes = avail;
#define BT_HASH_BYTES_MAX 5
matchMinPos = p->lzPos;
if (len != 0)
matchMinPos -= bt[1];
else if (avail < (BT_HASH_BYTES_MAX - 1) - 1)
{
INCREASE_LZ_POS
return d;
}
else
{
const UInt32 hs = p->historySize;
if (matchMinPos > hs)
matchMinPos -= hs;
else
matchMinPos = 1;
}
}
for (;;)
{
UInt32 h2, h3, c2, c3;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
UInt32 m = p->lzPos;
MT_HASH3_CALC
c2 = hash[h2];
c3 = (hash + kFix3HashSize)[h3];
hash[h2] = m;
(hash + kFix3HashSize)[h3] = m;
if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{
d[1] = m - c2 - 1;
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
{
d[0] = 3;
d += 2;
break;
}
// else
{
d[0] = 2;
d += 2;
}
}
if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
{
*d++ = 3;
*d++ = m - c3 - 1;
}
break;
}
if (len != 0)
{
do
{
const UInt32 v0 = bt[0];
const UInt32 v1 = bt[1];
bt += 2;
d[0] = v0;
d[1] = v1;
d += 2;
}
while (bt != btLim);
}
INCREASE_LZ_POS
return d;
}
*/
static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
const UInt32 m = p->lzPos;
MT_HASH3_CALC
// MT_HASH4_CALC
c2 = hash[h2];
c3 = (hash + kFix3HashSize)[h3];
// c4 = (hash + kFix4HashSize)[h4];
hash[h2] = m;
(hash + kFix3HashSize)[h3] = m;
// (hash + kFix4HashSize)[h4] = m;
// #define BT5_USE_H2
// #ifdef BT5_USE_H2
if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{
d[1] = m - c2 - 1;
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
{
// d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3;
// return d + 2;
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3])
{
d[0] = 4;
return d + 2;
}
d[0] = 3;
d += 2;
#ifdef BT5_USE_H4
if (c4 >= matchMinPos)
if (
cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
)
{
*d++ = 4;
*d++ = m - c4 - 1;
}
#endif
return d;
}
d[0] = 2;
d += 2;
}
// #endif
if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
{
d[1] = m - c3 - 1;
if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3])
{
d[0] = 4;
return d + 2;
}
d[0] = 3;
d += 2;
}
#ifdef BT5_USE_H4
if (c4 >= matchMinPos)
if (
cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
)
{
*d++ = 4;
*d++ = m - c4 - 1;
}
#endif
return d;
}
static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
const UInt32 *bt = p->btBufPos;
const UInt32 len = *bt++;
const UInt32 *btLim = bt + len;
p->btBufPos = btLim;
p->btNumAvailBytes--;
INCREASE_LZ_POS
{
while (bt != btLim)
{
const UInt32 v0 = bt[0];
const UInt32 v1 = bt[1];
bt += 2;
d[0] = v0;
d[1] = v1;
d += 2;
}
}
return d;
}
static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
const UInt32 *bt = p->btBufPos;
UInt32 len = *bt++;
const UInt32 avail = p->btNumAvailBytes - 1;
p->btNumAvailBytes = avail;
p->btBufPos = bt + len;
if (len == 0)
{
#define BT_HASH_BYTES_MAX 5
if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)
{
UInt32 m = p->lzPos;
if (m > p->historySize)
m -= p->historySize;
else
m = 1;
d = p->MixMatchesFunc(p, m, d);
}
}
else
{
/*
first match pair from BinTree: (match_len, match_dist),
(match_len >= numHashBytes).
MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)
*/
d = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
// if (d) // check for failure
do
{
const UInt32 v0 = bt[0];
const UInt32 v1 = bt[1];
bt += 2;
d[0] = v0;
d[1] = v1;
d += 2;
}
while (len -= 2);
}
INCREASE_LZ_POS
return d;
}
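/* Illustrative sketch, not part of the sources: consuming the pair stream
   produced by the GetMatches functions above. d is the start of the caller's
   buffer and end the returned pointer; pairs are (len, dist - 1) and lengths
   only grow, so the final pair is the longest match. */
static UInt32 BestMatch_Sketch(const UInt32 *d, const UInt32 *end, UInt32 *dist)
{
  if (d == end)
    return 0;          /* no match found at this position */
  *dist = end[-1] + 1; /* stored value is (distance - 1) */
  return end[-2];      /* length of the longest match */
}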
#define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED
#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);
static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
{
SKIP_HEADER2_MT { p->btNumAvailBytes--;
SKIP_FOOTER_MT
}
static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
{
SKIP_HEADER_MT(2)
UInt32 h2;
MT_HASH2_CALC
hash[h2] = p->lzPos;
SKIP_FOOTER_MT
}
static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
{
SKIP_HEADER_MT(3)
UInt32 h2, h3;
MT_HASH3_CALC
(hash + kFix3HashSize)[h3] =
hash[ h2] =
p->lzPos;
SKIP_FOOTER_MT
}
/*
// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip().
// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream.
static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
{
SKIP_HEADER_MT(4)
UInt32 h2, h3; // h4
MT_HASH3_CALC
// MT_HASH4_CALC
// (hash + kFix4HashSize)[h4] =
(hash + kFix3HashSize)[h3] =
hash[ h2] =
p->lzPos;
SKIP_FOOTER_MT
}
*/
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
{
vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;
switch (MF(p)->numHashBytes)
{
case 2:
p->GetHeadsFunc = GetHeads2;
p->MixMatchesFunc = (Mf_Mix_Matches)NULL;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
break;
case 3:
p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
break;
case 4:
p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;
// it's fast inline version of GetMatches()
// vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
break;
default:
p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
vTable->Skip =
(Mf_Skip_Func)MatchFinderMt3_Skip;
// (Mf_Skip_Func)MatchFinderMt4_Skip;
break;
}
}
#undef RINOK_THREAD
#undef PRF
#undef MF
#undef GetUi24hi_from32
#undef LOCK_BUFFER
#undef UNLOCK_BUFFER

109
extern/lzma/LzFindMt.h vendored

@@ -1,109 +0,0 @@
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
2023-03-05 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_LZ_FIND_MT_H
#define ZIP7_INC_LZ_FIND_MT_H
#include "LzFind.h"
#include "Threads.h"
EXTERN_C_BEGIN
typedef struct
{
UInt32 numProcessedBlocks;
CThread thread;
UInt64 affinity;
BoolInt wasCreated;
BoolInt needStart;
BoolInt csWasInitialized;
BoolInt csWasEntered;
BoolInt exit;
BoolInt stopWriting;
CAutoResetEvent canStart;
CAutoResetEvent wasStopped;
CSemaphore freeSemaphore;
CSemaphore filledSemaphore;
CCriticalSection cs;
// UInt32 numBlocks_Sent;
} CMtSync;
typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */
#define kMtCacheLineDummy 128
typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos,
UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc);
typedef struct
{
/* LZ */
const Byte *pointerToCurPos;
UInt32 *btBuf;
const UInt32 *btBufPos;
const UInt32 *btBufPosLimit;
UInt32 lzPos;
UInt32 btNumAvailBytes;
UInt32 *hash;
UInt32 fixedHashSize;
// UInt32 hash4Mask;
UInt32 historySize;
const UInt32 *crc;
Mf_Mix_Matches MixMatchesFunc;
UInt32 failure_LZ_BT; // failure in BT transferred to LZ
// UInt32 failure_LZ_LZ; // failure in LZ tables
UInt32 failureBuf[1];
// UInt32 crc[256];
/* LZ + BT */
CMtSync btSync;
Byte btDummy[kMtCacheLineDummy];
/* BT */
UInt32 *hashBuf;
UInt32 hashBufPos;
UInt32 hashBufPosLimit;
UInt32 hashNumAvail;
UInt32 failure_BT;
CLzRef *son;
UInt32 matchMaxLen;
UInt32 numHashBytes;
UInt32 pos;
const Byte *buffer;
UInt32 cyclicBufferPos;
UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
UInt32 cutValue;
/* BT + Hash */
CMtSync hashSync;
/* Byte hashDummy[kMtCacheLineDummy]; */
/* Hash */
Mf_GetHeads GetHeadsFunc;
CMatchFinder *MatchFinder;
// CMatchFinder MatchFinder;
} CMatchFinderMt;
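/* Illustrative sketch, not part of the sources: the btDummy pad above keeps
   fields written by different threads on separate cache lines, which avoids
   false sharing. The same pattern in isolation: */
typedef struct
{
  UInt32 writtenByLzThread;
  Byte pad[kMtCacheLineDummy]; /* >= one CPU cache line */
  UInt32 writtenByBtThread;    /* lands on a different cache line */
} PaddedCounters_Sketch;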
// only for Mt part
void MatchFinderMt_Construct(CMatchFinderMt *p);
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc);
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc);
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable);
/* call MatchFinderMt_InitMt() before IMatchFinder::Init() */
SRes MatchFinderMt_InitMt(CMatchFinderMt *p);
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p);
EXTERN_C_END
#endif

578
extern/lzma/LzFindOpt.c vendored

@@ -1,578 +0,0 @@
/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#include "LzFind.h"
// #include "LzFindMt.h"
// #define LOG_ITERS
// #define LOG_THREAD
#ifdef LOG_THREAD
#include <stdio.h>
#define PRF(x) x
#else
// #define PRF(x)
#endif
#ifdef LOG_ITERS
#include <stdio.h>
UInt64 g_NumIters_Tree;
UInt64 g_NumIters_Loop;
UInt64 g_NumIters_Bytes;
#define LOG_ITER(x) x
#else
#define LOG_ITER(x)
#endif
// ---------- BT THREAD ----------
#define USE_SON_PREFETCH
#define USE_LONG_MATCH_OPT
#define kEmptyHashValue 0
// #define CYC_TO_POS_OFFSET 0
// #define CYC_TO_POS_OFFSET 1 // for debug
/*
Z7_NO_INLINE
UInt32 * Z7_FASTCALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
{
do
{
UInt32 delta;
if (hash == size)
break;
delta = *hash++;
if (delta == 0 || delta > (UInt32)pos)
return NULL;
lenLimit++;
if (delta == (UInt32)pos)
{
CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
*d++ = 0;
ptr1[0] = kEmptyHashValue;
ptr1[1] = kEmptyHashValue;
}
else
{
UInt32 *_distances = ++d;
CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
const Byte *len0 = cur, *len1 = cur;
UInt32 cutValue = _cutValue;
const Byte *maxLen = cur + _maxLen;
for (LOG_ITER(g_NumIters_Tree++);;)
{
LOG_ITER(g_NumIters_Loop++);
{
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
const Byte *len = (len0 < len1 ? len0 : len1);
#ifdef USE_SON_PREFETCH
const UInt32 pair0 = *pair;
#endif
if (len[diff] == len[0])
{
if (++len != lenLimit && len[diff] == len[0])
while (++len != lenLimit)
{
LOG_ITER(g_NumIters_Bytes++);
if (len[diff] != len[0])
break;
}
if (maxLen < len)
{
maxLen = len;
*d++ = (UInt32)(len - cur);
*d++ = delta - 1;
if (len == lenLimit)
{
const UInt32 pair1 = pair[1];
*ptr1 =
#ifdef USE_SON_PREFETCH
pair0;
#else
pair[0];
#endif
*ptr0 = pair1;
_distances[-1] = (UInt32)(d - _distances);
#ifdef USE_LONG_MATCH_OPT
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
{
for (;;)
{
hash++;
pos++;
cur++;
lenLimit++;
{
CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
#if 0
*(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
#else
const UInt32 p0 = ptr[0 + (diff * 2)];
const UInt32 p1 = ptr[1 + (diff * 2)];
ptr[0] = p0;
ptr[1] = p1;
// ptr[0] = ptr[0 + (diff * 2)];
// ptr[1] = ptr[1 + (diff * 2)];
#endif
}
// PrintSon(son + 2, pos - 1);
// printf("\npos = %x delta = %x\n", pos, delta);
len++;
*d++ = 2;
*d++ = (UInt32)(len - cur);
*d++ = delta - 1;
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
}
}
#endif
break;
}
}
}
{
const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
if (len[diff] < len[0])
{
delta = pair[1];
if (delta >= curMatch)
return NULL;
*ptr1 = curMatch;
ptr1 = pair + 1;
len1 = len;
}
else
{
delta = *pair;
if (delta >= curMatch)
return NULL;
*ptr0 = curMatch;
ptr0 = pair;
len0 = len;
}
delta = (UInt32)pos - delta;
if (--cutValue == 0 || delta >= pos)
{
*ptr0 = *ptr1 = kEmptyHashValue;
_distances[-1] = (UInt32)(d - _distances);
break;
}
}
}
} // for (tree iterations)
}
pos++;
cur++;
}
while (d < limit);
*posRes = (UInt32)pos;
return d;
}
*/
/* define cbs if you use 2 functions.
GetMatchesSpecN_1() : (pos < _cyclicBufferSize)
GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)
do not define cbs if you use 1 function:
GetMatchesSpecN_2()
*/
// #define cbs _cyclicBufferSize
/*
we use size_t for (pos) and (_cyclicBufferPos) instead of UInt32
to eliminate "movsx" BUG in old MSVC x64 compiler.
*/
UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes);
Z7_NO_INLINE
UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes)
{
do // while (hash != size)
{
UInt32 delta;
#ifndef cbs
UInt32 cbs;
#endif
if (hash == size)
break;
delta = *hash++;
if (delta == 0)
return NULL;
lenLimit++;
#ifndef cbs
cbs = _cyclicBufferSize;
if ((UInt32)pos < cbs)
{
if (delta > (UInt32)pos)
return NULL;
cbs = (UInt32)pos;
}
#endif
if (delta >= cbs)
{
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
*d++ = 0;
ptr1[0] = kEmptyHashValue;
ptr1[1] = kEmptyHashValue;
}
else
{
UInt32 *_distances = ++d;
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
UInt32 cutValue = _cutValue;
const Byte *len0 = cur, *len1 = cur;
const Byte *maxLen = cur + _maxLen;
// if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else
for (LOG_ITER(g_NumIters_Tree++);;)
{
LOG_ITER(g_NumIters_Loop++);
{
// SPEC code
CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
) << 1);
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
const Byte *len = (len0 < len1 ? len0 : len1);
#ifdef USE_SON_PREFETCH
const UInt32 pair0 = *pair;
#endif
if (len[diff] == len[0])
{
if (++len != lenLimit && len[diff] == len[0])
while (++len != lenLimit)
{
LOG_ITER(g_NumIters_Bytes++);
if (len[diff] != len[0])
break;
}
if (maxLen < len)
{
maxLen = len;
*d++ = (UInt32)(len - cur);
*d++ = delta - 1;
if (len == lenLimit)
{
const UInt32 pair1 = pair[1];
*ptr1 =
#ifdef USE_SON_PREFETCH
pair0;
#else
pair[0];
#endif
*ptr0 = pair1;
_distances[-1] = (UInt32)(d - _distances);
#ifdef USE_LONG_MATCH_OPT
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
{
for (;;)
{
*d++ = 2;
*d++ = (UInt32)(lenLimit - cur);
*d++ = delta - 1;
cur++;
lenLimit++;
// SPEC
_cyclicBufferPos++;
{
// SPEC code
CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);
const CLzRef *src = dest + ((diff
+ (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);
// CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
#if 0
*(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
#else
const UInt32 p0 = src[0];
const UInt32 p1 = src[1];
dest[0] = p0;
dest[1] = p1;
#endif
}
pos++;
hash++;
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
} // for() end for long matches
}
#endif
break; // break from TREE iterations
}
}
}
{
const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
if (len[diff] < len[0])
{
delta = pair[1];
*ptr1 = curMatch;
ptr1 = pair + 1;
len1 = len;
if (delta >= curMatch)
return NULL;
}
else
{
delta = *pair;
*ptr0 = curMatch;
ptr0 = pair;
len0 = len;
if (delta >= curMatch)
return NULL;
}
delta = (UInt32)pos - delta;
if (--cutValue == 0 || delta >= cbs)
{
*ptr0 = *ptr1 = kEmptyHashValue;
_distances[-1] = (UInt32)(d - _distances);
break;
}
}
}
} // for (tree iterations)
}
pos++;
_cyclicBufferPos++;
cur++;
}
while (d < limit);
*posRes = (UInt32)pos;
return d;
}
/*
typedef UInt32 uint32plus; // size_t
UInt32 * Z7_FASTCALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes)
{
do // while (hash != size)
{
UInt32 delta;
#ifndef cbs
UInt32 cbs;
#endif
if (hash == size)
break;
delta = *hash++;
if (delta == 0)
return NULL;
#ifndef cbs
cbs = _cyclicBufferSize;
if ((UInt32)pos < cbs)
{
if (delta > (UInt32)pos)
return NULL;
cbs = (UInt32)pos;
}
#endif
if (delta >= cbs)
{
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
*d++ = 0;
ptr1[0] = kEmptyHashValue;
ptr1[1] = kEmptyHashValue;
}
else
{
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
UInt32 *_distances = ++d;
uint32plus len0 = 0, len1 = 0;
UInt32 cutValue = _cutValue;
uint32plus maxLen = _maxLen;
// lenLimit++; // const Byte *lenLimit = cur + _lenLimit;
for (LOG_ITER(g_NumIters_Tree++);;)
{
LOG_ITER(g_NumIters_Loop++);
{
// const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
) << 1);
const Byte *pb = cur - delta;
uint32plus len = (len0 < len1 ? len0 : len1);
#ifdef USE_SON_PREFETCH
const UInt32 pair0 = *pair;
#endif
if (pb[len] == cur[len])
{
if (++len != lenLimit && pb[len] == cur[len])
while (++len != lenLimit)
if (pb[len] != cur[len])
break;
if (maxLen < len)
{
maxLen = len;
*d++ = (UInt32)len;
*d++ = delta - 1;
if (len == lenLimit)
{
{
const UInt32 pair1 = pair[1];
*ptr0 = pair1;
*ptr1 =
#ifdef USE_SON_PREFETCH
pair0;
#else
pair[0];
#endif
}
_distances[-1] = (UInt32)(d - _distances);
#ifdef USE_LONG_MATCH_OPT
if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
break;
{
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
for (;;)
{
*d++ = 2;
*d++ = (UInt32)lenLimit;
*d++ = delta - 1;
_cyclicBufferPos++;
{
CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);
const CLzRef *src = dest + ((diff +
(ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);
#if 0
*(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
#else
const UInt32 p0 = src[0];
const UInt32 p1 = src[1];
dest[0] = p0;
dest[1] = p1;
#endif
}
hash++;
pos++;
cur++;
pb++;
if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
break;
}
}
#endif
break;
}
}
}
{
const UInt32 curMatch = (UInt32)pos - delta;
if (pb[len] < cur[len])
{
delta = pair[1];
*ptr1 = curMatch;
ptr1 = pair + 1;
len1 = len;
}
else
{
delta = *pair;
*ptr0 = curMatch;
ptr0 = pair;
len0 = len;
}
{
if (delta >= curMatch)
return NULL;
delta = (UInt32)pos - delta;
if (delta >= cbs
// delta >= _cyclicBufferSize || delta >= pos
|| --cutValue == 0)
{
*ptr0 = *ptr1 = kEmptyHashValue;
_distances[-1] = (UInt32)(d - _distances);
break;
}
}
}
}
} // for (tree iterations)
}
pos++;
_cyclicBufferPos++;
cur++;
}
while (d < limit);
*posRes = (UInt32)pos;
return d;
}
*/

66
extern/lzma/LzHash.h vendored

@@ -1,34 +1,54 @@
/* LzHash.h -- HASH constants for LZ algorithms
2023-03-05 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_LZ_HASH_H
#define ZIP7_INC_LZ_HASH_H
/*
(kHash2Size >= (1 << 8)) : Required
(kHash3Size >= (1 << 16)) : Required
*/
#define kHash2Size (1 << 10)
#define kHash3Size (1 << 16)
// #define kHash4Size (1 << 20)
#define kFix3HashSize (kHash2Size)
#define kFix4HashSize (kHash2Size + kHash3Size)
// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
/*
We use up to 3 crc values for hash:
crc0
crc1 << Shift_1
crc2 << Shift_2
(Shift_1 = 5) and (Shift_2 = 10) is good tradeoff.
Small values for Shift are not good for collision rate.
Big value for Shift_2 increases the minimum size
of hash table, that will be slow for small files.
*/
#define kLzHash_CrcShift_1 5
#define kLzHash_CrcShift_2 10
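/* Illustrative sketch, not part of the header: composing a 5-byte hash from
   the three crc terms described above, roughly as the HASH5 calculation in
   LzFind.c combines them; hashMask selects the final table size.

static UInt32 Hash5_Sketch(const UInt32 *crc, const Byte *cur, UInt32 hashMask)
{
  UInt32 v = crc[cur[0]] ^ cur[1] ^ ((UInt32)cur[2] << 8);
  v ^= (crc[cur[3]] << kLzHash_CrcShift_1);
  v ^= (crc[cur[4]] << kLzHash_CrcShift_2);
  return v & hashMask;
}
*/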
#endif
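As a quick illustration of the HASH4_CALC scheme above, here is a minimal standalone sketch; it is hedged, not part of this diff: the Hash4 name is illustrative, and the crc table and hashMask parameters stand in for the match finder's p->crc and p->hashMask.

#include <stdint.h>

/* Fold the first four window bytes through a byte-wise CRC table,
   using the Shift_1 = 5 mix for the fourth byte as discussed above. */
static uint32_t Hash4(const uint8_t *cur, const uint32_t crc[256],
                      uint32_t hashMask)
{
  uint32_t temp = crc[cur[0]] ^ cur[1];
  return (temp ^ ((uint32_t)cur[2] << 8) ^ (crc[cur[3]] << 5)) & hashMask;
}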

extern/lzma/LzmaDec.c vendored

@@ -1,107 +1,81 @@
/* LzmaDec.c -- LZMA Decoder
2023-04-07 : Igor Pavlov : Public domain */
2008-11-06 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "LzmaDec.h"
#include <string.h>
/* #include "CpuArch.h" */
#include "LzmaDec.h"
// #define kNumTopBits 24
#define kTopValue ((UInt32)1 << 24)
#define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits)
#define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits)
#define kNumMoveBits 5
#define RC_INIT_SIZE 5
#ifndef Z7_LZMA_DEC_OPT
#define kNumMoveBits 5
#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
{ UPDATE_0(p) i = (i + i); A0; } else \
{ UPDATE_1(p) i = (i + i) + 1; A1; }
#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
{ UPDATE_0(p + i) A0; } else \
{ UPDATE_1(p + i) A1; }
#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; )
#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; )
{ UPDATE_0(p); i = (i + i); A0; } else \
{ UPDATE_1(p); i = (i + i) + 1; A1; }
#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;)
#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); }
#define TREE_DECODE(probs, limit, i) \
{ i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
/* #define Z7_LZMA_SIZE_OPT */
/* #define _LZMA_SIZE_OPT */
#ifdef Z7_LZMA_SIZE_OPT
#ifdef _LZMA_SIZE_OPT
#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
#else
#define TREE_6_DECODE(probs, i) \
{ i = 1; \
TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \
TREE_GET_BIT(probs, i); \
TREE_GET_BIT(probs, i); \
TREE_GET_BIT(probs, i); \
TREE_GET_BIT(probs, i); \
TREE_GET_BIT(probs, i); \
i -= 0x40; }
#endif
#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol)
#define MATCHED_LITER_DEC \
matchByte += matchByte; \
bit = offs; \
offs &= matchByte; \
probLit = prob + (offs + bit + symbol); \
GET_BIT2(probLit, symbol, offs ^= bit; , ;)
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
#endif // Z7_LZMA_DEC_OPT
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
#define UPDATE_0_CHECK range = bound;
#define UPDATE_1_CHECK range -= bound; code -= bound;
#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
{ UPDATE_0_CHECK i = (i + i); A0; } else \
{ UPDATE_1_CHECK i = (i + i) + 1; A1; }
{ UPDATE_0_CHECK; i = (i + i); A0; } else \
{ UPDATE_1_CHECK; i = (i + i) + 1; A1; }
#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
#define TREE_DECODE_CHECK(probs, limit, i) \
{ i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
{ UPDATE_0_CHECK i += m; m += m; } else \
{ UPDATE_1_CHECK m += m; i += m; }
#define kNumPosBitsMax 4
#define kNumPosStatesMax (1 << kNumPosBitsMax)
#define kLenNumLowBits 3
#define kLenNumLowSymbols (1 << kLenNumLowBits)
#define kLenNumMidBits 3
#define kLenNumMidSymbols (1 << kLenNumMidBits)
#define kLenNumHighBits 8
#define kLenNumHighSymbols (1 << kLenNumHighBits)
#define LenLow 0
#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits))
#define LenChoice 0
#define LenChoice2 (LenChoice + 1)
#define LenLow (LenChoice2 + 1)
#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
#define LenChoice LenLow
#define LenChoice2 (LenLow + (1 << kLenNumLowBits))
#define kNumStates 12
#define kNumStates2 16
#define kNumLitStates 7
#define kStartPosModelIndex 4
@@ -115,130 +89,60 @@
#define kAlignTableSize (1 << kNumAlignBits)
#define kMatchMinLen 2
#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)
#define kMatchSpecLen_Error_Data (1 << 9)
#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1)
/* External ASM code needs the same CLzmaProb array layout. So don't change it. */
/* (probs_1664) is faster and better for code size on some platforms */
/*
#ifdef MY_CPU_X86_OR_AMD64
*/
#define kStartOffset 1664
#define GET_PROBS p->probs_1664
/*
#define GET_PROBS p->probs + kStartOffset
#else
#define kStartOffset 0
#define GET_PROBS p->probs
#endif
*/
#define SpecPos (-kStartOffset)
#define IsRep0Long (SpecPos + kNumFullDistances)
#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax))
#define LenCoder (RepLenCoder + kNumLenProbs)
#define IsMatch (LenCoder + kNumLenProbs)
#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax))
#define IsRep (Align + kAlignTableSize)
#define IsMatch 0
#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
#define IsRepG0 (IsRep + kNumStates)
#define IsRepG1 (IsRepG0 + kNumStates)
#define IsRepG2 (IsRepG1 + kNumStates)
#define PosSlot (IsRepG2 + kNumStates)
#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
#define NUM_BASE_PROBS (Literal + kStartOffset)
#define IsRep0Long (IsRepG2 + kNumStates)
#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
#define LenCoder (Align + kAlignTableSize)
#define RepLenCoder (LenCoder + kNumLenProbs)
#define Literal (RepLenCoder + kNumLenProbs)
#if Align != 0 && kStartOffset != 0
#error Stop_Compiling_Bad_LZMA_kAlign
#define LZMA_BASE_SIZE 1846
#define LZMA_LIT_SIZE 768
#define LzmaProps_GetNumProbs(p) ((UInt32)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
#if Literal != LZMA_BASE_SIZE
StopCompilingDueBUG
#endif
#if NUM_BASE_PROBS != 1984
#error Stop_Compiling_Bad_LZMA_PROBS
#endif
#define LZMA_LIT_SIZE 0x300
#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4)
#define COMBINED_PS_STATE (posState + state)
#define GET_LEN_STATE (posState)
static const Byte kLiteralNextStates[kNumStates * 2] =
{
0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10
};
#define LZMA_DIC_MIN (1 << 12)
/*
p->remainLen : shows status of LZMA decoder:
< kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset
= kMatchSpecLenStart : the LZMA stream was finished with end mark
= kMatchSpecLenStart + 1 : need init range coder
= kMatchSpecLenStart + 2 : need init range coder and state
= kMatchSpecLen_Error_Fail : Internal Code Failure
= kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error
*/
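A caller-side sketch of how that status table can be tested; the helper name is hypothetical and not part of the SDK, and it relies on the constants defined just above:

/* kMatchSpecLen_Error_Fail == kMatchSpecLen_Error_Data - 1, so a single
   comparison covers both the internal-failure and data-error encodings,
   while the normal and init values all stay below it. */
static int LzmaDec_IsErrorStatus(const CLzmaDec *p)
{
  return p->remainLen >= kMatchSpecLen_Error_Fail;
}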
/* ---------- LZMA_DECODE_REAL ---------- */
/*
LzmaDec_DecodeReal_3() can be implemented in an external ASM file.
The 3 is the code-compatibility version of that function, checked at link time.
*/
#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3
/*
LZMA_DECODE_REAL()
In:
RangeCoder is normalized
if (p->dicPos == limit)
{
LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases.
So the first symbol can only be MATCH-NON-REP. And if that MATCH-NON-REP symbol
is not END_OF_PAYLOAD_MARKER, then the function doesn't write any byte to the dictionary,
the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later.
}
Processing:
The first LZMA symbol will be decoded in any case.
All main checks for limits are at the end of the main loop.
It decodes additional LZMA symbols while (p->buf < bufLimit && dicPos < limit);
the RangeCoder is still without its last normalization when (p->buf < bufLimit) is checked.
But if (p->buf < bufLimit), the caller has provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for
the next iteration before the real limit (bufLimit + LZMA_REQUIRED_INPUT_MAX),
which is enough for the worst-case LZMA symbol with one additional RangeCoder normalization per bit.
So the function never reads the byte bufLimit[LZMA_REQUIRED_INPUT_MAX].
/* First LZMA-symbol is always decoded.
And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization
Out:
RangeCoder is normalized
Result:
SZ_OK - OK
p->remainLen:
< kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset
= kMatchSpecLenStart : the LZMA stream was finished with end mark
SZ_ERROR_DATA - error, when the MATCH symbol refers outside the dictionary
p->remainLen : undefined
p->reps[*] : undefined
SZ_ERROR_DATA - Error
p->remainLen:
< kMatchSpecLenStart : normal remain
= kMatchSpecLenStart : finished
= kMatchSpecLenStart + 1 : Flush marker
= kMatchSpecLenStart + 2 : State Init Marker
*/
#ifdef Z7_LZMA_DEC_OPT
int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
#else
static
int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
{
CLzmaProb *probs = GET_PROBS;
unsigned state = (unsigned)p->state;
CLzmaProb *probs = p->probs;
unsigned state = p->state;
UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1;
unsigned lc = p->prop.lc;
unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);
Byte *dic = p->dic;
SizeT dicBufSize = p->dicBufSize;
@@ -257,126 +161,99 @@ int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
CLzmaProb *prob;
UInt32 bound;
unsigned ttt;
unsigned posState = CALC_POS_STATE(processedPos, pbMask);
unsigned posState = processedPos & pbMask;
prob = probs + IsMatch + COMBINED_PS_STATE;
prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
IF_BIT_0(prob)
{
unsigned symbol;
UPDATE_0(prob)
UPDATE_0(prob);
prob = probs + Literal;
if (processedPos != 0 || checkDicSize != 0)
prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
processedPos++;
if (checkDicSize != 0 || processedPos != 0)
prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) +
(dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc))));
if (state < kNumLitStates)
{
state -= (state < 4) ? state : 3;
symbol = 1;
#ifdef Z7_LZMA_SIZE_OPT
do { NORMAL_LITER_DEC } while (symbol < 0x100);
#else
NORMAL_LITER_DEC
NORMAL_LITER_DEC
NORMAL_LITER_DEC
NORMAL_LITER_DEC
NORMAL_LITER_DEC
NORMAL_LITER_DEC
NORMAL_LITER_DEC
NORMAL_LITER_DEC
#endif
do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100);
}
else
{
unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
unsigned offs = 0x100;
state -= (state < 10) ? 3 : 6;
symbol = 1;
#ifdef Z7_LZMA_SIZE_OPT
do
{
unsigned bit;
CLzmaProb *probLit;
MATCHED_LITER_DEC
matchByte <<= 1;
bit = (matchByte & offs);
probLit = prob + offs + bit + symbol;
GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit)
}
while (symbol < 0x100);
#else
{
unsigned bit;
CLzmaProb *probLit;
MATCHED_LITER_DEC
MATCHED_LITER_DEC
MATCHED_LITER_DEC
MATCHED_LITER_DEC
MATCHED_LITER_DEC
MATCHED_LITER_DEC
MATCHED_LITER_DEC
MATCHED_LITER_DEC
}
#endif
}
dic[dicPos++] = (Byte)symbol;
processedPos++;
state = kLiteralNextStates[state];
/* if (state < 4) state = 0; else if (state < 10) state -= 3; else state -= 6; */
continue;
}
else
{
UPDATE_1(prob)
UPDATE_1(prob);
prob = probs + IsRep + state;
IF_BIT_0(prob)
{
UPDATE_0(prob)
UPDATE_0(prob);
state += kNumStates;
prob = probs + LenCoder;
}
else
{
UPDATE_1(prob)
UPDATE_1(prob);
if (checkDicSize == 0 && processedPos == 0)
return SZ_ERROR_DATA;
prob = probs + IsRepG0 + state;
IF_BIT_0(prob)
{
UPDATE_0(prob)
prob = probs + IsRep0Long + COMBINED_PS_STATE;
UPDATE_0(prob);
prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
IF_BIT_0(prob)
{
UPDATE_0(prob)
// that case was checked before with kBadRepCode
// if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
// The caller doesn't allow (dicPos == limit) case here
// so we don't need the following check:
// if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; }
dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
UPDATE_0(prob);
dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
dicPos++;
processedPos++;
state = state < kNumLitStates ? 9 : 11;
continue;
}
UPDATE_1(prob)
UPDATE_1(prob);
}
else
{
UInt32 distance;
UPDATE_1(prob)
UPDATE_1(prob);
prob = probs + IsRepG1 + state;
IF_BIT_0(prob)
{
UPDATE_0(prob)
UPDATE_0(prob);
distance = rep1;
}
else
{
UPDATE_1(prob)
UPDATE_1(prob);
prob = probs + IsRepG2 + state;
IF_BIT_0(prob)
{
UPDATE_0(prob)
UPDATE_0(prob);
distance = rep2;
}
else
{
UPDATE_1(prob)
UPDATE_1(prob);
distance = rep3;
rep3 = rep2;
}
@@ -388,101 +265,63 @@ int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
state = state < kNumLitStates ? 8 : 11;
prob = probs + RepLenCoder;
}
#ifdef Z7_LZMA_SIZE_OPT
{
unsigned lim, offset;
unsigned limit, offset;
CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0(probLen)
{
UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE;
UPDATE_0(probLen);
probLen = prob + LenLow + (posState << kLenNumLowBits);
offset = 0;
lim = (1 << kLenNumLowBits);
limit = (1 << kLenNumLowBits);
}
else
{
UPDATE_1(probLen)
UPDATE_1(probLen);
probLen = prob + LenChoice2;
IF_BIT_0(probLen)
{
UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
UPDATE_0(probLen);
probLen = prob + LenMid + (posState << kLenNumMidBits);
offset = kLenNumLowSymbols;
lim = (1 << kLenNumLowBits);
limit = (1 << kLenNumMidBits);
}
else
{
UPDATE_1(probLen)
UPDATE_1(probLen);
probLen = prob + LenHigh;
offset = kLenNumLowSymbols * 2;
lim = (1 << kLenNumHighBits);
offset = kLenNumLowSymbols + kLenNumMidSymbols;
limit = (1 << kLenNumHighBits);
}
}
TREE_DECODE(probLen, lim, len)
TREE_DECODE(probLen, limit, len);
len += offset;
}
#else
{
CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0(probLen)
{
UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE;
len = 1;
TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len)
len -= 8;
}
else
{
UPDATE_1(probLen)
probLen = prob + LenChoice2;
IF_BIT_0(probLen)
{
UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
len = 1;
TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len)
}
else
{
UPDATE_1(probLen)
probLen = prob + LenHigh;
TREE_DECODE(probLen, (1 << kLenNumHighBits), len)
len += kLenNumLowSymbols * 2;
}
}
}
#endif
if (state >= kNumStates)
{
UInt32 distance;
prob = probs + PosSlot +
((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
TREE_6_DECODE(prob, distance)
TREE_6_DECODE(prob, distance);
if (distance >= kStartPosModelIndex)
{
unsigned posSlot = (unsigned)distance;
unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
int numDirectBits = (int)(((distance >> 1) - 1));
distance = (2 | (distance & 1));
if (posSlot < kEndPosModelIndex)
{
distance <<= numDirectBits;
prob = probs + SpecPos;
prob = probs + SpecPos + distance - posSlot - 1;
{
UInt32 m = 1;
distance++;
UInt32 mask = 1;
unsigned i = 1;
do
{
REV_BIT_VAR(prob, distance, m)
GET_BIT2(prob + i, i, ; , distance |= mask);
mask <<= 1;
}
while (--numDirectBits);
distance -= m;
while (--numDirectBits != 0);
}
}
else
@@ -509,70 +348,57 @@ int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
}
*/
}
while (--numDirectBits);
while (--numDirectBits != 0);
prob = probs + Align;
distance <<= kNumAlignBits;
{
unsigned i = 1;
REV_BIT_CONST(prob, i, 1)
REV_BIT_CONST(prob, i, 2)
REV_BIT_CONST(prob, i, 4)
REV_BIT_LAST (prob, i, 8)
distance |= i;
GET_BIT2(prob + i, i, ; , distance |= 1);
GET_BIT2(prob + i, i, ; , distance |= 2);
GET_BIT2(prob + i, i, ; , distance |= 4);
GET_BIT2(prob + i, i, ; , distance |= 8);
}
if (distance == (UInt32)0xFFFFFFFF)
{
len = kMatchSpecLenStart;
len += kMatchSpecLenStart;
state -= kNumStates;
break;
}
}
}
rep3 = rep2;
rep2 = rep1;
rep1 = rep0;
rep0 = distance + 1;
state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
if (checkDicSize == 0)
{
len += kMatchSpecLen_Error_Data + kMatchMinLen;
// len = kMatchSpecLen_Error_Data;
// len += kMatchMinLen;
break;
if (distance >= processedPos)
return SZ_ERROR_DATA;
}
else if (distance >= checkDicSize)
return SZ_ERROR_DATA;
state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
/* state = kLiteralNextStates[state]; */
}
len += kMatchMinLen;
if (limit == dicPos)
return SZ_ERROR_DATA;
{
SizeT rem;
unsigned curLen;
SizeT pos;
if ((rem = limit - dicPos) == 0)
{
/*
We stop decoding and return SZ_OK, and we can resume decoding later.
Any error conditions can be tested later in the caller's code.
For a stricter mode we can stop decoding with an error:
// len += kMatchSpecLen_Error_Data;
*/
break;
}
curLen = ((rem < len) ? (unsigned)rem : len);
pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
SizeT rem = limit - dicPos;
unsigned curLen = ((rem < len) ? (unsigned)rem : len);
SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0);
processedPos += (UInt32)curLen;
processedPos += curLen;
len -= curLen;
if (curLen <= dicBufSize - pos)
if (pos + curLen <= dicBufSize)
{
Byte *dest = dic + dicPos;
ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
const Byte *lim = dest + curLen;
dicPos += (SizeT)curLen;
dicPos += curLen;
do
*(dest) = (Byte)*(dest + src);
while (++dest != lim);
@@ -591,153 +417,108 @@ int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
}
}
while (dicPos < limit && buf < bufLimit);
NORMALIZE
NORMALIZE;
p->buf = buf;
p->range = range;
p->code = code;
p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too.
p->remainLen = len;
p->dicPos = dicPos;
p->processedPos = processedPos;
p->reps[0] = rep0;
p->reps[1] = rep1;
p->reps[2] = rep2;
p->reps[3] = rep3;
p->state = (UInt32)state;
if (len >= kMatchSpecLen_Error_Data)
return SZ_ERROR_DATA;
p->state = state;
return SZ_OK;
}
#endif
static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
{
unsigned len = (unsigned)p->remainLen;
if (len == 0 /* || len >= kMatchSpecLenStart */)
return;
if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
{
Byte *dic = p->dic;
SizeT dicPos = p->dicPos;
Byte *dic;
SizeT dicBufSize;
SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
{
SizeT rem = limit - dicPos;
if (rem < len)
{
len = (unsigned)(rem);
if (len == 0)
return;
}
}
SizeT dicBufSize = p->dicBufSize;
unsigned len = p->remainLen;
UInt32 rep0 = p->reps[0];
if (limit - dicPos < len)
len = (unsigned)(limit - dicPos);
if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
p->checkDicSize = p->prop.dicSize;
p->processedPos += (UInt32)len;
p->remainLen -= (UInt32)len;
dic = p->dic;
rep0 = p->reps[0];
dicBufSize = p->dicBufSize;
do
p->processedPos += len;
p->remainLen -= len;
while (len-- != 0)
{
dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
dicPos++;
}
while (--len);
p->dicPos = dicPos;
}
}
/*
At the start of a new stream we have one of the following symbols:
- Literal - is allowed
- Non-Rep-Match - is allowed only if it's the end-marker symbol
- Rep-Match - is not allowed
We use an early check of (RangeCoder:Code) against kBadRepCode to simplify the main decoding code
*/
#define kRange0 0xFFFFFFFF
#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))
#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)))
#if kBadRepCode != (0xC0000000 - 0x400)
#error Stop_Compiling_Bad_LZMA_Check
#endif
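As a worked check of that constant, here is a throwaway sketch that just re-evaluates the macros above with kNumBitModelTotalBits = 11 and confirms the value the #error guard expects:

#include <assert.h>
#include <stdint.h>

int main(void)
{
  const uint32_t range0 = 0xFFFFFFFF;
  const uint32_t bound0 = (range0 >> 11) << 10;             /* 0x7FFFFC00 */
  const uint32_t badRep = bound0 + (((range0 - bound0) >> 11) << 10);
  assert(badRep == (uint32_t)0xC0000000 - 0x400);           /* 0xBFFFFC00 */
  return 0;
}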
/*
LzmaDec_DecodeReal2():
It calls LZMA_DECODE_REAL() and adjusts the limit according to (p->checkDicSize).
We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(),
and we support the following state of (p->checkDicSize):
if (total_processed < p->prop.dicSize) then
{
(total_processed == p->processedPos)
(p->checkDicSize == 0)
}
else
(p->checkDicSize == p->prop.dicSize)
*/
static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
{
if (p->checkDicSize == 0)
do
{
UInt32 rem = p->prop.dicSize - p->processedPos;
if (limit - p->dicPos > rem)
limit = p->dicPos + rem;
}
{
int res = LZMA_DECODE_REAL(p, limit, bufLimit);
if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
SizeT limit2 = limit;
if (p->checkDicSize == 0)
{
UInt32 rem = p->prop.dicSize - p->processedPos;
if (limit - p->dicPos > rem)
limit2 = p->dicPos + rem;
}
RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit));
if (p->processedPos >= p->prop.dicSize)
p->checkDicSize = p->prop.dicSize;
return res;
LzmaDec_WriteRem(p, limit);
}
while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
if (p->remainLen > kMatchSpecLenStart)
{
p->remainLen = kMatchSpecLenStart;
}
return 0;
}
typedef enum
{
DUMMY_INPUT_EOF, /* need more input data */
DUMMY_ERROR, /* unexpected end of input stream */
DUMMY_LIT,
DUMMY_MATCH,
DUMMY_REP
} ELzmaDummy;
#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH)
static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut)
static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
{
UInt32 range = p->range;
UInt32 code = p->code;
const Byte *bufLimit = *bufOut;
const CLzmaProb *probs = GET_PROBS;
unsigned state = (unsigned)p->state;
const Byte *bufLimit = buf + inSize;
CLzmaProb *probs = p->probs;
unsigned state = p->state;
ELzmaDummy res;
for (;;)
{
const CLzmaProb *prob;
CLzmaProb *prob;
UInt32 bound;
unsigned ttt;
unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1);
unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1);
prob = probs + IsMatch + COMBINED_PS_STATE;
prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK
/* if (bufLimit - buf >= 7) return DUMMY_LIT; */
prob = probs + Literal;
if (p->checkDicSize != 0 || p->processedPos != 0)
prob += ((UInt32)LZMA_LIT_SIZE *
((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) +
((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
prob += (LZMA_LIT_SIZE *
((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
(p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
if (state < kNumLitStates)
{
@@ -747,18 +528,17 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
else
{
unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
(p->dicPos < p->reps[0] ? p->dicBufSize : 0)];
((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)];
unsigned offs = 0x100;
unsigned symbol = 1;
do
{
unsigned bit;
const CLzmaProb *probLit;
matchByte += matchByte;
bit = offs;
offs &= matchByte;
probLit = prob + (offs + bit + symbol);
GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; )
CLzmaProb *probLit;
matchByte <<= 1;
bit = (matchByte & offs);
probLit = prob + offs + bit + symbol;
GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit)
}
while (symbol < 0x100);
}
@@ -767,54 +547,55 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
else
{
unsigned len;
UPDATE_1_CHECK
UPDATE_1_CHECK;
prob = probs + IsRep + state;
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK
UPDATE_0_CHECK;
state = 0;
prob = probs + LenCoder;
res = DUMMY_MATCH;
}
else
{
UPDATE_1_CHECK
UPDATE_1_CHECK;
res = DUMMY_REP;
prob = probs + IsRepG0 + state;
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK
prob = probs + IsRep0Long + COMBINED_PS_STATE;
UPDATE_0_CHECK;
prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK
break;
UPDATE_0_CHECK;
NORMALIZE_CHECK;
return DUMMY_REP;
}
else
{
UPDATE_1_CHECK
UPDATE_1_CHECK;
}
}
else
{
UPDATE_1_CHECK
UPDATE_1_CHECK;
prob = probs + IsRepG1 + state;
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK
UPDATE_0_CHECK;
}
else
{
UPDATE_1_CHECK
UPDATE_1_CHECK;
prob = probs + IsRepG2 + state;
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK
UPDATE_0_CHECK;
}
else
{
UPDATE_1_CHECK
UPDATE_1_CHECK;
}
}
}
@@ -823,34 +604,34 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
}
{
unsigned limit, offset;
const CLzmaProb *probLen = prob + LenChoice;
CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0_CHECK(probLen)
{
UPDATE_0_CHECK
probLen = prob + LenLow + GET_LEN_STATE;
UPDATE_0_CHECK;
probLen = prob + LenLow + (posState << kLenNumLowBits);
offset = 0;
limit = 1 << kLenNumLowBits;
}
else
{
UPDATE_1_CHECK
UPDATE_1_CHECK;
probLen = prob + LenChoice2;
IF_BIT_0_CHECK(probLen)
{
UPDATE_0_CHECK
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
UPDATE_0_CHECK;
probLen = prob + LenMid + (posState << kLenNumMidBits);
offset = kLenNumLowSymbols;
limit = 1 << kLenNumLowBits;
limit = 1 << kLenNumMidBits;
}
else
{
UPDATE_1_CHECK
UPDATE_1_CHECK;
probLen = prob + LenHigh;
offset = kLenNumLowSymbols * 2;
offset = kLenNumLowSymbols + kLenNumMidSymbols;
limit = 1 << kLenNumHighBits;
}
}
TREE_DECODE_CHECK(probLen, limit, len)
TREE_DECODE_CHECK(probLen, limit, len);
len += offset;
}
@@ -858,16 +639,18 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
{
unsigned posSlot;
prob = probs + PosSlot +
((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) <<
kNumPosSlotBits);
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot)
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
if (posSlot >= kStartPosModelIndex)
{
unsigned numDirectBits = ((posSlot >> 1) - 1);
int numDirectBits = ((posSlot >> 1) - 1);
/* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
if (posSlot < kEndPosModelIndex)
{
prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);
prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1;
}
else
{
@@ -879,44 +662,48 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
code -= range & (((code - range) >> 31) - 1);
/* if (code >= range) code -= range; */
}
while (--numDirectBits);
while (--numDirectBits != 0);
prob = probs + Align;
numDirectBits = kNumAlignBits;
}
{
unsigned i = 1;
unsigned m = 1;
do
{
REV_BIT_CHECK(prob, i, m)
GET_BIT_CHECK(prob + i, i);
}
while (--numDirectBits);
while (--numDirectBits != 0);
}
}
}
}
break;
}
NORMALIZE_CHECK
*bufOut = buf;
NORMALIZE_CHECK;
return res;
}
void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);
void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
static void LzmaDec_InitRc(CLzmaDec *p, const Byte *data)
{
p->remainLen = kMatchSpecLenStart + 1;
p->code = ((UInt32)data[1] << 24) | ((UInt32)data[2] << 16) | ((UInt32)data[3] << 8) | ((UInt32)data[4]);
p->range = 0xFFFFFFFF;
p->needFlush = 0;
}
void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState)
{
p->needFlush = 1;
p->remainLen = 0;
p->tempBufSize = 0;
if (initDic)
{
p->processedPos = 0;
p->checkDicSize = 0;
p->remainLen = kMatchSpecLenStart + 2;
p->needInitState = 1;
}
if (initState)
p->remainLen = kMatchSpecLenStart + 2;
p->needInitState = 1;
}
void LzmaDec_Init(CLzmaDec *p)
@@ -925,96 +712,48 @@ void LzmaDec_Init(CLzmaDec *p)
LzmaDec_InitDicAndState(p, True, True);
}
/*
LZMA supports an optional end_marker.
So the decoder can look ahead for one additional LZMA symbol to check for the end_marker.
That additional LZMA symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes of input.
When the decoder reaches dicLimit, it checks the (finishMode) parameter:
if (finishMode == LZMA_FINISH_ANY), the decoder doesn't look ahead;
if (finishMode != LZMA_FINISH_ANY), the decoder looks ahead, if an end_marker is possible at the current position.
When the decoder looks ahead and the lookahead symbol is not an end_marker, we have two ways:
1) Strict mode (default): the decoder returns SZ_ERROR_DATA.
2) Relaxed mode (alternative mode): we could return SZ_OK, and the caller
must check the (status) value. The caller can report the error
if the end of stream is expected and (status) is not
LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK.
*/
#define RETURN_NOT_FINISHED_FOR_FINISH \
*status = LZMA_STATUS_NOT_FINISHED; \
return SZ_ERROR_DATA; // for strict mode
// return SZ_OK; // for relaxed mode
static void LzmaDec_InitStateReal(CLzmaDec *p)
{
UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp));
UInt32 i;
CLzmaProb *probs = p->probs;
for (i = 0; i < numProbs; i++)
probs[i] = kBitModelTotal >> 1;
p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
p->state = 0;
p->needInitState = 0;
}
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
ELzmaFinishMode finishMode, ELzmaStatus *status)
{
SizeT inSize = *srcLen;
(*srcLen) = 0;
LzmaDec_WriteRem(p, dicLimit);
*status = LZMA_STATUS_NOT_SPECIFIED;
if (p->remainLen > kMatchSpecLenStart)
while (p->remainLen != kMatchSpecLenStart)
{
if (p->remainLen > kMatchSpecLenStart + 2)
return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA;
int checkEndMarkNow;
for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
p->tempBuf[p->tempBufSize++] = *src++;
if (p->tempBufSize != 0 && p->tempBuf[0] != 0)
return SZ_ERROR_DATA;
if (p->tempBufSize < RC_INIT_SIZE)
{
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
p->code =
((UInt32)p->tempBuf[1] << 24)
| ((UInt32)p->tempBuf[2] << 16)
| ((UInt32)p->tempBuf[3] << 8)
| ((UInt32)p->tempBuf[4]);
if (p->needFlush != 0)
{
for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
p->tempBuf[p->tempBufSize++] = *src++;
if (p->tempBufSize < RC_INIT_SIZE)
{
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
if (p->tempBuf[0] != 0)
return SZ_ERROR_DATA;
if (p->checkDicSize == 0
&& p->processedPos == 0
&& p->code >= kBadRepCode)
return SZ_ERROR_DATA;
p->range = 0xFFFFFFFF;
p->tempBufSize = 0;
if (p->remainLen > kMatchSpecLenStart + 1)
{
SizeT numProbs = LzmaProps_GetNumProbs(&p->prop);
SizeT i;
CLzmaProb *probs = p->probs;
for (i = 0; i < numProbs; i++)
probs[i] = kBitModelTotal >> 1;
p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
p->state = 0;
}
p->remainLen = 0;
}
for (;;)
{
if (p->remainLen == kMatchSpecLenStart)
{
if (p->code != 0)
return SZ_ERROR_DATA;
*status = LZMA_STATUS_FINISHED_WITH_MARK;
return SZ_OK;
}
LzmaDec_WriteRem(p, dicLimit);
{
// (p->remainLen == 0 || p->dicPos == dicLimit)
int checkEndMarkNow = 0;
LzmaDec_InitRc(p, p->tempBuf);
p->tempBufSize = 0;
}
checkEndMarkNow = 0;
if (p->dicPos >= dicLimit)
{
if (p->remainLen == 0 && p->code == 0)
@@ -1029,174 +768,83 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
}
if (p->remainLen != 0)
{
RETURN_NOT_FINISHED_FOR_FINISH
*status = LZMA_STATUS_NOT_FINISHED;
return SZ_ERROR_DATA;
}
checkEndMarkNow = 1;
}
// (p->remainLen == 0)
if (p->needInitState)
LzmaDec_InitStateReal(p);
if (p->tempBufSize == 0)
{
SizeT processed;
const Byte *bufLimit;
int dummyProcessed = -1;
if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
{
const Byte *bufOut = src + inSize;
ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut);
if (dummyRes == DUMMY_INPUT_EOF)
int dummyRes = LzmaDec_TryDummy(p, src, inSize);
if (dummyRes == DUMMY_ERROR)
{
size_t i;
if (inSize >= LZMA_REQUIRED_INPUT_MAX)
break;
(*srcLen) += inSize;
memcpy(p->tempBuf, src, inSize);
p->tempBufSize = (unsigned)inSize;
for (i = 0; i < inSize; i++)
p->tempBuf[i] = src[i];
(*srcLen) += inSize;
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
dummyProcessed = (int)(bufOut - src);
if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX)
break;
if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
{
unsigned i;
(*srcLen) += (unsigned)dummyProcessed;
p->tempBufSize = (unsigned)dummyProcessed;
for (i = 0; i < (unsigned)dummyProcessed; i++)
p->tempBuf[i] = src[i];
// p->remainLen = kMatchSpecLen_Error_Data;
RETURN_NOT_FINISHED_FOR_FINISH
*status = LZMA_STATUS_NOT_FINISHED;
return SZ_ERROR_DATA;
}
bufLimit = src;
// we will decode only one iteration
}
else
bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
p->buf = src;
{
int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit);
SizeT processed = (SizeT)(p->buf - src);
if (dummyProcessed < 0)
{
if (processed > inSize)
break;
}
else if ((unsigned)dummyProcessed != processed)
break;
src += processed;
inSize -= processed;
(*srcLen) += processed;
if (res != SZ_OK)
{
p->remainLen = kMatchSpecLen_Error_Data;
return SZ_ERROR_DATA;
}
}
continue;
if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
return SZ_ERROR_DATA;
processed = (SizeT)(p->buf - src);
(*srcLen) += processed;
src += processed;
inSize -= processed;
}
else
{
// we have some data in (p->tempBuf)
// in strict mode: tempBufSize is not enough for one Symbol decoding.
// in relaxed mode: tempBufSize is not larger than required for one Symbol decoding.
unsigned rem = p->tempBufSize;
unsigned ahead = 0;
int dummyProcessed = -1;
while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize)
p->tempBuf[rem++] = src[ahead++];
// ahead - the size of new data copied from (src) to (p->tempBuf)
// rem - the size of temp buffer including new data from (src)
unsigned rem = p->tempBufSize, lookAhead = 0;
while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
p->tempBuf[rem++] = src[lookAhead++];
p->tempBufSize = rem;
if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
{
const Byte *bufOut = p->tempBuf + rem;
ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut);
if (dummyRes == DUMMY_INPUT_EOF)
int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem);
if (dummyRes == DUMMY_ERROR)
{
if (rem >= LZMA_REQUIRED_INPUT_MAX)
break;
p->tempBufSize = rem;
(*srcLen) += (SizeT)ahead;
(*srcLen) += lookAhead;
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
dummyProcessed = (int)(bufOut - p->tempBuf);
if ((unsigned)dummyProcessed < p->tempBufSize)
break;
if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
{
(*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
p->tempBufSize = (unsigned)dummyProcessed;
// p->remainLen = kMatchSpecLen_Error_Data;
RETURN_NOT_FINISHED_FOR_FINISH
}
}
p->buf = p->tempBuf;
{
// we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf)
int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf);
SizeT processed = (SizeT)(p->buf - p->tempBuf);
rem = p->tempBufSize;
if (dummyProcessed < 0)
{
if (processed > LZMA_REQUIRED_INPUT_MAX)
break;
if (processed < rem)
break;
}
else if ((unsigned)dummyProcessed != processed)
break;
processed -= rem;
src += processed;
inSize -= processed;
(*srcLen) += processed;
p->tempBufSize = 0;
if (res != SZ_OK)
{
p->remainLen = kMatchSpecLen_Error_Data;
*status = LZMA_STATUS_NOT_FINISHED;
return SZ_ERROR_DATA;
}
}
p->buf = p->tempBuf;
if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
return SZ_ERROR_DATA;
lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf));
(*srcLen) += lookAhead;
src += lookAhead;
inSize -= lookAhead;
p->tempBufSize = 0;
}
}
}
/* Some unexpected error: internal error of code, memory corruption or hardware failure */
p->remainLen = kMatchSpecLen_Error_Fail;
return SZ_ERROR_FAIL;
if (p->code == 0)
*status = LZMA_STATUS_FINISHED_WITH_MARK;
return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA;
}
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
{
SizeT outSize = *destLen;
@@ -1237,19 +885,19 @@ SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *sr
}
}
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc)
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc)
{
ISzAlloc_Free(alloc, p->probs);
p->probs = NULL;
alloc->Free(alloc, p->probs);
p->probs = 0;
}
static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc)
static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc)
{
ISzAlloc_Free(alloc, p->dic);
p->dic = NULL;
alloc->Free(alloc, p->dic);
p->dic = 0;
}
void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc)
void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc)
{
LzmaDec_FreeProbs(p, alloc);
LzmaDec_FreeDict(p, alloc);
@@ -1273,60 +921,49 @@ SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
if (d >= (9 * 5 * 5))
return SZ_ERROR_UNSUPPORTED;
p->lc = (Byte)(d % 9);
p->lc = d % 9;
d /= 9;
p->pb = (Byte)(d / 5);
p->lp = (Byte)(d % 5);
p->pb = d / 5;
p->lp = d % 5;
return SZ_OK;
}
static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc)
static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc)
{
UInt32 numProbs = LzmaProps_GetNumProbs(propNew);
if (!p->probs || numProbs != p->numProbs)
if (p->probs == 0 || numProbs != p->numProbs)
{
LzmaDec_FreeProbs(p, alloc);
p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb));
if (!p->probs)
return SZ_ERROR_MEM;
p->probs_1664 = p->probs + 1664;
p->probs = (CLzmaProb *)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb));
p->numProbs = numProbs;
if (p->probs == 0)
return SZ_ERROR_MEM;
}
return SZ_OK;
}
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc)
{
CLzmaProps propNew;
RINOK(LzmaProps_Decode(&propNew, props, propsSize))
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
RINOK(LzmaProps_Decode(&propNew, props, propsSize));
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
p->prop = propNew;
return SZ_OK;
}
SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc)
{
CLzmaProps propNew;
SizeT dicBufSize;
RINOK(LzmaProps_Decode(&propNew, props, propsSize))
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
{
UInt32 dictSize = propNew.dicSize;
SizeT mask = ((UInt32)1 << 12) - 1;
if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;
dicBufSize = ((SizeT)dictSize + mask) & ~mask;
if (dicBufSize < dictSize)
dicBufSize = dictSize;
}
if (!p->dic || dicBufSize != p->dicBufSize)
RINOK(LzmaProps_Decode(&propNew, props, propsSize));
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
dicBufSize = propNew.dicSize;
if (p->dic == 0 || dicBufSize != p->dicBufSize)
{
LzmaDec_FreeDict(p, alloc);
p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize);
if (!p->dic)
p->dic = (Byte *)alloc->Alloc(alloc, dicBufSize);
if (p->dic == 0)
{
LzmaDec_FreeProbs(p, alloc);
return SZ_ERROR_MEM;
@@ -1339,25 +976,32 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll
SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
ELzmaStatus *status, ISzAllocPtr alloc)
ELzmaStatus *status, ISzAlloc *alloc)
{
CLzmaDec p;
SRes res;
SizeT outSize = *destLen, inSize = *srcLen;
*destLen = *srcLen = 0;
*status = LZMA_STATUS_NOT_SPECIFIED;
SizeT inSize = *srcLen;
SizeT outSize = *destLen;
*srcLen = *destLen = 0;
if (inSize < RC_INIT_SIZE)
return SZ_ERROR_INPUT_EOF;
LzmaDec_CONSTRUCT(&p)
RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc))
LzmaDec_Construct(&p);
res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc);
if (res != 0)
return res;
p.dic = dest;
p.dicBufSize = outSize;
LzmaDec_Init(&p);
*srcLen = inSize;
res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
*destLen = p.dicPos;
if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
res = SZ_ERROR_INPUT_EOF;
(*destLen) = p.dicPos;
LzmaDec_FreeProbs(&p, alloc);
return res;
}
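For reference, a hedged usage sketch of this one-call API with the post-revert ISzAlloc interface, assuming a raw stream whose LZMA_PROPS_SIZE header bytes are immediately followed by compressed data; the allocator and buffers are caller-supplied and nothing below is part of this diff:

#include "LzmaDec.h"

static SRes DecodeWhole(Byte *out, SizeT *outLen,
                        const Byte *in, SizeT inLen, ISzAlloc *alloc)
{
  ELzmaStatus status;
  SizeT srcLen = inLen - LZMA_PROPS_SIZE;
  /* LZMA_FINISH_END: we expect the whole stream to fit into (out). */
  return LzmaDecode(out, outLen, in + LZMA_PROPS_SIZE, &srcLen,
                    in, LZMA_PROPS_SIZE, LZMA_FINISH_END, &status, alloc);
}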

extern/lzma/LzmaDec.h vendored

@@ -1,36 +1,29 @@
/* LzmaDec.h -- LZMA Decoder
2023-04-02 : Igor Pavlov : Public domain */
2008-10-04 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_LZMA_DEC_H
#define ZIP7_INC_LZMA_DEC_H
#ifndef __LZMADEC_H
#define __LZMADEC_H
#include "7zTypes.h"
#include "Types.h"
EXTERN_C_BEGIN
/* #define Z7_LZMA_PROB32 */
/* Z7_LZMA_PROB32 can increase the speed on some CPUs,
/* #define _LZMA_PROB32 */
/* _LZMA_PROB32 can increase the speed on some CPUs,
but memory usage for CLzmaDec::probs will be doubled in that case */
typedef
#ifdef Z7_LZMA_PROB32
UInt32
#ifdef _LZMA_PROB32
#define CLzmaProb UInt32
#else
UInt16
#define CLzmaProb UInt16
#endif
CLzmaProb;
/* ---------- LZMA Properties ---------- */
#define LZMA_PROPS_SIZE 5
typedef struct
typedef struct _CLzmaProps
{
Byte lc;
Byte lp;
Byte pb;
Byte _pad_;
unsigned lc, lp, pb;
UInt32 dicSize;
} CLzmaProps;
@@ -52,35 +45,32 @@ SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
typedef struct
{
/* Don't change this structure. ASM code can use it. */
CLzmaProps prop;
CLzmaProb *probs;
CLzmaProb *probs_1664;
Byte *dic;
SizeT dicBufSize;
SizeT dicPos;
const Byte *buf;
UInt32 range;
UInt32 code;
UInt32 range, code;
SizeT dicPos;
SizeT dicBufSize;
UInt32 processedPos;
UInt32 checkDicSize;
unsigned state;
UInt32 reps[4];
UInt32 state;
UInt32 remainLen;
unsigned remainLen;
int needFlush;
int needInitState;
UInt32 numProbs;
unsigned tempBufSize;
Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
} CLzmaDec;
#define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; }
#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p)
#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; }
void LzmaDec_Init(CLzmaDec *p);
/* There are two types of LZMA streams:
- Stream with end mark. That end mark adds about 6 bytes to compressed size.
- Stream without end mark. You must know exact uncompressed size to decompress such stream. */
0) Stream with end mark. That end mark adds about 6 bytes to compressed size.
1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */
typedef enum
{
@@ -137,11 +127,11 @@ LzmaDec_Allocate* can return:
SZ_ERROR_UNSUPPORTED - Unsupported properties
*/
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc);
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc);
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc);
SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc);
SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc);
void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc);
/* ---------- Dictionary Interface ---------- */
@@ -150,7 +140,7 @@ void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc);
You must work with CLzmaDec variables directly in this interface.
STEPS:
LzmaDec_Construct()
LzmaDec_Constr()
LzmaDec_Allocate()
for (each new stream)
{
@@ -182,7 +172,6 @@ Returns:
LZMA_STATUS_NEEDS_MORE_INPUT
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
SZ_ERROR_DATA - Data error
SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
*/
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
@@ -225,13 +214,10 @@ Returns:
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
*/
SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
ELzmaStatus *status, ISzAllocPtr alloc);
EXTERN_C_END
ELzmaStatus *status, ISzAlloc *alloc);
#endif
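A sketch of the STEPS sequence from the dictionary-interface comment above (Construct / Allocate / Init / DecodeToDic in a loop). ReadBlock and the flushing of decoded bytes are caller-supplied placeholders, and partially consumed input is ignored for brevity; a real caller must also reset dicPos once the dictionary buffer fills.

#include "LzmaDec.h"

extern SizeT ReadBlock(Byte *buf, SizeT size);  /* placeholder input */

static SRes DecodeStream(const Byte *props, ISzAlloc *alloc)
{
  CLzmaDec dec;
  SRes res;
  LzmaDec_Construct(&dec);
  res = LzmaDec_Allocate(&dec, props, LZMA_PROPS_SIZE, alloc);
  if (res != SZ_OK)
    return res;
  LzmaDec_Init(&dec);
  for (;;)
  {
    Byte buf[1 << 14];
    SizeT srcLen = ReadBlock(buf, sizeof(buf));
    ELzmaStatus status;
    res = LzmaDec_DecodeToDic(&dec, dec.dicBufSize, buf, &srcLen,
                              LZMA_FINISH_ANY, &status);
    /* dec.dic[0 .. dec.dicPos) holds the decoded bytes to flush here. */
    if (res != SZ_OK || status == LZMA_STATUS_FINISHED_WITH_MARK || srcLen == 0)
      break;
  }
  LzmaDec_Free(&dec, alloc);
  return res;
}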

extern/lzma/LzmaEnc.c vendored

@@ -1,7 +1,5 @@
/* LzmaEnc.c -- LZMA Encoder
2023-04-13: Igor Pavlov : Public domain */
#include "Precomp.h"
2009-02-02 : Igor Pavlov : Public domain */
#include <string.h>
@@ -12,36 +10,28 @@
#include <stdio.h>
#endif
#include "CpuArch.h"
#include "LzmaEnc.h"
#include "LzFind.h"
#ifndef Z7_ST
#ifdef COMPRESS_MF_MT
#include "LzFindMt.h"
#endif
/* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize,
ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen,
UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p);
void LzmaEnc_Finish(CLzmaEncHandle p);
void LzmaEnc_SaveState(CLzmaEncHandle p);
void LzmaEnc_RestoreState(CLzmaEncHandle p);
#ifdef SHOW_STAT
static unsigned g_STAT_OFFSET = 0;
static int ttt = 0;
#endif
/* for good normalization speed we still reserve 256 MB before 4 GB range */
#define kLzmaMaxHistorySize ((UInt32)15 << 28)
#define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1)
// #define kNumTopBits 24
#define kTopValue ((UInt32)1 << 24)
#define kBlockSize (9 << 10)
#define kUnpackBlockSize (1 << 18)
#define kMatchArraySize (1 << 21)
#define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX)
#define kNumMaxDirectBits (31)
#define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits)
#define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits)
@@ -50,19 +40,14 @@ static unsigned g_STAT_OFFSET = 0;
#define kNumMoveReducingBits 4
#define kNumBitPriceShiftBits 4
// #define kBitPrice (1 << kNumBitPriceShiftBits)
#define REP_LEN_COUNT 64
#define kBitPrice (1 << kNumBitPriceShiftBits)
void LzmaEncProps_Init(CLzmaEncProps *p)
{
p->level = 5;
p->dictSize = p->mc = 0;
p->reduceSize = (UInt64)(Int64)-1;
p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
p->numHashOutBits = 0;
p->writeEndMark = 0;
p->affinity = 0;
}
void LzmaEncProps_Normalize(CLzmaEncProps *p)
@@ -70,37 +55,18 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
int level = p->level;
if (level < 0) level = 5;
p->level = level;
if (p->dictSize == 0)
p->dictSize =
( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) :
( level <= 6 ? ((UInt32)1 << (level + 19)) :
( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26)
)));
if (p->dictSize > p->reduceSize)
{
UInt32 v = (UInt32)p->reduceSize;
const UInt32 kReduceMin = ((UInt32)1 << 12);
if (v < kReduceMin)
v = kReduceMin;
if (p->dictSize > v)
p->dictSize = v;
}
if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26)));
if (p->lc < 0) p->lc = 3;
if (p->lp < 0) p->lp = 0;
if (p->pb < 0) p->pb = 2;
if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5);
if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1);
if (p->numHashBytes < 0) p->numHashBytes = 4;
if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
if (p->numThreads < 0)
p->numThreads =
#ifndef Z7_ST
#ifdef COMPRESS_MF_MT
((p->btMode && p->algo) ? 2 : 1);
#else
1;
@@ -114,133 +80,48 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
return props.dictSize;
}
/* #define LZMA_LOG_BSR */
/* Define it for Intel's CPU */
/*
x86/x64:
BSR:
IF (SRC == 0) ZF = 1, DEST is undefined;
AMD : DEST is unchanged;
IF (SRC != 0) ZF = 0; DEST is index of top non-zero bit
BSR is slow in some processors
LZCNT:
IF (SRC == 0) CF = 1, DEST is size_in_bits_of_register(src) (32 or 64)
IF (SRC != 0) CF = 0, DEST = num_lead_zero_bits
IF (DEST == 0) ZF = 1;
LZCNT works only in new processors starting from Haswell.
If LZCNT is not supported by the processor, it's executed as BSR.
LZCNT can be faster than BSR, if supported.
*/
// #define LZMA_LOG_BSR
#if defined(MY_CPU_ARM_OR_ARM64) /* || defined(MY_CPU_X86_OR_AMD64) */
#if (defined(__clang__) && (__clang_major__ >= 6)) \
|| (defined(__GNUC__) && (__GNUC__ >= 6))
#define LZMA_LOG_BSR
#elif defined(_MSC_VER) && (_MSC_VER >= 1300)
// #if defined(MY_CPU_ARM_OR_ARM64)
#define LZMA_LOG_BSR
// #endif
#endif
#endif
// #include <intrin.h>
#ifdef LZMA_LOG_BSR
#if defined(__clang__) \
|| defined(__GNUC__)
#define kDicLogSizeMaxCompress 30
/*
C code : (30 - __builtin_clz(x))
gcc9/gcc10 for x64 /x86 : 30 - (bsr(x) xor 31)
clang10 for x64 : 31 + (bsr(x) xor -32)
*/
#define BSR2_RET(pos, res) { unsigned long i; _BitScanReverse(&i, (pos)); res = (i + i) + ((pos >> (i - 1)) & 1); }
#define MY_clz(x) ((unsigned)__builtin_clz(x))
// __lzcnt32
// __builtin_ia32_lzcnt_u32
#else // #if defined(_MSC_VER)
#ifdef MY_CPU_ARM_OR_ARM64
#define MY_clz _CountLeadingZeros
#else // if defined(MY_CPU_X86_OR_AMD64)
// #define MY_clz __lzcnt // we can use lzcnt (unsupported by old CPU)
// _BitScanReverse code is not optimal for some MSVC compilers
#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); zz--; \
res = (zz + zz) + (pos >> zz); }
#endif // MY_CPU_X86_OR_AMD64
#endif // _MSC_VER
#ifndef BSR2_RET
#define BSR2_RET(pos, res) { unsigned zz = 30 - MY_clz(pos); \
res = (zz + zz) + (pos >> zz); }
#endif
unsigned GetPosSlot1(UInt32 pos);
unsigned GetPosSlot1(UInt32 pos)
UInt32 GetPosSlot1(UInt32 pos)
{
unsigned res;
UInt32 res;
BSR2_RET(pos, res);
return res;
}
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
#else
#else // ! LZMA_LOG_BSR
#define kNumLogBits (11 + sizeof(size_t) / 8 * 3)
#define kNumLogBits (9 + (int)sizeof(size_t) / 2)
#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
static void LzmaEnc_FastPosInit(Byte *g_FastPos)
void LzmaEnc_FastPosInit(Byte *g_FastPos)
{
unsigned slot;
int c = 2, slotFast;
g_FastPos[0] = 0;
g_FastPos[1] = 1;
g_FastPos += 2;
for (slot = 2; slot < kNumLogBits * 2; slot++)
for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++)
{
size_t k = ((size_t)1 << ((slot >> 1) - 1));
size_t j;
for (j = 0; j < k; j++)
g_FastPos[j] = (Byte)slot;
g_FastPos += k;
UInt32 k = (1 << ((slotFast >> 1) - 1));
UInt32 j;
for (j = 0; j < k; j++, c++)
g_FastPos[c] = (Byte)slotFast;
}
}
/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */
/*
#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
#define BSR2_RET(pos, res) { UInt32 i = 6 + ((kNumLogBits - 1) & \
(0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \
res = p->g_FastPos[pos >> zz] + (zz * 2); }
*/
/*
#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
(0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \
res = p->g_FastPos[pos >> zz] + (zz * 2); }
*/
#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \
res = p->g_FastPos[pos >> zz] + (zz * 2); }
res = p->g_FastPos[pos >> i] + (i * 2); }
/*
#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \
p->g_FastPos[pos >> 6] + 12 : \
@@ -249,57 +130,55 @@ static void LzmaEnc_FastPosInit(Byte *g_FastPos)
#define GetPosSlot1(pos) p->g_FastPos[pos]
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos]; else BSR2_RET(pos, res); }
#endif // LZMA_LOG_BSR
#endif
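Both the bit-scan branch and the g_FastPos table branch compute the same mapping. As a plain-C sketch of what BSR2_RET produces (an illustration only, not SDK code): distances 0..3 map to slots 0..3, and larger distances pack the index of the top set bit together with the bit just below it.

#include <stdint.h>

static unsigned PosSlot(uint32_t pos)
{
  unsigned b = 31;
  if (pos < 4)
    return (unsigned)pos;             /* below kStartPosModelIndex */
  while ((pos >> b) == 0)             /* BSR: index of the top set bit */
    b--;
  return (b * 2) + ((pos >> (b - 1)) & 1);
}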
#define LZMA_NUM_REPS 4
typedef UInt16 CState;
typedef UInt16 CExtra;
typedef unsigned CState;
typedef struct
typedef struct _COptimal
{
UInt32 price;
CState state;
CExtra extra;
// 0 : normal
// 1 : LIT : MATCH
// > 1 : MATCH (extra-1) : LIT : REP0 (len)
UInt32 len;
UInt32 dist;
UInt32 reps[LZMA_NUM_REPS];
int prev1IsChar;
int prev2;
UInt32 posPrev2;
UInt32 backPrev2;
UInt32 posPrev;
UInt32 backPrev;
UInt32 backs[LZMA_NUM_REPS];
} COptimal;
// 18.06
#define kNumOpts (1 << 11)
#define kPackReserve (kNumOpts * 8)
// #define kNumOpts (1 << 12)
// #define kPackReserve (1 + kNumOpts * 2)
#define kNumOpts (1 << 12)
#define kNumLenToPosStates 4
#define kNumPosSlotBits 6
// #define kDicLogSizeMin 0
#define kDicLogSizeMin 0
#define kDicLogSizeMax 32
#define kDistTableSizeMax (kDicLogSizeMax * 2)
#define kNumAlignBits 4
#define kAlignTableSize (1 << kNumAlignBits)
#define kAlignMask (kAlignTableSize - 1)
#define kStartPosModelIndex 4
#define kEndPosModelIndex 14
#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex)
typedef
#ifdef Z7_LZMA_PROB32
UInt32
#define kNumFullDistances (1 << (kEndPosModelIndex / 2))
#ifdef _LZMA_PROB32
#define CLzmaProb UInt32
#else
UInt16
#define CLzmaProb UInt16
#endif
CLzmaProb;
#define LZMA_PB_MAX 4
#define LZMA_LC_MAX 8
@@ -307,287 +186,270 @@ typedef
#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)
#define kLenNumLowBits 3
#define kLenNumLowSymbols (1 << kLenNumLowBits)
#define kLenNumMidBits 3
#define kLenNumMidSymbols (1 << kLenNumMidBits)
#define kLenNumHighBits 8
#define kLenNumHighSymbols (1 << kLenNumHighBits)
#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols)
#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)
#define LZMA_MATCH_LEN_MIN 2
#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1)
#define kNumStates 12
typedef struct
{
CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)];
CLzmaProb choice;
CLzmaProb choice2;
CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits];
CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits];
CLzmaProb high[kLenNumHighSymbols];
} CLenEnc;
typedef struct
{
unsigned tableSize;
CLenEnc p;
UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];
// UInt32 prices1[LZMA_NUM_PB_STATES_MAX][kLenNumLowSymbols * 2];
// UInt32 prices2[kLenNumSymbolsTotal];
UInt32 tableSize;
UInt32 counters[LZMA_NUM_PB_STATES_MAX];
} CLenPriceEnc;
#define GET_PRICE_LEN(p, posState, len) \
((p)->prices[posState][(size_t)(len) - LZMA_MATCH_LEN_MIN])
/*
#define GET_PRICE_LEN(p, posState, len) \
((p)->prices2[(size_t)(len) - 2] + ((p)->prices1[posState][((len) - 2) & (kLenNumLowSymbols * 2 - 1)] & (((len) - 2 - kLenNumLowSymbols * 2) >> 9)))
*/
typedef struct
typedef struct _CRangeEnc
{
UInt32 range;
unsigned cache;
Byte cache;
UInt64 low;
UInt64 cacheSize;
Byte *buf;
Byte *bufLim;
Byte *bufBase;
ISeqOutStreamPtr outStream;
ISeqOutStream *outStream;
UInt64 processed;
SRes res;
} CRangeEnc;
typedef struct _CSeqInStreamBuf
{
ISeqInStream funcTable;
const Byte *data;
SizeT rem;
} CSeqInStreamBuf;
static SRes MyRead(void *pp, void *data, size_t *size)
{
size_t curSize = *size;
CSeqInStreamBuf *p = (CSeqInStreamBuf *)pp;
if (p->rem < curSize)
curSize = p->rem;
memcpy(data, p->data, curSize);
p->rem -= curSize;
p->data += curSize;
*size = curSize;
return SZ_OK;
}
typedef struct
{
CLzmaProb *litProbs;
unsigned state;
UInt32 reps[LZMA_NUM_REPS];
CLzmaProb posAlignEncoder[1 << kNumAlignBits];
CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
CLzmaProb isRep[kNumStates];
CLzmaProb isRepG0[kNumStates];
CLzmaProb isRepG1[kNumStates];
CLzmaProb isRepG2[kNumStates];
CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
CLzmaProb posEncoders[kNumFullDistances];
CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
CLzmaProb posAlignEncoder[1 << kNumAlignBits];
CLenEnc lenProbs;
CLenEnc repLenProbs;
CLenPriceEnc lenEnc;
CLenPriceEnc repLenEnc;
UInt32 reps[LZMA_NUM_REPS];
UInt32 state;
} CSaveState;
typedef UInt32 CProbPrice;
struct CLzmaEnc
typedef struct _CLzmaEnc
{
IMatchFinder matchFinder;
void *matchFinderObj;
IMatchFinder2 matchFinder;
unsigned optCur;
unsigned optEnd;
unsigned longestMatchLen;
unsigned numPairs;
UInt32 numAvail;
unsigned state;
unsigned numFastBytes;
unsigned additionalOffset;
UInt32 reps[LZMA_NUM_REPS];
unsigned lpMask, pbMask;
CLzmaProb *litProbs;
CRangeEnc rc;
UInt32 backRes;
unsigned lc, lp, pb;
unsigned lclp;
BoolInt fastMode;
BoolInt writeEndMark;
BoolInt finished;
BoolInt multiThread;
BoolInt needInit;
// BoolInt _maxMode;
UInt64 nowPos64;
unsigned matchPriceCount;
// unsigned alignPriceCount;
int repLenEncCounter;
unsigned distTableSize;
UInt32 dictSize;
SRes result;
#ifndef Z7_ST
BoolInt mtMode;
// begin of CMatchFinderMt is used in LZ thread
#ifdef COMPRESS_MF_MT
Bool mtMode;
CMatchFinderMt matchFinderMt;
// end of CMatchFinderMt is used in BT and HASH threads
// #else
// CMatchFinder matchFinderBase;
#endif
CMatchFinder matchFinderBase;
// we suppose that we have 8-bytes alignment after CMatchFinder
#ifndef Z7_ST
#ifdef COMPRESS_MF_MT
Byte pad[128];
#endif
// LZ thread
CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
UInt32 optimumEndIndex;
UInt32 optimumCurrentIndex;
// we want {len , dist} pairs to be 8-bytes aligned in matches array
UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2];
// we want 8-bytes alignment here
UInt32 alignPrices[kAlignTableSize];
UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
CLzmaProb posAlignEncoder[1 << kNumAlignBits];
CLzmaProb isRep[kNumStates];
CLzmaProb isRepG0[kNumStates];
CLzmaProb isRepG1[kNumStates];
CLzmaProb isRepG2[kNumStates];
CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
CLzmaProb posEncoders[kNumFullDistances];
UInt32 longestMatchLength;
UInt32 numPairs;
UInt32 numAvail;
COptimal opt[kNumOpts];
CLenEnc lenProbs;
CLenEnc repLenProbs;
#ifndef LZMA_LOG_BSR
Byte g_FastPos[1 << kNumLogBits];
#endif
UInt32 ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
UInt32 numFastBytes;
UInt32 additionalOffset;
UInt32 reps[LZMA_NUM_REPS];
UInt32 state;
UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
UInt32 alignPrices[kAlignTableSize];
UInt32 alignPriceCount;
UInt32 distTableSize;
unsigned lc, lp, pb;
unsigned lpMask, pbMask;
CLzmaProb *litProbs;
CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
CLzmaProb isRep[kNumStates];
CLzmaProb isRepG0[kNumStates];
CLzmaProb isRepG1[kNumStates];
CLzmaProb isRepG2[kNumStates];
CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
CLzmaProb posAlignEncoder[1 << kNumAlignBits];
CLenPriceEnc lenEnc;
CLenPriceEnc repLenEnc;
COptimal opt[kNumOpts];
unsigned lclp;
Bool fastMode;
CRangeEnc rc;
Bool writeEndMark;
UInt64 nowPos64;
UInt32 matchPriceCount;
Bool finished;
Bool multiThread;
SRes result;
UInt32 dictSize;
UInt32 matchFinderCycles;
ISeqInStream *inStream;
CSeqInStreamBuf seqBufInStream;
CSaveState saveState;
} CLzmaEnc;
// BoolInt mf_Failure;
#ifndef Z7_ST
Byte pad2[128];
#endif
};
#define MFB (p->matchFinderBase)
/*
#ifndef Z7_ST
#define MFB (p->matchFinderMt.MatchFinder)
#endif
*/
// #define GET_CLzmaEnc_p CLzmaEnc *p = (CLzmaEnc*)(void *)p;
// #define GET_const_CLzmaEnc_p const CLzmaEnc *p = (const CLzmaEnc*)(const void *)p;
#define COPY_ARR(dest, src, arr) memcpy((dest)->arr, (src)->arr, sizeof((src)->arr));
#define COPY_LZMA_ENC_STATE(d, s, p) \
(d)->state = (s)->state; \
COPY_ARR(d, s, reps) \
COPY_ARR(d, s, posAlignEncoder) \
COPY_ARR(d, s, isRep) \
COPY_ARR(d, s, isRepG0) \
COPY_ARR(d, s, isRepG1) \
COPY_ARR(d, s, isRepG2) \
COPY_ARR(d, s, isMatch) \
COPY_ARR(d, s, isRep0Long) \
COPY_ARR(d, s, posSlotEncoder) \
COPY_ARR(d, s, posEncoders) \
(d)->lenProbs = (s)->lenProbs; \
(d)->repLenProbs = (s)->repLenProbs; \
memcpy((d)->litProbs, (s)->litProbs, ((UInt32)0x300 << (p)->lclp) * sizeof(CLzmaProb));
void LzmaEnc_SaveState(CLzmaEncHandle p)
void LzmaEnc_SaveState(CLzmaEncHandle pp)
{
// GET_CLzmaEnc_p
CSaveState *v = &p->saveState;
COPY_LZMA_ENC_STATE(v, p, p)
CLzmaEnc *p = (CLzmaEnc *)pp;
CSaveState *dest = &p->saveState;
int i;
dest->lenEnc = p->lenEnc;
dest->repLenEnc = p->repLenEnc;
dest->state = p->state;
for (i = 0; i < kNumStates; i++)
{
memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
}
for (i = 0; i < kNumLenToPosStates; i++)
memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1));
memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
memcpy(dest->reps, p->reps, sizeof(p->reps));
memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb));
}
void LzmaEnc_RestoreState(CLzmaEncHandle p)
void LzmaEnc_RestoreState(CLzmaEncHandle pp)
{
// GET_CLzmaEnc_p
const CSaveState *v = &p->saveState;
COPY_LZMA_ENC_STATE(p, v, p)
CLzmaEnc *dest = (CLzmaEnc *)pp;
const CSaveState *p = &dest->saveState;
int i;
dest->lenEnc = p->lenEnc;
dest->repLenEnc = p->repLenEnc;
dest->state = p->state;
for (i = 0; i < kNumStates; i++)
{
memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
}
for (i = 0; i < kNumLenToPosStates; i++)
memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1));
memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
memcpy(dest->reps, p->reps, sizeof(p->reps));
memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb));
}
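/* Editorial note (added): SaveState/RestoreState snapshot only the
   probability model and rep distances, not the range coder or match
   finder. A caller (LZMA2 does roughly this) can speculatively encode a
   chunk and roll the model back if the chunk is later stored in another
   form:

   LzmaEnc_SaveState(enc);        // snapshot probs + reps
   ...encode one chunk...
   if (chunk_rejected)
     LzmaEnc_RestoreState(enc);   // roll the model back
*/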
Z7_NO_INLINE
SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2)
SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
{
// GET_CLzmaEnc_p
CLzmaEnc *p = (CLzmaEnc *)pp;
CLzmaEncProps props = *props2;
LzmaEncProps_Normalize(&props);
if (props.lc > LZMA_LC_MAX
|| props.lp > LZMA_LP_MAX
|| props.pb > LZMA_PB_MAX)
if (props.lc > LZMA_LC_MAX || props.lp > LZMA_LP_MAX || props.pb > LZMA_PB_MAX ||
props.dictSize > (1 << kDicLogSizeMaxCompress) || props.dictSize > (1 << 30))
return SZ_ERROR_PARAM;
if (props.dictSize > kLzmaMaxHistorySize)
props.dictSize = kLzmaMaxHistorySize;
#ifndef LZMA_LOG_BSR
{
const UInt64 dict64 = props.dictSize;
if (dict64 > ((UInt64)1 << kDicLogSizeMaxCompress))
return SZ_ERROR_PARAM;
}
#endif
p->dictSize = props.dictSize;
p->matchFinderCycles = props.mc;
{
unsigned fb = (unsigned)props.fb;
unsigned fb = props.fb;
if (fb < 5)
fb = 5;
if (fb > LZMA_MATCH_LEN_MAX)
fb = LZMA_MATCH_LEN_MAX;
p->numFastBytes = fb;
}
p->lc = (unsigned)props.lc;
p->lp = (unsigned)props.lp;
p->pb = (unsigned)props.pb;
p->lc = props.lc;
p->lp = props.lp;
p->pb = props.pb;
p->fastMode = (props.algo == 0);
// p->_maxMode = True;
MFB.btMode = (Byte)(props.btMode ? 1 : 0);
// MFB.btMode = (Byte)(props.btMode);
p->matchFinderBase.btMode = props.btMode;
{
unsigned numHashBytes = 4;
UInt32 numHashBytes = 4;
if (props.btMode)
{
if (props.numHashBytes < 2) numHashBytes = 2;
else if (props.numHashBytes < 4) numHashBytes = (unsigned)props.numHashBytes;
if (props.numHashBytes < 2)
numHashBytes = 2;
else if (props.numHashBytes < 4)
numHashBytes = props.numHashBytes;
}
if (props.numHashBytes >= 5) numHashBytes = 5;
MFB.numHashBytes = numHashBytes;
// MFB.numHashBytes_Min = 2;
MFB.numHashOutBits = (Byte)props.numHashOutBits;
p->matchFinderBase.numHashBytes = numHashBytes;
}
MFB.cutValue = props.mc;
p->matchFinderBase.cutValue = props.mc;
p->writeEndMark = (BoolInt)props.writeEndMark;
p->writeEndMark = props.writeEndMark;
#ifndef Z7_ST
#ifdef COMPRESS_MF_MT
/*
if (newMultiThread != _multiThread)
{
@@ -596,73 +458,56 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2)
}
*/
p->multiThread = (props.numThreads > 1);
p->matchFinderMt.btSync.affinity =
p->matchFinderMt.hashSync.affinity = props.affinity;
#endif
return SZ_OK;
}
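/* Editorial sketch (added; assumes the public LzmaEnc.h API; handle_error
   is a hypothetical placeholder):

   CLzmaEncProps props;
   LzmaEncProps_Init(&props);
   props.level = 9;                   // normalized into dictSize/fb/btMode
   props.dictSize = 1 << 24;          // 16 MB dictionary
   if (LzmaEnc_SetProps(enc, &props) != SZ_OK)
     handle_error();                  // lc/lp/pb out of range or dict too large
*/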
static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5};
static const int kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
static const int kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
static const int kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSize)
{
  // GET_CLzmaEnc_p
  MFB.expectedDataSize = expectedDataSize;
}
#define IsCharState(s) ((s) < 7)
#define kState_Start 0
#define kState_LitAfterMatch 4
#define kState_LitAfterRep 5
#define kState_MatchAfterLit 7
#define kState_RepAfterLit 8
static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5};
static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
#define IsLitState(s) ((s) < 7)
#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1)
#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1)
#define kInfinityPrice (1 << 30)
static void RangeEnc_Construct(CRangeEnc *p)
{
p->outStream = NULL;
p->bufBase = NULL;
p->outStream = 0;
p->bufBase = 0;
}
#define RangeEnc_GetProcessed(p) ( (p)->processed + (size_t)((p)->buf - (p)->bufBase) + (p)->cacheSize)
#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
#define RC_BUF_SIZE (1 << 16)
static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc)
static int RangeEnc_Alloc(CRangeEnc *p, ISzAlloc *alloc)
{
if (!p->bufBase)
if (p->bufBase == 0)
{
p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE);
if (!p->bufBase)
p->bufBase = (Byte *)alloc->Alloc(alloc, RC_BUF_SIZE);
if (p->bufBase == 0)
return 0;
p->bufLim = p->bufBase + RC_BUF_SIZE;
}
return 1;
}
static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc)
static void RangeEnc_Free(CRangeEnc *p, ISzAlloc *alloc)
{
ISzAlloc_Free(alloc, p->bufBase);
p->bufBase = NULL;
alloc->Free(alloc, p->bufBase);
p->bufBase = 0;
}
static void RangeEnc_Init(CRangeEnc *p)
{
p->range = 0xFFFFFFFF;
p->cache = 0;
/* Stream.Init(); */
p->low = 0;
p->cacheSize = 0;
p->range = 0xFFFFFFFF;
p->cacheSize = 1;
p->cache = 0;
p->buf = p->bufBase;
@@ -670,48 +515,37 @@ static void RangeEnc_Init(CRangeEnc *p)
p->res = SZ_OK;
}
Z7_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
static void RangeEnc_FlushStream(CRangeEnc *p)
{
const size_t num = (size_t)(p->buf - p->bufBase);
if (p->res == SZ_OK)
{
if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
p->res = SZ_ERROR_WRITE;
}
size_t num;
if (p->res != SZ_OK)
return;
num = p->buf - p->bufBase;
if (num != p->outStream->Write(p->outStream, p->bufBase, num))
p->res = SZ_ERROR_WRITE;
p->processed += num;
p->buf = p->bufBase;
}
Z7_NO_INLINE static void Z7_FASTCALL RangeEnc_ShiftLow(CRangeEnc *p)
{
  UInt32 low = (UInt32)p->low;
  unsigned high = (unsigned)(p->low >> 32);
  p->low = (UInt32)(low << 8);
  if (low < (UInt32)0xFF000000 || high != 0)
  {
    {
      Byte *buf = p->buf;
      *buf++ = (Byte)(p->cache + high);
      p->cache = (unsigned)(low >> 24);
      p->buf = buf;
      if (buf == p->bufLim)
        RangeEnc_FlushStream(p);
      if (p->cacheSize == 0)
        return;
    }
    high += 0xFF;
    for (;;)
    {
      Byte *buf = p->buf;
      *buf++ = (Byte)(high);
      p->buf = buf;
      if (buf == p->bufLim)
        RangeEnc_FlushStream(p);
      if (--p->cacheSize == 0)
        return;
    }
  }
  p->cacheSize++;
}
static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p)
{
  if ((UInt32)p->low < (UInt32)0xFF000000 || (int)(p->low >> 32) != 0)
  {
    Byte temp = p->cache;
    do
    {
      Byte *buf = p->buf;
      *buf++ = (Byte)(temp + (Byte)(p->low >> 32));
      p->buf = buf;
      if (buf == p->bufLim)
        RangeEnc_FlushStream(p);
      temp = 0xFF;
    }
    while (--p->cacheSize != 0);
    p->cache = (Byte)((UInt32)p->low >> 24);
  }
  p->cacheSize++;
  p->low = (UInt32)p->low << 8;
}
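/* Editorial note (added): both ShiftLow variants implement the same carry
   rule. Example: with cache = 0x7F and cacheSize = 3, a carry out of the
   low 32 bits makes the encoder emit 0x80 0x00 0x00 (the held byte plus
   carry, then the pending 0xFF run bumped to 0x00); with no carry it
   emits 0x7F 0xFF 0xFF. */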
static void RangeEnc_FlushData(CRangeEnc *p)
@@ -721,121 +555,78 @@ static void RangeEnc_FlushData(CRangeEnc *p)
RangeEnc_ShiftLow(p);
}
#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); }
#define RC_BIT_PRE(p, prob) \
ttt = *(prob); \
newBound = (range >> kNumBitModelTotalBits) * ttt;
// #define Z7_LZMA_ENC_USE_BRANCH
#ifdef Z7_LZMA_ENC_USE_BRANCH
#define RC_BIT(p, prob, bit) { \
RC_BIT_PRE(p, prob) \
if (bit == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \
else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \
*(prob) = (CLzmaProb)ttt; \
RC_NORM(p) \
}
#else
#define RC_BIT(p, prob, bit) { \
UInt32 mask; \
RC_BIT_PRE(p, prob) \
mask = 0 - (UInt32)bit; \
range &= mask; \
mask &= newBound; \
range -= mask; \
(p)->low += mask; \
mask = (UInt32)bit - 1; \
range += newBound & mask; \
mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \
mask += ((1 << kNumMoveBits) - 1); \
ttt += (UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \
*(prob) = (CLzmaProb)ttt; \
RC_NORM(p) \
}
#endif
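/* Editorial walk-through (added) of the branchless RC_BIT above: with
   mask = 0 - bit (all ones when bit == 1, zero when bit == 0), the macro
   reduces to
     bit == 0:  range = newBound;                 prob grows toward kBitModelTotal
     bit == 1:  low += newBound; range -= newBound;   prob decays toward 0
   and the final ttt update folds both "+= (kBitModelTotal - ttt) >> kNumMoveBits"
   and "-= ttt >> kNumMoveBits" into one arithmetic-shift expression, so the
   CPU never takes a data-dependent branch on the encoded bit. */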
#define RC_BIT_0_BASE(p, prob) \
range = newBound; *(prob) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
#define RC_BIT_1_BASE(p, prob) \
range -= newBound; (p)->low += newBound; *(prob) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); \
#define RC_BIT_0(p, prob) \
RC_BIT_0_BASE(p, prob) \
RC_NORM(p)
#define RC_BIT_1(p, prob) \
RC_BIT_1_BASE(p, prob) \
RC_NORM(p)
static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob)
{
  UInt32 range, ttt, newBound;
  range = p->range;
  RC_BIT_PRE(p, prob)
  RC_BIT_0(p, prob)
  p->range = range;
}
static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym)
{
  UInt32 range = p->range;
  sym |= 0x100;
  do
  {
    UInt32 ttt, newBound;
    // RangeEnc_EncodeBit(p, probs + (sym >> 8), (sym >> 7) & 1);
    CLzmaProb *prob = probs + (sym >> 8);
    UInt32 bit = (sym >> 7) & 1;
    sym <<= 1;
    RC_BIT(p, prob, bit)
  }
  while (sym < 0x10000);
  p->range = range;
}
static void RangeEnc_EncodeDirectBits(CRangeEnc *p, UInt32 value, int numBits)
{
  do
  {
    p->range >>= 1;
    p->low += p->range & (0 - ((value >> --numBits) & 1));
    if (p->range < kTopValue)
    {
      p->range <<= 8;
      RangeEnc_ShiftLow(p);
    }
  }
  while (numBits != 0);
}
static void RangeEnc_EncodeBit(CRangeEnc *p, CLzmaProb *prob, UInt32 symbol)
{
  UInt32 ttt = *prob;
  UInt32 newBound = (p->range >> kNumBitModelTotalBits) * ttt;
  if (symbol == 0)
  {
    p->range = newBound;
    ttt += (kBitModelTotal - ttt) >> kNumMoveBits;
  }
  else
  {
    p->low += newBound;
    p->range -= newBound;
    ttt -= ttt >> kNumMoveBits;
  }
  *prob = (CLzmaProb)ttt;
  if (p->range < kTopValue)
  {
    p->range <<= 8;
    RangeEnc_ShiftLow(p);
  }
}
static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol)
{
  symbol |= 0x100;
  do
  {
    RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1);
    symbol <<= 1;
  }
  while (symbol < 0x10000);
}
static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UInt32 matchByte)
{
  UInt32 range = p->range;
  UInt32 offs = 0x100;
  sym |= 0x100;
  do
  {
    UInt32 ttt, newBound;
    CLzmaProb *prob;
    UInt32 bit;
    matchByte <<= 1;
    // RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (sym >> 8)), (sym >> 7) & 1);
    prob = probs + (offs + (matchByte & offs) + (sym >> 8));
    bit = (sym >> 7) & 1;
    sym <<= 1;
    offs &= ~(matchByte ^ sym);
    RC_BIT(p, prob, bit)
  }
  while (sym < 0x10000);
  p->range = range;
}
static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol, UInt32 matchByte)
{
  UInt32 offs = 0x100;
  symbol |= 0x100;
  do
  {
    matchByte <<= 1;
    RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1);
    symbol <<= 1;
    offs &= ~(matchByte ^ symbol);
  }
  while (symbol < 0x10000);
}
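/* Editorial note (added): in both LitEnc_EncodeMatched variants, "offs"
   starts at 0x100 and is cleared by "offs &= ~(matchByte ^ symbol)" the
   first time an emitted bit differs from the corresponding bit of
   matchByte. Until then the index offs + (matchByte & offs) selects a
   separate probability context per match bit; afterwards the loop falls
   back to the plain literal tree used by LitEnc_Encode. */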
static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
void LzmaEnc_InitPriceTables(UInt32 *ProbPrices)
{
UInt32 i;
for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++)
for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits))
{
const unsigned kCyclesBits = kNumBitPriceShiftBits;
UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1));
unsigned bitCount = 0;
unsigned j;
const int kCyclesBits = kNumBitPriceShiftBits;
UInt32 w = i;
UInt32 bitCount = 0;
int j;
for (j = 0; j < kCyclesBits; j++)
{
w = w * w;
@@ -846,663 +637,554 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
bitCount++;
}
}
ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
// printf("\n%3d: %5d", i, ProbPrices[i]);
ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
}
}
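/* Editorial derivation (added): ProbPrices[] approximates -log2(prob) in
   fixed point with kNumBitPriceShiftBits fractional bits. Each pass of
   the inner loop squares w; every time the square overflows 16 bits it is
   renormalized and bitCount incremented, so after kCyclesBits squarings
   bitCount approximates log2(w) scaled by 2^kCyclesBits, and the final
   subtraction converts that into a bits-per-symbol price. Sanity check:
   prob = kBitModelTotal / 2 (a 50/50 bit) prices out to roughly
   1 << kNumBitPriceShiftBits, i.e. one bit. */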
#define GET_PRICE(prob, bit) \
p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
#define GET_PRICE(prob, symbol) \
p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
#define GET_PRICEa(prob, bit) \
ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
#define GET_PRICEa(prob, symbol) \
ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits]
#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
#define GET_PRICE_0a(prob) ProbPrices[(prob) >> kNumMoveReducingBits]
#define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 sym, const CProbPrice *ProbPrices)
static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 symbol, UInt32 *ProbPrices)
{
UInt32 price = 0;
sym |= 0x100;
symbol |= 0x100;
do
{
unsigned bit = sym & 1;
sym >>= 1;
price += GET_PRICEa(probs[sym], bit);
price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1);
symbol <<= 1;
}
while (sym >= 2);
while (symbol < 0x10000);
return price;
}
static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 sym, UInt32 matchByte, const CProbPrice *ProbPrices)
static UInt32 LitEnc_GetPriceMatched(const CLzmaProb *probs, UInt32 symbol, UInt32 matchByte, UInt32 *ProbPrices)
{
UInt32 price = 0;
UInt32 offs = 0x100;
sym |= 0x100;
symbol |= 0x100;
do
{
matchByte <<= 1;
price += GET_PRICEa(probs[offs + (matchByte & offs) + (sym >> 8)], (sym >> 7) & 1);
sym <<= 1;
offs &= ~(matchByte ^ sym);
price += GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1);
symbol <<= 1;
offs &= ~(matchByte ^ symbol);
}
while (sym < 0x10000);
while (symbol < 0x10000);
return price;
}
static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBits, unsigned sym)
{
  UInt32 range = rc->range;
  unsigned m = 1;
  do
  {
    UInt32 ttt, newBound;
    unsigned bit = sym & 1;
    // RangeEnc_EncodeBit(rc, probs + m, bit);
    sym >>= 1;
    RC_BIT(rc, probs + m, bit)
    m = (m << 1) | bit;
  }
  while (--numBits);
  rc->range = range;
}
static void RcTree_Encode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol)
{
  UInt32 m = 1;
  int i;
  for (i = numBitLevels; i != 0;)
  {
    UInt32 bit;
    i--;
    bit = (symbol >> i) & 1;
    RangeEnc_EncodeBit(rc, probs + m, bit);
    m = (m << 1) | bit;
  }
}
static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol)
{
UInt32 m = 1;
int i;
for (i = 0; i < numBitLevels; i++)
{
UInt32 bit = symbol & 1;
RangeEnc_EncodeBit(rc, probs + m, bit);
m = (m << 1) | bit;
symbol >>= 1;
}
}
static UInt32 RcTree_GetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices)
{
UInt32 price = 0;
symbol |= (1 << numBitLevels);
while (symbol != 1)
{
price += GET_PRICEa(probs[symbol >> 1], symbol & 1);
symbol >>= 1;
}
return price;
}
static UInt32 RcTree_ReverseGetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices)
{
UInt32 price = 0;
UInt32 m = 1;
int i;
for (i = numBitLevels; i != 0; i--)
{
UInt32 bit = symbol & 1;
symbol >>= 1;
price += GET_PRICEa(probs[m], bit);
m = (m << 1) | bit;
}
return price;
}
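/* Editorial note (added): RcTree_Encode walks the symbol MSB-first,
   RcTree_ReverseEncode LSB-first; the reverse form is used where the
   decoder consumes bits low-to-high (e.g. the kNumAlignBits align bits of
   a distance). Example: reverse-encoding sym = 0b101 emits bits 1, 0, 1
   (LSB first) using model indices m = 1, 3, 6. */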
static void LenEnc_Init(CLenEnc *p)
{
unsigned i;
for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++)
p->choice = p->choice2 = kProbInitValue;
for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++)
p->low[i] = kProbInitValue;
for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++)
p->mid[i] = kProbInitValue;
for (i = 0; i < kLenNumHighSymbols; i++)
p->high[i] = kProbInitValue;
}
static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posState)
{
  UInt32 range, ttt, newBound;
  CLzmaProb *probs = p->low;
  range = rc->range;
  RC_BIT_PRE(rc, probs)
  if (sym >= kLenNumLowSymbols)
  {
    RC_BIT_1(rc, probs)
    probs += kLenNumLowSymbols;
    RC_BIT_PRE(rc, probs)
    if (sym >= kLenNumLowSymbols * 2)
    {
      RC_BIT_1(rc, probs)
      rc->range = range;
      // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2);
      LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2);
      return;
    }
    sym -= kLenNumLowSymbols;
  }
  // RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, sym);
  {
    unsigned m;
    unsigned bit;
    RC_BIT_0(rc, probs)
    probs += (posState << (1 + kLenNumLowBits));
    bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit) m = (1 << 1) + bit;
    bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit) m = (m << 1) + bit;
    bit = sym & 1; RC_BIT(rc, probs + m, bit)
    rc->range = range;
  }
}
static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState)
{
  if (symbol < kLenNumLowSymbols)
  {
    RangeEnc_EncodeBit(rc, &p->choice, 0);
    RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol);
  }
  else
  {
    RangeEnc_EncodeBit(rc, &p->choice, 1);
    if (symbol < kLenNumLowSymbols + kLenNumMidSymbols)
    {
      RangeEnc_EncodeBit(rc, &p->choice2, 0);
      RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols);
    }
    else
    {
      RangeEnc_EncodeBit(rc, &p->choice2, 1);
      RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols);
    }
  }
}
static void LenEnc_SetPrices(CLenEnc *p, UInt32 posState, UInt32 numSymbols, UInt32 *prices, UInt32 *ProbPrices)
{
  UInt32 a0 = GET_PRICE_0a(p->choice);
  UInt32 a1 = GET_PRICE_1a(p->choice);
  UInt32 b0 = a1 + GET_PRICE_0a(p->choice2);
  UInt32 b1 = a1 + GET_PRICE_1a(p->choice2);
  UInt32 i = 0;
  for (i = 0; i < kLenNumLowSymbols; i++)
  {
    if (i >= numSymbols)
      return;
    prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices);
  }
  for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++)
  {
    if (i >= numSymbols)
      return;
    prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices);
  }
  for (; i < numSymbols; i++)
    prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices);
}
static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices)
{
  unsigned i;
  for (i = 0; i < 8; i += 2)
  {
    UInt32 price = startPrice;
    UInt32 prob;
    price += GET_PRICEa(probs[1 ], (i >> 2));
    price += GET_PRICEa(probs[2 + (i >> 2)], (i >> 1) & 1);
    prob = probs[4 + (i >> 1)];
    prices[i ] = price + GET_PRICEa_0(prob);
    prices[i + 1] = price + GET_PRICEa_1(prob);
  }
}
static void MY_FAST_CALL LenPriceEnc_UpdateTable(CLenPriceEnc *p, UInt32 posState, UInt32 *ProbPrices)
{
  LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices);
  p->counters[posState] = p->tableSize;
}
Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables(
CLenPriceEnc *p,
unsigned numPosStates,
const CLenEnc *enc,
const CProbPrice *ProbPrices)
static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, UInt32 numPosStates, UInt32 *ProbPrices)
{
UInt32 b;
{
unsigned prob = enc->low[0];
UInt32 a, c;
unsigned posState;
b = GET_PRICEa_1(prob);
a = GET_PRICEa_0(prob);
c = b + GET_PRICEa_0(enc->low[kLenNumLowSymbols]);
for (posState = 0; posState < numPosStates; posState++)
{
UInt32 *prices = p->prices[posState];
const CLzmaProb *probs = enc->low + (posState << (1 + kLenNumLowBits));
SetPrices_3(probs, a, prices, ProbPrices);
SetPrices_3(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols, ProbPrices);
}
}
/*
{
unsigned i;
UInt32 b;
a = GET_PRICEa_0(enc->low[0]);
for (i = 0; i < kLenNumLowSymbols; i++)
p->prices2[i] = a;
a = GET_PRICEa_1(enc->low[0]);
b = a + GET_PRICEa_0(enc->low[kLenNumLowSymbols]);
for (i = kLenNumLowSymbols; i < kLenNumLowSymbols * 2; i++)
p->prices2[i] = b;
a += GET_PRICEa_1(enc->low[kLenNumLowSymbols]);
}
*/
// p->counter = numSymbols;
// p->counter = 64;
{
unsigned i = p->tableSize;
if (i > kLenNumLowSymbols * 2)
{
const CLzmaProb *probs = enc->high;
UInt32 *prices = p->prices[0] + kLenNumLowSymbols * 2;
i -= kLenNumLowSymbols * 2 - 1;
i >>= 1;
b += GET_PRICEa_1(enc->low[kLenNumLowSymbols]);
do
{
/*
p->prices2[i] = a +
// RcTree_GetPrice(enc->high, kLenNumHighBits, i - kLenNumLowSymbols * 2, ProbPrices);
LitEnc_GetPrice(probs, i - kLenNumLowSymbols * 2, ProbPrices);
*/
// UInt32 price = a + RcTree_GetPrice(probs, kLenNumHighBits - 1, sym, ProbPrices);
unsigned sym = --i + (1 << (kLenNumHighBits - 1));
UInt32 price = b;
do
{
unsigned bit = sym & 1;
sym >>= 1;
price += GET_PRICEa(probs[sym], bit);
}
while (sym >= 2);
{
unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];
prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob);
prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob);
}
}
while (i);
{
unsigned posState;
size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);
for (posState = 1; posState < numPosStates; posState++)
memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num);
}
}
}
UInt32 posState;
for (posState = 0; posState < numPosStates; posState++)
LenPriceEnc_UpdateTable(p, posState, ProbPrices);
}
/*
static void LenEnc_Encode2(CLenPriceEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState, Bool updatePrice, UInt32 *ProbPrices)
{
LenEnc_Encode(&p->p, rc, symbol, posState);
if (updatePrice)
if (--p->counters[posState] == 0)
LenPriceEnc_UpdateTable(p, posState, ProbPrices);
}
static void MovePos(CLzmaEnc *p, UInt32 num)
{
#ifdef SHOW_STAT
g_STAT_OFFSET += num;
printf("\n MovePos %u", num);
ttt += num;
printf("\n MovePos %d", num);
#endif
*/
#define MOVE_POS(p, num) { \
p->additionalOffset += (num); \
p->matchFinder.Skip(p->matchFinderObj, (UInt32)(num)); }
if (num != 0)
{
p->additionalOffset += num;
p->matchFinder.Skip(p->matchFinderObj, num);
}
}
static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
static UInt32 ReadMatchDistances(CLzmaEnc *p, UInt32 *numDistancePairsRes)
{
unsigned numPairs;
p->additionalOffset++;
UInt32 lenRes = 0, numPairs;
p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
{
const UInt32 *d = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
// if (!d) { p->mf_Failure = True; *numPairsRes = 0; return 0; }
numPairs = (unsigned)(d - p->matches);
}
*numPairsRes = numPairs;
numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
#ifdef SHOW_STAT
printf("\n i = %u numPairs = %u ", g_STAT_OFFSET, numPairs / 2);
g_STAT_OFFSET++;
printf("\n i = %d numPairs = %d ", ttt, numPairs / 2);
ttt++;
{
unsigned i;
UInt32 i;
for (i = 0; i < numPairs; i += 2)
printf("%2u %6u | ", p->matches[i], p->matches[i + 1]);
printf("%2d %6d | ", p->matches[i], p->matches[i + 1]);
}
#endif
if (numPairs == 0)
return 0;
if (numPairs > 0)
{
const unsigned len = p->matches[(size_t)numPairs - 2];
if (len != p->numFastBytes)
return len;
lenRes = p->matches[numPairs - 2];
if (lenRes == p->numFastBytes)
{
const Byte *pby = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
UInt32 distance = p->matches[numPairs - 1] + 1;
UInt32 numAvail = p->numAvail;
if (numAvail > LZMA_MATCH_LEN_MAX)
numAvail = LZMA_MATCH_LEN_MAX;
{
const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
const Byte *p2 = p1 + len;
const ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1];
const Byte *lim = p1 + numAvail;
for (; p2 != lim && *p2 == p2[dif]; p2++)
{}
return (unsigned)(p2 - p1);
const Byte *pby2 = pby - distance;
for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++);
}
}
}
p->additionalOffset++;
*numDistancePairsRes = numPairs;
return lenRes;
}
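/* Editorial note (added): matches[] holds (length, distance) pairs sorted
   by increasing length, and numPairs is twice the pair count; a scan
   looks like

   for (i = 0; i < numPairs; i += 2)
     use(matches[i], matches[i + 1]);   // len, dist

   When the best length equals numFastBytes the match finder stopped
   early, so both variants above extend that match by direct byte
   comparison before deciding how far to skip. */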
#define MARK_LIT ((UInt32)(Int32)-1)
#define MakeAs_Lit(p) { (p)->dist = MARK_LIT; (p)->extra = 0; }
#define MakeAs_ShortRep(p) { (p)->dist = 0; (p)->extra = 0; }
#define IsShortRep(p) ((p)->dist == 0)
#define MakeAsChar(p) (p)->backPrev = (UInt32)(-1); (p)->prev1IsChar = False;
#define MakeAsShortRep(p) (p)->backPrev = 0; (p)->prev1IsChar = False;
#define IsShortRep(p) ((p)->backPrev == 0)
static UInt32 GetRepLen1Price(CLzmaEnc *p, UInt32 state, UInt32 posState)
{
return
GET_PRICE_0(p->isRepG0[state]) +
GET_PRICE_0(p->isRep0Long[state][posState]);
}
#define GetPrice_ShortRep(p, state, posState) \
( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState]))
#define GetPrice_Rep_0(p, state, posState) ( \
GET_PRICE_1(p->isMatch[state][posState]) \
+ GET_PRICE_1(p->isRep0Long[state][posState])) \
+ GET_PRICE_1(p->isRep[state]) \
+ GET_PRICE_0(p->isRepG0[state])
Z7_FORCE_INLINE
static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState)
static UInt32 GetPureRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 state, UInt32 posState)
{
UInt32 price;
UInt32 prob = p->isRepG0[state];
if (repIndex == 0)
{
price = GET_PRICE_0(prob);
price = GET_PRICE_0(p->isRepG0[state]);
price += GET_PRICE_1(p->isRep0Long[state][posState]);
}
else
{
price = GET_PRICE_1(prob);
prob = p->isRepG1[state];
price = GET_PRICE_1(p->isRepG0[state]);
if (repIndex == 1)
price += GET_PRICE_0(prob);
price += GET_PRICE_0(p->isRepG1[state]);
else
{
price += GET_PRICE_1(prob);
price += GET_PRICE_1(p->isRepG1[state]);
price += GET_PRICE(p->isRepG2[state], repIndex - 2);
}
}
return price;
}
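/* Editorial note (added): the four rep distances form a small unary
   decision tree: isRepG0 picks rep0 vs. the rest; for rep0, isRep0Long
   separates the 1-byte "short rep" from a longer rep0 match; isRepG1 then
   isRepG2 resolve rep1 vs. rep2/rep3. GetPrice_PureRep/GetPureRepPrice
   simply sum the prices of the bits along that path. */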
static unsigned Backward(CLzmaEnc *p, unsigned cur)
static UInt32 GetRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 len, UInt32 state, UInt32 posState)
{
unsigned wr = cur + 1;
p->optEnd = wr;
for (;;)
{
UInt32 dist = p->opt[cur].dist;
unsigned len = (unsigned)p->opt[cur].len;
unsigned extra = (unsigned)p->opt[cur].extra;
cur -= len;
if (extra)
{
wr--;
p->opt[wr].len = (UInt32)len;
cur -= extra;
len = extra;
if (extra == 1)
{
p->opt[wr].dist = dist;
dist = MARK_LIT;
}
else
{
p->opt[wr].dist = 0;
len--;
wr--;
p->opt[wr].dist = MARK_LIT;
p->opt[wr].len = 1;
}
}
if (cur == 0)
{
p->backRes = dist;
p->optCur = wr;
return len;
}
wr--;
p->opt[wr].dist = dist;
p->opt[wr].len = (UInt32)len;
}
return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] +
GetPureRepPrice(p, repIndex, state, posState);
}
#define LIT_PROBS(pos, prevByte) \
(p->litProbs + (UInt32)3 * (((((pos) << 8) + (prevByte)) & p->lpMask) << p->lc))
static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
static UInt32 Backward(CLzmaEnc *p, UInt32 *backRes, UInt32 cur)
{
unsigned last, cur;
UInt32 reps[LZMA_NUM_REPS];
unsigned repLens[LZMA_NUM_REPS];
UInt32 posMem = p->opt[cur].posPrev;
UInt32 backMem = p->opt[cur].backPrev;
p->optimumEndIndex = cur;
do
{
if (p->opt[cur].prev1IsChar)
{
MakeAsChar(&p->opt[posMem])
p->opt[posMem].posPrev = posMem - 1;
if (p->opt[cur].prev2)
{
p->opt[posMem - 1].prev1IsChar = False;
p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2;
p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2;
}
}
{
UInt32 posPrev = posMem;
UInt32 backCur = backMem;
backMem = p->opt[posPrev].backPrev;
posMem = p->opt[posPrev].posPrev;
p->opt[posPrev].backPrev = backCur;
p->opt[posPrev].posPrev = cur;
cur = posPrev;
}
}
while (cur != 0);
*backRes = p->opt[0].backPrev;
p->optimumCurrentIndex = p->opt[0].posPrev;
return p->optimumCurrentIndex;
}
#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos) & p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * 0x300)
static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes)
{
UInt32 numAvail, mainLen, numPairs, repMaxIndex, i, posState, lenEnd, len, cur;
UInt32 matchPrice, repMatchPrice, normalMatchPrice;
UInt32 reps[LZMA_NUM_REPS], repLens[LZMA_NUM_REPS];
UInt32 *matches;
const Byte *data;
Byte curByte, matchByte;
if (p->optimumEndIndex != p->optimumCurrentIndex)
{
const COptimal *opt = &p->opt[p->optimumCurrentIndex];
UInt32 lenRes = opt->posPrev - p->optimumCurrentIndex;
*backRes = opt->backPrev;
p->optimumCurrentIndex = opt->posPrev;
return lenRes;
}
p->optimumCurrentIndex = p->optimumEndIndex = 0;
if (p->additionalOffset == 0)
mainLen = ReadMatchDistances(p, &numPairs);
else
{
mainLen = p->longestMatchLength;
numPairs = p->numPairs;
}
numAvail = p->numAvail;
if (numAvail < 2)
{
*backRes = (UInt32)(-1);
return 1;
}
if (numAvail > LZMA_MATCH_LEN_MAX)
numAvail = LZMA_MATCH_LEN_MAX;
data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
repMaxIndex = 0;
for (i = 0; i < LZMA_NUM_REPS; i++)
{
UInt32 lenTest;
const Byte *data2;
reps[i] = p->reps[i];
data2 = data - (reps[i] + 1);
if (data[0] != data2[0] || data[1] != data2[1])
{
repLens[i] = 0;
continue;
}
for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++);
repLens[i] = lenTest;
if (lenTest > repLens[repMaxIndex])
repMaxIndex = i;
}
if (repLens[repMaxIndex] >= p->numFastBytes)
{
UInt32 lenRes;
*backRes = repMaxIndex;
lenRes = repLens[repMaxIndex];
MovePos(p, lenRes - 1);
return lenRes;
}
matches = p->matches;
if (mainLen >= p->numFastBytes)
{
*backRes = matches[numPairs - 1] + LZMA_NUM_REPS;
MovePos(p, mainLen - 1);
return mainLen;
}
curByte = *data;
matchByte = *(data - (reps[0] + 1));
if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2)
{
*backRes = (UInt32)-1;
return 1;
}
p->opt[0].state = (CState)p->state;
posState = (position & p->pbMask);
{
UInt32 numAvail;
unsigned numPairs, mainLen, repMaxIndex, i, posState;
UInt32 matchPrice, repMatchPrice;
const Byte *data;
Byte curByte, matchByte;
p->optCur = p->optEnd = 0;
if (p->additionalOffset == 0)
mainLen = ReadMatchDistances(p, &numPairs);
else
{
mainLen = p->longestMatchLen;
numPairs = p->numPairs;
}
numAvail = p->numAvail;
if (numAvail < 2)
{
p->backRes = MARK_LIT;
return 1;
}
if (numAvail > LZMA_MATCH_LEN_MAX)
numAvail = LZMA_MATCH_LEN_MAX;
data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
repMaxIndex = 0;
for (i = 0; i < LZMA_NUM_REPS; i++)
{
unsigned len;
const Byte *data2;
reps[i] = p->reps[i];
data2 = data - reps[i];
if (data[0] != data2[0] || data[1] != data2[1])
{
repLens[i] = 0;
continue;
}
for (len = 2; len < numAvail && data[len] == data2[len]; len++)
{}
repLens[i] = len;
if (len > repLens[repMaxIndex])
repMaxIndex = i;
if (len == LZMA_MATCH_LEN_MAX) // 21.03 : optimization
break;
}
if (repLens[repMaxIndex] >= p->numFastBytes)
{
unsigned len;
p->backRes = (UInt32)repMaxIndex;
len = repLens[repMaxIndex];
MOVE_POS(p, len - 1)
return len;
}
matches = p->matches;
#define MATCHES matches
// #define MATCHES p->matches
if (mainLen >= p->numFastBytes)
{
p->backRes = MATCHES[(size_t)numPairs - 1] + LZMA_NUM_REPS;
MOVE_POS(p, mainLen - 1)
return mainLen;
}
curByte = *data;
matchByte = *(data - reps[0]);
last = repLens[repMaxIndex];
if (last <= mainLen)
last = mainLen;
if (last < 2 && curByte != matchByte)
{
p->backRes = MARK_LIT;
return 1;
}
p->opt[0].state = (CState)p->state;
posState = (position & p->pbMask);
{
const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) +
(!IsLitState(p->state) ?
LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) :
const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) +
(!IsCharState(p->state) ?
LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) :
LitEnc_GetPrice(probs, curByte, p->ProbPrices));
}
}
MakeAs_Lit(&p->opt[1])
matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);
// 18.06
if (matchByte == curByte && repLens[0] == 0)
{
UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, p->state, posState);
if (shortRepPrice < p->opt[1].price)
{
p->opt[1].price = shortRepPrice;
MakeAs_ShortRep(&p->opt[1])
}
if (last < 2)
{
p->backRes = p->opt[1].dist;
return 1;
}
}
p->opt[1].len = 1;
p->opt[0].reps[0] = reps[0];
p->opt[0].reps[1] = reps[1];
p->opt[0].reps[2] = reps[2];
p->opt[0].reps[3] = reps[3];
// ---------- REP ----------
for (i = 0; i < LZMA_NUM_REPS; i++)
{
unsigned repLen = repLens[i];
UInt32 price;
if (repLen < 2)
continue;
price = repMatchPrice + GetPrice_PureRep(p, i, p->state, posState);
do
{
UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, repLen);
COptimal *opt = &p->opt[repLen];
if (price2 < opt->price)
{
opt->price = price2;
opt->len = (UInt32)repLen;
opt->dist = (UInt32)i;
opt->extra = 0;
}
}
while (--repLen >= 2);
}
// ---------- MATCH ----------
{
unsigned len = repLens[0] + 1;
if (len <= mainLen)
{
unsigned offs = 0;
UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]);
MakeAsChar(&p->opt[1]);
if (len < 2)
len = 2;
else
while (len > MATCHES[offs])
offs += 2;
for (; ; len++)
{
COptimal *opt;
UInt32 dist = MATCHES[(size_t)offs + 1];
UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
unsigned lenToPosState = GetLenToPosState(len);
if (dist < kNumFullDistances)
price += p->distancesPrices[lenToPosState][dist & (kNumFullDistances - 1)];
else
{
unsigned slot;
GetPosSlot2(dist, slot)
price += p->alignPrices[dist & kAlignMask];
price += p->posSlotPrices[lenToPosState][slot];
}
opt = &p->opt[len];
if (price < opt->price)
{
opt->price = price;
opt->len = (UInt32)len;
opt->dist = dist + LZMA_NUM_REPS;
opt->extra = 0;
}
if (len == MATCHES[offs])
{
offs += 2;
if (offs == numPairs)
break;
}
}
matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);
if (matchByte == curByte)
{
UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState);
if (shortRepPrice < p->opt[1].price)
{
p->opt[1].price = shortRepPrice;
MakeAsShortRep(&p->opt[1]);
}
}
lenEnd = ((mainLen >= repLens[repMaxIndex]) ? mainLen : repLens[repMaxIndex]);
if (lenEnd < 2)
{
*backRes = p->opt[1].backPrev;
return 1;
}
p->opt[1].posPrev = 0;
for (i = 0; i < LZMA_NUM_REPS; i++)
p->opt[0].backs[i] = reps[i];
len = lenEnd;
do
p->opt[len--].price = kInfinityPrice;
while (len >= 2);
for (i = 0; i < LZMA_NUM_REPS; i++)
{
UInt32 repLen = repLens[i];
UInt32 price;
if (repLen < 2)
continue;
price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState);
do
{
UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2];
COptimal *opt = &p->opt[repLen];
if (curAndLenPrice < opt->price)
{
opt->price = curAndLenPrice;
opt->posPrev = 0;
opt->backPrev = i;
opt->prev1IsChar = False;
}
}
while (--repLen >= 2);
}
cur = 0;
normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]);
len = ((repLens[0] >= 2) ? repLens[0] + 1 : 2);
if (len <= mainLen)
{
UInt32 offs = 0;
while (len > matches[offs])
offs += 2;
for (; ; len++)
{
COptimal *opt;
UInt32 distance = matches[offs + 1];
UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN];
UInt32 lenToPosState = GetLenToPosState(len);
if (distance < kNumFullDistances)
curAndLenPrice += p->distancesPrices[lenToPosState][distance];
else
{
UInt32 slot;
GetPosSlot2(distance, slot);
curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot];
}
opt = &p->opt[len];
if (curAndLenPrice < opt->price)
{
opt->price = curAndLenPrice;
opt->posPrev = 0;
opt->backPrev = distance + LZMA_NUM_REPS;
opt->prev1IsChar = False;
}
if (len == matches[offs])
{
offs += 2;
if (offs == numPairs)
break;
}
}
}
cur = 0;
#ifdef SHOW_STAT2
/* if (position >= 0) */
if (position >= 0)
{
unsigned i;
printf("\n pos = %4X", position);
for (i = cur; i <= last; i++)
printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price);
for (i = cur; i <= lenEnd; i++)
printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price);
}
#endif
}
// ---------- Optimal Parsing ----------
for (;;)
{
unsigned numAvail;
UInt32 numAvailFull;
unsigned newLen, numPairs, prev, state, posState, startLen;
UInt32 litPrice, matchPrice, repMatchPrice;
BoolInt nextIsLit;
UInt32 numAvailFull, newLen, numPairs, posPrev, state, posState, startLen;
UInt32 curPrice, curAnd1Price, matchPrice, repMatchPrice;
Bool nextIsChar;
Byte curByte, matchByte;
const Byte *data;
COptimal *curOpt, *nextOpt;
COptimal *curOpt;
COptimal *nextOpt;
if (++cur == last)
break;
// 18.06
if (cur >= kNumOpts - 64)
{
unsigned j, best;
UInt32 price = p->opt[cur].price;
best = cur;
for (j = cur + 1; j <= last; j++)
{
UInt32 price2 = p->opt[j].price;
if (price >= price2)
{
price = price2;
best = j;
}
}
{
unsigned delta = best - cur;
if (delta != 0)
{
MOVE_POS(p, delta)
}
}
cur = best;
break;
}
cur++;
if (cur == lenEnd)
return Backward(p, backRes, cur);
newLen = ReadMatchDistances(p, &numPairs);
if (newLen >= p->numFastBytes)
{
p->numPairs = numPairs;
p->longestMatchLen = newLen;
break;
p->longestMatchLength = newLen;
return Backward(p, backRes, cur);
}
curOpt = &p->opt[cur];
position++;
// we need that check here, if skip_items in p->opt are possible
/*
if (curOpt->price >= kInfinityPrice)
continue;
*/
prev = cur - curOpt->len;
if (curOpt->len == 1)
curOpt = &p->opt[cur];
posPrev = curOpt->posPrev;
if (curOpt->prev1IsChar)
{
posPrev--;
if (curOpt->prev2)
{
state = p->opt[curOpt->posPrev2].state;
if (curOpt->backPrev2 < LZMA_NUM_REPS)
state = kRepNextStates[state];
else
state = kMatchNextStates[state];
}
else
state = p->opt[posPrev].state;
state = kLiteralNextStates[state];
}
else
state = p->opt[posPrev].state;
if (posPrev == cur - 1)
{
state = (unsigned)p->opt[prev].state;
if (IsShortRep(curOpt))
state = kShortRepNextStates[state];
else
@@ -1510,499 +1192,355 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
else
{
UInt32 pos;
const COptimal *prevOpt;
UInt32 b0;
UInt32 dist = curOpt->dist;
if (curOpt->extra)
if (curOpt->prev1IsChar && curOpt->prev2)
{
prev -= (unsigned)curOpt->extra;
state = kState_RepAfterLit;
if (curOpt->extra == 1)
state = (dist < LZMA_NUM_REPS ? kState_RepAfterLit : kState_MatchAfterLit);
posPrev = curOpt->posPrev2;
pos = curOpt->backPrev2;
state = kRepNextStates[state];
}
else
{
state = (unsigned)p->opt[prev].state;
if (dist < LZMA_NUM_REPS)
pos = curOpt->backPrev;
if (pos < LZMA_NUM_REPS)
state = kRepNextStates[state];
else
state = kMatchNextStates[state];
}
prevOpt = &p->opt[prev];
b0 = prevOpt->reps[0];
if (dist < LZMA_NUM_REPS)
prevOpt = &p->opt[posPrev];
if (pos < LZMA_NUM_REPS)
{
if (dist == 0)
{
reps[0] = b0;
reps[1] = prevOpt->reps[1];
reps[2] = prevOpt->reps[2];
reps[3] = prevOpt->reps[3];
}
else
{
reps[1] = b0;
b0 = prevOpt->reps[1];
if (dist == 1)
{
reps[0] = b0;
reps[2] = prevOpt->reps[2];
reps[3] = prevOpt->reps[3];
}
else
{
reps[2] = b0;
reps[0] = prevOpt->reps[dist];
reps[3] = prevOpt->reps[dist ^ 1];
}
}
UInt32 i;
reps[0] = prevOpt->backs[pos];
for (i = 1; i <= pos; i++)
reps[i] = prevOpt->backs[i - 1];
for (; i < LZMA_NUM_REPS; i++)
reps[i] = prevOpt->backs[i];
}
else
{
reps[0] = (dist - LZMA_NUM_REPS + 1);
reps[1] = b0;
reps[2] = prevOpt->reps[1];
reps[3] = prevOpt->reps[2];
UInt32 i;
reps[0] = (pos - LZMA_NUM_REPS);
for (i = 1; i < LZMA_NUM_REPS; i++)
reps[i] = prevOpt->backs[i - 1];
}
}
curOpt->state = (CState)state;
curOpt->reps[0] = reps[0];
curOpt->reps[1] = reps[1];
curOpt->reps[2] = reps[2];
curOpt->reps[3] = reps[3];
curOpt->backs[0] = reps[0];
curOpt->backs[1] = reps[1];
curOpt->backs[2] = reps[2];
curOpt->backs[3] = reps[3];
curPrice = curOpt->price;
nextIsChar = False;
data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
curByte = *data;
matchByte = *(data - reps[0]);
matchByte = *(data - (reps[0] + 1));
posState = (position & p->pbMask);
/*
The order of Price checks:
< LIT
<= SHORT_REP
< LIT : REP_0
< REP [ : LIT : REP_0 ]
< MATCH [ : LIT : REP_0 ]
*/
{
UInt32 curPrice = curOpt->price;
unsigned prob = p->isMatch[state][posState];
matchPrice = curPrice + GET_PRICE_1(prob);
litPrice = curPrice + GET_PRICE_0(prob);
}
nextOpt = &p->opt[(size_t)cur + 1];
nextIsLit = False;
// here we can allow skip_items in p->opt, if we don't check (nextOpt->price < kInfinityPrice)
// 18.new.06
if ((nextOpt->price < kInfinityPrice
// && !IsLitState(state)
&& matchByte == curByte)
|| litPrice > nextOpt->price
)
litPrice = 0;
else
curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]);
{
const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
litPrice += (!IsLitState(state) ?
LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) :
curAnd1Price +=
(!IsCharState(state) ?
LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) :
LitEnc_GetPrice(probs, curByte, p->ProbPrices));
if (litPrice < nextOpt->price)
{
nextOpt->price = litPrice;
nextOpt->len = 1;
MakeAs_Lit(nextOpt)
nextIsLit = True;
}
}
nextOpt = &p->opt[cur + 1];
if (curAnd1Price < nextOpt->price)
{
nextOpt->price = curAnd1Price;
nextOpt->posPrev = cur;
MakeAsChar(nextOpt);
nextIsChar = True;
}
matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]);
repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]);
numAvailFull = p->numAvail;
if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0))
{
unsigned temp = kNumOpts - 1 - cur;
if (numAvailFull > temp)
numAvailFull = (UInt32)temp;
}
// 18.06
// ---------- SHORT_REP ----------
if (IsLitState(state)) // 18.new
if (matchByte == curByte)
if (repMatchPrice < nextOpt->price) // 18.new
// if (numAvailFull < 2 || data[1] != *(data - reps[0] + 1))
if (
// nextOpt->price >= kInfinityPrice ||
nextOpt->len < 2 // we can check nextOpt->len, if skip items are not allowed in p->opt
|| (nextOpt->dist != 0
// && nextOpt->extra <= 1 // 17.old
)
)
{
UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, state, posState);
// if (shortRepPrice <= nextOpt->price) // 17.old
if (shortRepPrice < nextOpt->price) // 18.new
UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState);
if (shortRepPrice <= nextOpt->price)
{
nextOpt->price = shortRepPrice;
nextOpt->len = 1;
MakeAs_ShortRep(nextOpt)
nextIsLit = False;
nextOpt->posPrev = cur;
MakeAsShortRep(nextOpt);
nextIsChar = True;
}
}
numAvailFull = p->numAvail;
{
UInt32 temp = kNumOpts - 1 - cur;
if (temp < numAvailFull)
numAvailFull = temp;
}
if (numAvailFull < 2)
continue;
numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes);
// numAvail <= p->numFastBytes
// ---------- LIT : REP_0 ----------
if (!nextIsLit
&& litPrice != 0 // 18.new
&& matchByte != curByte
&& numAvailFull > 2)
if (!nextIsChar && matchByte != curByte) /* speed optimization */
{
const Byte *data2 = data - reps[0];
if (data[1] == data2[1] && data[2] == data2[2])
/* try Literal + rep0 */
UInt32 temp;
UInt32 lenTest2;
const Byte *data2 = data - (reps[0] + 1);
UInt32 limit = p->numFastBytes + 1;
if (limit > numAvailFull)
limit = numAvailFull;
for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++);
lenTest2 = temp - 1;
if (lenTest2 >= 2)
{
unsigned len;
unsigned limit = p->numFastBytes + 1;
if (limit > numAvailFull)
limit = numAvailFull;
for (len = 3; len < limit && data[len] == data2[len]; len++)
{}
UInt32 state2 = kLiteralNextStates[state];
UInt32 posStateNext = (position + 1) & p->pbMask;
UInt32 nextRepMatchPrice = curAnd1Price +
GET_PRICE_1(p->isMatch[state2][posStateNext]) +
GET_PRICE_1(p->isRep[state2]);
/* for (; lenTest2 >= 2; lenTest2--) */
{
unsigned state2 = kLiteralNextStates[state];
unsigned posState2 = (position + 1) & p->pbMask;
UInt32 price = litPrice + GetPrice_Rep_0(p, state2, posState2);
UInt32 curAndLenPrice;
COptimal *opt;
UInt32 offset = cur + 1 + lenTest2;
while (lenEnd < offset)
p->opt[++lenEnd].price = kInfinityPrice;
curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
opt = &p->opt[offset];
if (curAndLenPrice < opt->price)
{
unsigned offset = cur + len;
if (last < offset)
last = offset;
// do
{
UInt32 price2;
COptimal *opt;
len--;
// price2 = price + GetPrice_Len_Rep_0(p, len, state2, posState2);
price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len);
opt = &p->opt[offset];
// offset--;
if (price2 < opt->price)
{
opt->price = price2;
opt->len = (UInt32)len;
opt->dist = 0;
opt->extra = 1;
}
}
// while (len >= 3);
opt->price = curAndLenPrice;
opt->posPrev = cur + 1;
opt->backPrev = 0;
opt->prev1IsChar = True;
opt->prev2 = False;
}
}
}
}
startLen = 2; /* speed optimization */
{
// ---------- REP ----------
unsigned repIndex = 0; // 17.old
// unsigned repIndex = IsLitState(state) ? 0 : 1; // 18.notused
for (; repIndex < LZMA_NUM_REPS; repIndex++)
UInt32 repIndex;
for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++)
{
UInt32 lenTest;
UInt32 lenTestTemp;
UInt32 price;
const Byte *data2 = data - (reps[repIndex] + 1);
if (data[0] != data2[0] || data[1] != data2[1])
continue;
for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++);
while (lenEnd < cur + lenTest)
p->opt[++lenEnd].price = kInfinityPrice;
lenTestTemp = lenTest;
price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState);
do
{
unsigned len;
UInt32 price;
const Byte *data2 = data - reps[repIndex];
if (data[0] != data2[0] || data[1] != data2[1])
continue;
for (len = 2; len < numAvail && data[len] == data2[len]; len++)
{}
// if (len < startLen) continue; // 18.new: speed optimization
UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2];
COptimal *opt = &p->opt[cur + lenTest];
if (curAndLenPrice < opt->price)
{
unsigned offset = cur + len;
if (last < offset)
last = offset;
}
{
unsigned len2 = len;
price = repMatchPrice + GetPrice_PureRep(p, repIndex, state, posState);
do
{
UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, len2);
COptimal *opt = &p->opt[cur + len2];
if (price2 < opt->price)
{
opt->price = price2;
opt->len = (UInt32)len2;
opt->dist = (UInt32)repIndex;
opt->extra = 0;
}
}
while (--len2 >= 2);
}
if (repIndex == 0) startLen = len + 1; // 17.old
// startLen = len + 1; // 18.new
/* if (_maxMode) */
{
// ---------- REP : LIT : REP_0 ----------
// numFastBytes + 1 + numFastBytes
unsigned len2 = len + 1;
unsigned limit = len2 + p->numFastBytes;
if (limit > numAvailFull)
limit = numAvailFull;
len2 += 2;
if (len2 <= limit)
if (data[len2 - 2] == data2[len2 - 2])
if (data[len2 - 1] == data2[len2 - 1])
{
unsigned state2 = kRepNextStates[state];
unsigned posState2 = (position + len) & p->pbMask;
price += GET_PRICE_LEN(&p->repLenEnc, posState, len)
+ GET_PRICE_0(p->isMatch[state2][posState2])
+ LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]),
data[len], data2[len], p->ProbPrices);
// state2 = kLiteralNextStates[state2];
state2 = kState_LitAfterRep;
posState2 = (posState2 + 1) & p->pbMask;
price += GetPrice_Rep_0(p, state2, posState2);
for (; len2 < limit && data[len2] == data2[len2]; len2++)
{}
len2 -= len;
// if (len2 >= 3)
{
{
unsigned offset = cur + len + len2;
if (last < offset)
last = offset;
// do
{
UInt32 price2;
COptimal *opt;
len2--;
// price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2);
price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2);
opt = &p->opt[offset];
// offset--;
if (price2 < opt->price)
{
opt->price = price2;
opt->len = (UInt32)len2;
opt->extra = (CExtra)(len + 1);
opt->dist = (UInt32)repIndex;
}
}
// while (len2 >= 3);
}
}
}
opt->price = curAndLenPrice;
opt->posPrev = cur;
opt->backPrev = repIndex;
opt->prev1IsChar = False;
}
}
while (--lenTest >= 2);
lenTest = lenTestTemp;
if (repIndex == 0)
startLen = lenTest + 1;
/* if (_maxMode) */
{
UInt32 lenTest2 = lenTest + 1;
UInt32 limit = lenTest2 + p->numFastBytes;
UInt32 nextRepMatchPrice;
if (limit > numAvailFull)
limit = numAvailFull;
for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++);
lenTest2 -= lenTest + 1;
if (lenTest2 >= 2)
{
UInt32 state2 = kRepNextStates[state];
UInt32 posStateNext = (position + lenTest) & p->pbMask;
UInt32 curAndLenCharPrice =
price + p->repLenEnc.prices[posState][lenTest - 2] +
GET_PRICE_0(p->isMatch[state2][posStateNext]) +
LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]),
data[lenTest], data2[lenTest], p->ProbPrices);
state2 = kLiteralNextStates[state2];
posStateNext = (position + lenTest + 1) & p->pbMask;
nextRepMatchPrice = curAndLenCharPrice +
GET_PRICE_1(p->isMatch[state2][posStateNext]) +
GET_PRICE_1(p->isRep[state2]);
/* for (; lenTest2 >= 2; lenTest2--) */
{
UInt32 curAndLenPrice;
COptimal *opt;
UInt32 offset = cur + lenTest + 1 + lenTest2;
while (lenEnd < offset)
p->opt[++lenEnd].price = kInfinityPrice;
curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
opt = &p->opt[offset];
if (curAndLenPrice < opt->price)
{
opt->price = curAndLenPrice;
opt->posPrev = cur + lenTest + 1;
opt->backPrev = 0;
opt->prev1IsChar = True;
opt->prev2 = True;
opt->posPrev2 = cur;
opt->backPrev2 = repIndex;
}
}
}
}
}
// ---------- MATCH ----------
/* for (unsigned len = 2; len <= newLen; len++) */
}
/* for (UInt32 lenTest = 2; lenTest <= newLen; lenTest++) */
if (newLen > numAvail)
{
newLen = numAvail;
for (numPairs = 0; newLen > MATCHES[numPairs]; numPairs += 2);
MATCHES[numPairs] = (UInt32)newLen;
for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2);
matches[numPairs] = newLen;
numPairs += 2;
}
// startLen = 2; /* speed optimization */
if (newLen >= startLen)
{
UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]);
UInt32 dist;
unsigned offs, posSlot, len;
{
unsigned offset = cur + newLen;
if (last < offset)
last = offset;
}
UInt32 offs, curBack, posSlot;
UInt32 lenTest;
while (lenEnd < cur + newLen)
p->opt[++lenEnd].price = kInfinityPrice;
offs = 0;
while (startLen > MATCHES[offs])
while (startLen > matches[offs])
offs += 2;
dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot)
for (len = /*2*/ startLen; ; len++)
curBack = matches[offs + 1];
GetPosSlot2(curBack, posSlot);
for (lenTest = /*2*/ startLen; ; lenTest++)
{
UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN];
UInt32 lenToPosState = GetLenToPosState(lenTest);
COptimal *opt;
if (curBack < kNumFullDistances)
curAndLenPrice += p->distancesPrices[lenToPosState][curBack];
else
curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask];
opt = &p->opt[cur + lenTest];
if (curAndLenPrice < opt->price)
{
COptimal *opt;
unsigned lenNorm = len - 2;
lenNorm = GetLenToPosState2(lenNorm);
if (dist < kNumFullDistances)
price += p->distancesPrices[lenNorm][dist & (kNumFullDistances - 1)];
else
price += p->posSlotPrices[lenNorm][posSlot] + p->alignPrices[dist & kAlignMask];
opt = &p->opt[cur + len];
if (price < opt->price)
{
opt->price = price;
opt->len = (UInt32)len;
opt->dist = dist + LZMA_NUM_REPS;
opt->extra = 0;
}
opt->price = curAndLenPrice;
opt->posPrev = cur;
opt->backPrev = curBack + LZMA_NUM_REPS;
opt->prev1IsChar = False;
}
if (len == MATCHES[offs])
if (/*_maxMode && */lenTest == matches[offs])
{
// if (p->_maxMode) {
// MATCH : LIT : REP_0
const Byte *data2 = data - dist - 1;
unsigned len2 = len + 1;
unsigned limit = len2 + p->numFastBytes;
/* Try Match + Literal + Rep0 */
const Byte *data2 = data - (curBack + 1);
UInt32 lenTest2 = lenTest + 1;
UInt32 limit = lenTest2 + p->numFastBytes;
UInt32 nextRepMatchPrice;
if (limit > numAvailFull)
limit = numAvailFull;
len2 += 2;
if (len2 <= limit)
if (data[len2 - 2] == data2[len2 - 2])
if (data[len2 - 1] == data2[len2 - 1])
for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++);
lenTest2 -= lenTest + 1;
if (lenTest2 >= 2)
{
for (; len2 < limit && data[len2] == data2[len2]; len2++)
{}
len2 -= len;
// if (len2 >= 3)
{
unsigned state2 = kMatchNextStates[state];
unsigned posState2 = (position + len) & p->pbMask;
unsigned offset;
price += GET_PRICE_0(p->isMatch[state2][posState2]);
price += LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]),
data[len], data2[len], p->ProbPrices);
// state2 = kLiteralNextStates[state2];
state2 = kState_LitAfterMatch;
posState2 = (posState2 + 1) & p->pbMask;
price += GetPrice_Rep_0(p, state2, posState2);
offset = cur + len + len2;
if (last < offset)
last = offset;
// do
UInt32 state2 = kMatchNextStates[state];
UInt32 posStateNext = (position + lenTest) & p->pbMask;
UInt32 curAndLenCharPrice = curAndLenPrice +
GET_PRICE_0(p->isMatch[state2][posStateNext]) +
LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]),
data[lenTest], data2[lenTest], p->ProbPrices);
state2 = kLiteralNextStates[state2];
posStateNext = (posStateNext + 1) & p->pbMask;
nextRepMatchPrice = curAndLenCharPrice +
GET_PRICE_1(p->isMatch[state2][posStateNext]) +
GET_PRICE_1(p->isRep[state2]);
/* for (; lenTest2 >= 2; lenTest2--) */
{
UInt32 price2;
UInt32 offset = cur + lenTest + 1 + lenTest2;
UInt32 curAndLenPrice;
COptimal *opt;
len2--;
// price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2);
price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2);
while (lenEnd < offset)
p->opt[++lenEnd].price = kInfinityPrice;
curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
opt = &p->opt[offset];
// offset--;
if (price2 < opt->price)
if (curAndLenPrice < opt->price)
{
opt->price = price2;
opt->len = (UInt32)len2;
opt->extra = (CExtra)(len + 1);
opt->dist = dist + LZMA_NUM_REPS;
opt->price = curAndLenPrice;
opt->posPrev = cur + lenTest + 1;
opt->backPrev = 0;
opt->prev1IsChar = True;
opt->prev2 = True;
opt->posPrev2 = cur;
opt->backPrev2 = curBack + LZMA_NUM_REPS;
}
}
// while (len2 >= 3);
}
}
offs += 2;
if (offs == numPairs)
break;
dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot)
curBack = matches[offs + 1];
if (curBack >= kNumFullDistances)
GetPosSlot2(curBack, posSlot);
}
}
}
}
do
p->opt[last].price = kInfinityPrice;
while (--last);
return Backward(p, cur);
}
#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist))
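/* A hedged illustration (not SDK code) of the ChangePair() heuristic above:
   it is true when bigDist is more than ~128x smallDist (bigDist >> 7 > smallDist),
   i.e. when trading one byte of match length for a much nearer distance is
   likely cheaper to encode. The distances below are made-up examples. */
#include <assert.h>
static void ChangePair_Example(void)
{
  assert(ChangePair(500, 100000));  /* 100000 >> 7 == 781 > 500: prefer the near match */
  assert(!ChangePair(500, 1000));   /* 1000 >> 7 == 7, not > 500: keep the longer match */
}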
static unsigned GetOptimumFast(CLzmaEnc *p)
static UInt32 GetOptimumFast(CLzmaEnc *p, UInt32 *backRes)
{
UInt32 numAvail, mainDist;
unsigned mainLen, numPairs, repIndex, repLen, i;
UInt32 numAvail, mainLen, mainDist, numPairs, repIndex, repLen, i;
const Byte *data;
const UInt32 *matches;
if (p->additionalOffset == 0)
mainLen = ReadMatchDistances(p, &numPairs);
else
{
mainLen = p->longestMatchLen;
mainLen = p->longestMatchLength;
numPairs = p->numPairs;
}
numAvail = p->numAvail;
p->backRes = MARK_LIT;
*backRes = (UInt32)-1;
if (numAvail < 2)
return 1;
// if (mainLen < 2 && p->state == 0) return 1; // 18.06.notused
if (numAvail > LZMA_MATCH_LEN_MAX)
numAvail = LZMA_MATCH_LEN_MAX;
data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
repLen = repIndex = 0;
for (i = 0; i < LZMA_NUM_REPS; i++)
{
unsigned len;
const Byte *data2 = data - p->reps[i];
UInt32 len;
const Byte *data2 = data - (p->reps[i] + 1);
if (data[0] != data2[0] || data[1] != data2[1])
continue;
for (len = 2; len < numAvail && data[len] == data2[len]; len++)
{}
for (len = 2; len < numAvail && data[len] == data2[len]; len++);
if (len >= p->numFastBytes)
{
p->backRes = (UInt32)i;
MOVE_POS(p, len - 1)
*backRes = i;
MovePos(p, len - 1);
return len;
}
if (len > repLen)
@@ -2012,182 +1550,98 @@ static unsigned GetOptimumFast(CLzmaEnc *p)
}
}
matches = p->matches;
if (mainLen >= p->numFastBytes)
{
p->backRes = p->matches[(size_t)numPairs - 1] + LZMA_NUM_REPS;
MOVE_POS(p, mainLen - 1)
*backRes = matches[numPairs - 1] + LZMA_NUM_REPS;
MovePos(p, mainLen - 1);
return mainLen;
}
mainDist = 0; /* for GCC */
if (mainLen >= 2)
{
mainDist = p->matches[(size_t)numPairs - 1];
while (numPairs > 2)
mainDist = matches[numPairs - 1];
while (numPairs > 2 && mainLen == matches[numPairs - 4] + 1)
{
UInt32 dist2;
if (mainLen != p->matches[(size_t)numPairs - 4] + 1)
break;
dist2 = p->matches[(size_t)numPairs - 3];
if (!ChangePair(dist2, mainDist))
if (!ChangePair(matches[numPairs - 3], mainDist))
break;
numPairs -= 2;
mainLen--;
mainDist = dist2;
mainLen = matches[numPairs - 2];
mainDist = matches[numPairs - 1];
}
if (mainLen == 2 && mainDist >= 0x80)
mainLen = 1;
}
if (repLen >= 2)
if ( repLen + 1 >= mainLen
|| (repLen + 2 >= mainLen && mainDist >= (1 << 9))
|| (repLen + 3 >= mainLen && mainDist >= (1 << 15)))
if (repLen >= 2 && (
(repLen + 1 >= mainLen) ||
(repLen + 2 >= mainLen && mainDist >= (1 << 9)) ||
(repLen + 3 >= mainLen && mainDist >= (1 << 15))))
{
p->backRes = (UInt32)repIndex;
MOVE_POS(p, repLen - 1)
*backRes = repIndex;
MovePos(p, repLen - 1);
return repLen;
}
if (mainLen < 2 || numAvail <= 2)
return 1;
p->longestMatchLength = ReadMatchDistances(p, &p->numPairs);
if (p->longestMatchLength >= 2)
{
unsigned len1 = ReadMatchDistances(p, &p->numPairs);
p->longestMatchLen = len1;
if (len1 >= 2)
{
UInt32 newDist = p->matches[(size_t)p->numPairs - 1];
if ( (len1 >= mainLen && newDist < mainDist)
|| (len1 == mainLen + 1 && !ChangePair(mainDist, newDist))
|| (len1 > mainLen + 1)
|| (len1 + 1 >= mainLen && mainLen >= 3 && ChangePair(newDist, mainDist)))
return 1;
}
UInt32 newDistance = matches[p->numPairs - 1];
if ((p->longestMatchLength >= mainLen && newDistance < mainDist) ||
(p->longestMatchLength == mainLen + 1 && !ChangePair(mainDist, newDistance)) ||
(p->longestMatchLength > mainLen + 1) ||
(p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && ChangePair(newDistance, mainDist)))
return 1;
}
data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
for (i = 0; i < LZMA_NUM_REPS; i++)
{
unsigned len, limit;
const Byte *data2 = data - p->reps[i];
UInt32 len, limit;
const Byte *data2 = data - (p->reps[i] + 1);
if (data[0] != data2[0] || data[1] != data2[1])
continue;
limit = mainLen - 1;
for (len = 2;; len++)
{
if (len >= limit)
return 1;
if (data[len] != data2[len])
break;
}
}
p->backRes = mainDist + LZMA_NUM_REPS;
if (mainLen != 2)
{
MOVE_POS(p, mainLen - 2)
for (len = 2; len < limit && data[len] == data2[len]; len++);
if (len >= limit)
return 1;
}
*backRes = mainDist + LZMA_NUM_REPS;
MovePos(p, mainLen - 2);
return mainLen;
}
static void WriteEndMarker(CLzmaEnc *p, unsigned posState)
static void WriteEndMarker(CLzmaEnc *p, UInt32 posState)
{
UInt32 range;
range = p->rc.range;
{
UInt32 ttt, newBound;
CLzmaProb *prob = &p->isMatch[p->state][posState];
RC_BIT_PRE(&p->rc, prob)
RC_BIT_1(&p->rc, prob)
prob = &p->isRep[p->state];
RC_BIT_PRE(&p->rc, prob)
RC_BIT_0(&p->rc, prob)
}
UInt32 len;
RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1);
RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0);
p->state = kMatchNextStates[p->state];
p->rc.range = range;
LenEnc_Encode(&p->lenProbs, &p->rc, 0, posState);
range = p->rc.range;
{
// RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[0], (1 << kNumPosSlotBits) - 1);
CLzmaProb *probs = p->posSlotEncoder[0];
unsigned m = 1;
do
{
UInt32 ttt, newBound;
RC_BIT_PRE(p, probs + m)
RC_BIT_1(&p->rc, probs + m)
m = (m << 1) + 1;
}
while (m < (1 << kNumPosSlotBits));
}
{
// RangeEnc_EncodeDirectBits(&p->rc, ((UInt32)1 << (30 - kNumAlignBits)) - 1, 30 - kNumAlignBits); UInt32 range = p->range;
unsigned numBits = 30 - kNumAlignBits;
do
{
range >>= 1;
p->rc.low += range;
RC_NORM(&p->rc)
}
while (--numBits);
}
{
// RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask);
CLzmaProb *probs = p->posAlignEncoder;
unsigned m = 1;
do
{
UInt32 ttt, newBound;
RC_BIT_PRE(p, probs + m)
RC_BIT_1(&p->rc, probs + m)
m = (m << 1) + 1;
}
while (m < kAlignTableSize);
}
p->rc.range = range;
len = LZMA_MATCH_LEN_MIN;
LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1);
RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits);
RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask);
}
static SRes CheckErrors(CLzmaEnc *p)
{
if (p->result != SZ_OK)
return p->result;
if (p->rc.res != SZ_OK)
p->result = SZ_ERROR_WRITE;
#ifndef Z7_ST
if (
// p->mf_Failure ||
(p->mtMode &&
( // p->matchFinderMt.failure_LZ_LZ ||
p->matchFinderMt.failure_LZ_BT))
)
{
p->result = MY_HRES_ERROR_INTERNAL_ERROR;
// printf("\nCheckErrors p->matchFinderMt.failureLZ\n");
}
#endif
if (MFB.result != SZ_OK)
if (p->matchFinderBase.result != SZ_OK)
p->result = SZ_ERROR_READ;
if (p->result != SZ_OK)
p->finished = True;
return p->result;
}
Z7_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
{
/* ReleaseMFStream(); */
p->finished = True;
@@ -2198,140 +1652,61 @@ Z7_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
return CheckErrors(p);
}
Z7_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
static void FillAlignPrices(CLzmaEnc *p)
{
unsigned i;
const CProbPrice *ProbPrices = p->ProbPrices;
const CLzmaProb *probs = p->posAlignEncoder;
// p->alignPriceCount = 0;
for (i = 0; i < kAlignTableSize / 2; i++)
{
UInt32 price = 0;
unsigned sym = i;
unsigned m = 1;
unsigned bit;
UInt32 prob;
bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
prob = probs[m];
p->alignPrices[i ] = price + GET_PRICEa_0(prob);
p->alignPrices[i + 8] = price + GET_PRICEa_1(prob);
// p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices);
}
UInt32 i;
for (i = 0; i < kAlignTableSize; i++)
p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices);
p->alignPriceCount = 0;
}
Z7_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
static void FillDistancesPrices(CLzmaEnc *p)
{
// int y; for (y = 0; y < 100; y++) {
UInt32 tempPrices[kNumFullDistances];
unsigned i, lps;
UInt32 i, lenToPosState;
for (i = kStartPosModelIndex; i < kNumFullDistances; i++)
{
UInt32 posSlot = GetPosSlot1(i);
UInt32 footerBits = ((posSlot >> 1) - 1);
UInt32 base = ((2 | (posSlot & 1)) << footerBits);
tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, i - base, p->ProbPrices);
}
const CProbPrice *ProbPrices = p->ProbPrices;
for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++)
{
UInt32 posSlot;
const CLzmaProb *encoder = p->posSlotEncoder[lenToPosState];
UInt32 *posSlotPrices = p->posSlotPrices[lenToPosState];
for (posSlot = 0; posSlot < p->distTableSize; posSlot++)
posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices);
for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++)
posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits);
{
UInt32 *distancesPrices = p->distancesPrices[lenToPosState];
UInt32 i;
for (i = 0; i < kStartPosModelIndex; i++)
distancesPrices[i] = posSlotPrices[i];
for (; i < kNumFullDistances; i++)
distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i];
}
}
p->matchPriceCount = 0;
for (i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++)
{
unsigned posSlot = GetPosSlot1(i);
unsigned footerBits = (posSlot >> 1) - 1;
unsigned base = ((2 | (posSlot & 1)) << footerBits);
const CLzmaProb *probs = p->posEncoders + (size_t)base * 2;
// tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base, footerBits, i - base, p->ProbPrices);
UInt32 price = 0;
unsigned m = 1;
unsigned sym = i;
unsigned offset = (unsigned)1 << footerBits;
base += i;
if (footerBits)
do
{
unsigned bit = sym & 1;
sym >>= 1;
price += GET_PRICEa(probs[m], bit);
m = (m << 1) + bit;
}
while (--footerBits);
{
unsigned prob = probs[m];
tempPrices[base ] = price + GET_PRICEa_0(prob);
tempPrices[base + offset] = price + GET_PRICEa_1(prob);
}
}
for (lps = 0; lps < kNumLenToPosStates; lps++)
{
unsigned slot;
unsigned distTableSize2 = (p->distTableSize + 1) >> 1;
UInt32 *posSlotPrices = p->posSlotPrices[lps];
const CLzmaProb *probs = p->posSlotEncoder[lps];
for (slot = 0; slot < distTableSize2; slot++)
{
// posSlotPrices[slot] = RcTree_GetPrice(encoder, kNumPosSlotBits, slot, p->ProbPrices);
UInt32 price;
unsigned bit;
unsigned sym = slot + (1 << (kNumPosSlotBits - 1));
unsigned prob;
bit = sym & 1; sym >>= 1; price = GET_PRICEa(probs[sym], bit);
bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
prob = probs[(size_t)slot + (1 << (kNumPosSlotBits - 1))];
posSlotPrices[(size_t)slot * 2 ] = price + GET_PRICEa_0(prob);
posSlotPrices[(size_t)slot * 2 + 1] = price + GET_PRICEa_1(prob);
}
{
UInt32 delta = ((UInt32)((kEndPosModelIndex / 2 - 1) - kNumAlignBits) << kNumBitPriceShiftBits);
for (slot = kEndPosModelIndex / 2; slot < distTableSize2; slot++)
{
posSlotPrices[(size_t)slot * 2 ] += delta;
posSlotPrices[(size_t)slot * 2 + 1] += delta;
delta += ((UInt32)1 << kNumBitPriceShiftBits);
}
}
{
UInt32 *dp = p->distancesPrices[lps];
dp[0] = posSlotPrices[0];
dp[1] = posSlotPrices[1];
dp[2] = posSlotPrices[2];
dp[3] = posSlotPrices[3];
for (i = 4; i < kNumFullDistances; i += 2)
{
UInt32 slotPrice = posSlotPrices[GetPosSlot1(i)];
dp[i ] = slotPrice + tempPrices[i];
dp[i + 1] = slotPrice + tempPrices[i + 1];
}
}
}
// }
}
static void LzmaEnc_Construct(CLzmaEnc *p)
void LzmaEnc_Construct(CLzmaEnc *p)
{
RangeEnc_Construct(&p->rc);
MatchFinder_Construct(&MFB);
#ifndef Z7_ST
p->matchFinderMt.MatchFinder = &MFB;
MatchFinder_Construct(&p->matchFinderBase);
#ifdef COMPRESS_MF_MT
MatchFinderMt_Construct(&p->matchFinderMt);
p->matchFinderMt.MatchFinder = &p->matchFinderBase;
#endif
{
CLzmaEncProps props;
LzmaEncProps_Init(&props);
LzmaEnc_SetProps((CLzmaEncHandle)(void *)p, &props);
LzmaEnc_SetProps(p, &props);
}
#ifndef LZMA_LOG_BSR
@@ -2339,370 +1714,226 @@ static void LzmaEnc_Construct(CLzmaEnc *p)
#endif
LzmaEnc_InitPriceTables(p->ProbPrices);
p->litProbs = NULL;
p->saveState.litProbs = NULL;
p->litProbs = 0;
p->saveState.litProbs = 0;
}
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc)
{
void *p;
p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc));
if (p)
p = alloc->Alloc(alloc, sizeof(CLzmaEnc));
if (p != 0)
LzmaEnc_Construct((CLzmaEnc *)p);
return p;
}
static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAlloc *alloc)
{
ISzAlloc_Free(alloc, p->litProbs);
ISzAlloc_Free(alloc, p->saveState.litProbs);
p->litProbs = NULL;
p->saveState.litProbs = NULL;
alloc->Free(alloc, p->litProbs);
alloc->Free(alloc, p->saveState.litProbs);
p->litProbs = 0;
p->saveState.litProbs = 0;
}
static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
void LzmaEnc_Destruct(CLzmaEnc *p, ISzAlloc *alloc, ISzAlloc *allocBig)
{
#ifndef Z7_ST
#ifdef COMPRESS_MF_MT
MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
#endif
MatchFinder_Free(&MFB, allocBig);
MatchFinder_Free(&p->matchFinderBase, allocBig);
LzmaEnc_FreeLits(p, alloc);
RangeEnc_Free(&p->rc, alloc);
}
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig)
{
// GET_CLzmaEnc_p
LzmaEnc_Destruct(p, alloc, allocBig);
ISzAlloc_Free(alloc, p);
LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig);
alloc->Free(alloc, p);
}
Z7_NO_INLINE
static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize)
static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize, UInt32 maxUnpackSize)
{
UInt32 nowPos32, startPos32;
if (p->needInit)
if (p->inStream != 0)
{
#ifndef Z7_ST
if (p->mtMode)
{
RINOK(MatchFinderMt_InitMt(&p->matchFinderMt))
}
#endif
p->matchFinderBase.stream = p->inStream;
p->matchFinder.Init(p->matchFinderObj);
p->needInit = 0;
p->inStream = 0;
}
if (p->finished)
return p->result;
RINOK(CheckErrors(p))
RINOK(CheckErrors(p));
nowPos32 = (UInt32)p->nowPos64;
startPos32 = nowPos32;
if (p->nowPos64 == 0)
{
unsigned numPairs;
UInt32 numPairs;
Byte curByte;
if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
return Flush(p, nowPos32);
ReadMatchDistances(p, &numPairs);
RangeEnc_EncodeBit_0(&p->rc, &p->isMatch[kState_Start][0]);
// p->state = kLiteralNextStates[p->state];
curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset);
RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0);
p->state = kLiteralNextStates[p->state];
curByte = p->matchFinder.GetIndexByte(p->matchFinderObj, 0 - p->additionalOffset);
LitEnc_Encode(&p->rc, p->litProbs, curByte);
p->additionalOffset--;
nowPos32++;
}
if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0)
for (;;)
{
UInt32 dist;
unsigned len, posState;
UInt32 range, ttt, newBound;
CLzmaProb *probs;
if (p->fastMode)
len = GetOptimumFast(p);
else
{
unsigned oci = p->optCur;
if (p->optEnd == oci)
len = GetOptimum(p, nowPos32);
else
{
const COptimal *opt = &p->opt[oci];
len = opt->len;
p->backRes = opt->dist;
p->optCur = oci + 1;
}
}
UInt32 pos, len, posState;
posState = (unsigned)nowPos32 & p->pbMask;
range = p->rc.range;
probs = &p->isMatch[p->state][posState];
RC_BIT_PRE(&p->rc, probs)
dist = p->backRes;
if (p->fastMode)
len = GetOptimumFast(p, &pos);
else
len = GetOptimum(p, nowPos32, &pos);
#ifdef SHOW_STAT2
printf("\n pos = %6X, len = %3u pos = %6u", nowPos32, len, dist);
printf("\n pos = %4X, len = %d pos = %d", nowPos32, len, pos);
#endif
if (dist == MARK_LIT)
posState = nowPos32 & p->pbMask;
if (len == 1 && pos == (UInt32)-1)
{
Byte curByte;
CLzmaProb *probs;
const Byte *data;
unsigned state;
RC_BIT_0(&p->rc, probs)
p->rc.range = range;
RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0);
data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
probs = LIT_PROBS(nowPos32, *(data - 1));
curByte = *data;
state = p->state;
p->state = kLiteralNextStates[state];
if (IsLitState(state))
probs = LIT_PROBS(nowPos32, *(data - 1));
if (IsCharState(p->state))
LitEnc_Encode(&p->rc, probs, curByte);
else
LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0]));
LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1));
p->state = kLiteralNextStates[p->state];
}
else
{
RC_BIT_1(&p->rc, probs)
probs = &p->isRep[p->state];
RC_BIT_PRE(&p->rc, probs)
if (dist < LZMA_NUM_REPS)
RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1);
if (pos < LZMA_NUM_REPS)
{
RC_BIT_1(&p->rc, probs)
probs = &p->isRepG0[p->state];
RC_BIT_PRE(&p->rc, probs)
if (dist == 0)
RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1);
if (pos == 0)
{
RC_BIT_0(&p->rc, probs)
probs = &p->isRep0Long[p->state][posState];
RC_BIT_PRE(&p->rc, probs)
if (len != 1)
{
RC_BIT_1_BASE(&p->rc, probs)
}
else
{
RC_BIT_0_BASE(&p->rc, probs)
p->state = kShortRepNextStates[p->state];
}
RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0);
RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1));
}
else
{
RC_BIT_1(&p->rc, probs)
probs = &p->isRepG1[p->state];
RC_BIT_PRE(&p->rc, probs)
if (dist == 1)
{
RC_BIT_0_BASE(&p->rc, probs)
dist = p->reps[1];
}
UInt32 distance = p->reps[pos];
RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1);
if (pos == 1)
RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0);
else
{
RC_BIT_1(&p->rc, probs)
probs = &p->isRepG2[p->state];
RC_BIT_PRE(&p->rc, probs)
if (dist == 2)
{
RC_BIT_0_BASE(&p->rc, probs)
dist = p->reps[2];
}
else
{
RC_BIT_1_BASE(&p->rc, probs)
dist = p->reps[3];
RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1);
RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2);
if (pos == 3)
p->reps[3] = p->reps[2];
}
p->reps[2] = p->reps[1];
}
p->reps[1] = p->reps[0];
p->reps[0] = dist;
p->reps[0] = distance;
}
RC_NORM(&p->rc)
p->rc.range = range;
if (len != 1)
if (len == 1)
p->state = kShortRepNextStates[p->state];
else
{
LenEnc_Encode(&p->repLenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState);
--p->repLenEncCounter;
LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
p->state = kRepNextStates[p->state];
}
}
else
{
unsigned posSlot;
RC_BIT_0(&p->rc, probs)
p->rc.range = range;
UInt32 posSlot;
RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0);
p->state = kMatchNextStates[p->state];
LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
pos -= LZMA_NUM_REPS;
GetPosSlot(pos, posSlot);
RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot);
if (posSlot >= kStartPosModelIndex)
{
UInt32 footerBits = ((posSlot >> 1) - 1);
UInt32 base = ((2 | (posSlot & 1)) << footerBits);
UInt32 posReduced = pos - base;
LenEnc_Encode(&p->lenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState);
// --p->lenEnc.counter;
dist -= LZMA_NUM_REPS;
if (posSlot < kEndPosModelIndex)
RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced);
else
{
RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits);
RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask);
p->alignPriceCount++;
}
}
p->reps[3] = p->reps[2];
p->reps[2] = p->reps[1];
p->reps[1] = p->reps[0];
p->reps[0] = dist + 1;
p->reps[0] = pos;
p->matchPriceCount++;
GetPosSlot(dist, posSlot)
// RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot);
{
UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits);
range = p->rc.range;
probs = p->posSlotEncoder[GetLenToPosState(len)];
do
{
CLzmaProb *prob = probs + (sym >> kNumPosSlotBits);
UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1;
sym <<= 1;
RC_BIT(&p->rc, prob, bit)
}
while (sym < (1 << kNumPosSlotBits * 2));
p->rc.range = range;
}
if (dist >= kStartPosModelIndex)
{
unsigned footerBits = ((posSlot >> 1) - 1);
if (dist < kNumFullDistances)
{
unsigned base = ((2 | (posSlot & 1)) << footerBits);
RcTree_ReverseEncode(&p->rc, p->posEncoders + base, footerBits, (unsigned)(dist /* - base */));
}
else
{
UInt32 pos2 = (dist | 0xF) << (32 - footerBits);
range = p->rc.range;
// RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits);
/*
do
{
range >>= 1;
p->rc.low += range & (0 - ((dist >> --footerBits) & 1));
RC_NORM(&p->rc)
}
while (footerBits > kNumAlignBits);
*/
do
{
range >>= 1;
p->rc.low += range & (0 - (pos2 >> 31));
pos2 += pos2;
RC_NORM(&p->rc)
}
while (pos2 != 0xF0000000);
// RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask);
{
unsigned m = 1;
unsigned bit;
bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit)
p->rc.range = range;
// p->alignPriceCount++;
}
}
}
}
}
nowPos32 += (UInt32)len;
p->additionalOffset -= len;
nowPos32 += len;
if (p->additionalOffset == 0)
{
UInt32 processed;
if (!p->fastMode)
{
/*
if (p->alignPriceCount >= 16) // kAlignTableSize
FillAlignPrices(p);
if (p->matchPriceCount >= 128)
if (p->matchPriceCount >= (1 << 7))
FillDistancesPrices(p);
if (p->lenEnc.counter <= 0)
LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
*/
if (p->matchPriceCount >= 64)
{
if (p->alignPriceCount >= kAlignTableSize)
FillAlignPrices(p);
// { int y; for (y = 0; y < 100; y++) {
FillDistancesPrices(p);
// }}
LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
}
if (p->repLenEncCounter <= 0)
{
p->repLenEncCounter = REP_LEN_COUNT;
LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
}
}
if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
break;
processed = nowPos32 - startPos32;
if (maxPackSize)
if (useLimits)
{
if (processed + kNumOpts + 300 >= maxUnpackSize
|| RangeEnc_GetProcessed_sizet(&p->rc) + kPackReserve >= maxPackSize)
if (processed + kNumOpts + 300 >= maxUnpackSize ||
RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize)
break;
}
else if (processed >= (1 << 17))
else if (processed >= (1 << 15))
{
p->nowPos64 += nowPos32 - startPos32;
return CheckErrors(p);
}
}
}
p->nowPos64 += nowPos32 - startPos32;
return Flush(p, nowPos32);
}
#define kBigHashDicLimit ((UInt32)1 << 24)
static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
{
UInt32 beforeSize = kNumOpts;
UInt32 dictSize;
if (!RangeEnc_Alloc(&p->rc, alloc))
return SZ_ERROR_MEM;
#ifndef Z7_ST
p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0));
#ifdef COMPRESS_MF_MT
Bool btMode = (p->matchFinderBase.btMode != 0);
p->mtMode = (p->multiThread && !p->fastMode && btMode);
#endif
{
unsigned lclp = p->lc + p->lp;
if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp)
if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp)
{
LzmaEnc_FreeLits(p, alloc);
p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
if (!p->litProbs || !p->saveState.litProbs)
p->litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
p->saveState.litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
if (p->litProbs == 0 || p->saveState.litProbs == 0)
{
LzmaEnc_FreeLits(p, alloc);
return SZ_ERROR_MEM;
@@ -2711,71 +1942,42 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
}
}
MFB.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
p->matchFinderBase.bigHash = (p->dictSize > kBigHashDicLimit);
if (beforeSize + p->dictSize < keepWindowSize)
beforeSize = keepWindowSize - p->dictSize;
dictSize = p->dictSize;
if (dictSize == ((UInt32)2 << 30) ||
dictSize == ((UInt32)3 << 30))
{
/* 21.03 : here we reduce the dictionary for 2 reasons:
1) we don't want 32-bit back_distance matches in decoder for 2 GB dictionary.
2) we want to eliminate the useless last MatchFinder_Normalize3() for corner cases,
where data size is aligned for 1 GB: 5/6/8 GB.
That reduction must be >= 1 for such corner cases. */
dictSize -= 1;
}
if (beforeSize + dictSize < keepWindowSize)
beforeSize = keepWindowSize - dictSize;
/* in worst case we can look ahead for
max(LZMA_MATCH_LEN_MAX, numFastBytes + 1 + numFastBytes) bytes.
we send larger value for (keepAfter) to MatchFinder_Create():
(numFastBytes + LZMA_MATCH_LEN_MAX + 1)
*/
#ifndef Z7_ST
#ifdef COMPRESS_MF_MT
if (p->mtMode)
{
RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize,
p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */
, allocBig))
RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig));
p->matchFinderObj = &p->matchFinderMt;
MFB.bigHash = (Byte)(MFB.hashMask >= 0xFFFFFF ? 1 : 0);
MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
}
else
#endif
{
if (!MatchFinder_Create(&MFB, dictSize, beforeSize,
p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 21.03 */
, allocBig))
if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
return SZ_ERROR_MEM;
p->matchFinderObj = &MFB;
MatchFinder_CreateVTable(&MFB, &p->matchFinder);
p->matchFinderObj = &p->matchFinderBase;
MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
}
return SZ_OK;
}
static void LzmaEnc_Init(CLzmaEnc *p)
void LzmaEnc_Init(CLzmaEnc *p)
{
unsigned i;
UInt32 i;
p->state = 0;
p->reps[0] =
p->reps[1] =
p->reps[2] =
p->reps[3] = 1;
for (i = 0 ; i < LZMA_NUM_REPS; i++)
p->reps[i] = 0;
RangeEnc_Init(&p->rc);
for (i = 0; i < (1 << kNumAlignBits); i++)
p->posAlignEncoder[i] = kProbInitValue;
for (i = 0; i < kNumStates; i++)
{
unsigned j;
UInt32 j;
for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++)
{
p->isMatch[i][j] = kProbInitValue;
@@ -2787,50 +1989,41 @@ static void LzmaEnc_Init(CLzmaEnc *p)
p->isRepG2[i] = kProbInitValue;
}
{
UInt32 num = 0x300 << (p->lp + p->lc);
for (i = 0; i < num; i++)
p->litProbs[i] = kProbInitValue;
}
{
for (i = 0; i < kNumLenToPosStates; i++)
{
CLzmaProb *probs = p->posSlotEncoder[i];
unsigned j;
UInt32 j;
for (j = 0; j < (1 << kNumPosSlotBits); j++)
probs[j] = kProbInitValue;
}
}
{
for (i = 0; i < kNumFullDistances; i++)
for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++)
p->posEncoders[i] = kProbInitValue;
}
{
UInt32 num = (UInt32)0x300 << (p->lp + p->lc);
UInt32 k;
CLzmaProb *probs = p->litProbs;
for (k = 0; k < num; k++)
probs[k] = kProbInitValue;
}
LenEnc_Init(&p->lenEnc.p);
LenEnc_Init(&p->repLenEnc.p);
for (i = 0; i < (1 << kNumAlignBits); i++)
p->posAlignEncoder[i] = kProbInitValue;
LenEnc_Init(&p->lenProbs);
LenEnc_Init(&p->repLenProbs);
p->optEnd = 0;
p->optCur = 0;
{
for (i = 0; i < kNumOpts; i++)
p->opt[i].price = kInfinityPrice;
}
p->optimumEndIndex = 0;
p->optimumCurrentIndex = 0;
p->additionalOffset = 0;
p->pbMask = ((unsigned)1 << p->pb) - 1;
p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);
// p->mf_Failure = False;
p->pbMask = (1 << p->pb) - 1;
p->lpMask = (1 << p->lp) - 1;
}
static void LzmaEnc_InitPrices(CLzmaEnc *p)
void LzmaEnc_InitPrices(CLzmaEnc *p)
{
if (!p->fastMode)
{
@@ -2841,125 +2034,116 @@ static void LzmaEnc_InitPrices(CLzmaEnc *p)
p->lenEnc.tableSize =
p->repLenEnc.tableSize =
p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN;
p->repLenEncCounter = REP_LEN_COUNT;
LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices);
LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices);
}
static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
{
unsigned i;
for (i = kEndPosModelIndex / 2; i < kDicLogSizeMax; i++)
UInt32 i;
for (i = 0; i < (UInt32)kDicLogSizeMaxCompress; i++)
if (p->dictSize <= ((UInt32)1 << i))
break;
p->distTableSize = i * 2;
p->finished = False;
p->result = SZ_OK;
p->nowPos64 = 0;
p->needInit = 1;
RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig))
RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig));
LzmaEnc_Init(p);
LzmaEnc_InitPrices(p);
p->nowPos64 = 0;
return SZ_OK;
}
static SRes LzmaEnc_Prepare(CLzmaEncHandle p,
ISeqOutStreamPtr outStream,
ISeqInStreamPtr inStream,
ISzAllocPtr alloc, ISzAllocPtr allocBig)
static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqInStream *inStream, ISeqOutStream *outStream,
ISzAlloc *alloc, ISzAlloc *allocBig)
{
// GET_CLzmaEnc_p
MatchFinder_SET_STREAM(&MFB, inStream)
CLzmaEnc *p = (CLzmaEnc *)pp;
p->inStream = inStream;
p->rc.outStream = outStream;
return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
}
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p,
ISeqInStreamPtr inStream, UInt32 keepWindowSize,
ISzAllocPtr alloc, ISzAllocPtr allocBig)
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,
ISeqInStream *inStream, UInt32 keepWindowSize,
ISzAlloc *alloc, ISzAlloc *allocBig)
{
// GET_CLzmaEnc_p
MatchFinder_SET_STREAM(&MFB, inStream)
CLzmaEnc *p = (CLzmaEnc *)pp;
p->inStream = inStream;
return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
}
SRes LzmaEnc_MemPrepare(CLzmaEncHandle p,
const Byte *src, SizeT srcLen,
UInt32 keepWindowSize,
ISzAllocPtr alloc, ISzAllocPtr allocBig)
static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
{
// GET_CLzmaEnc_p
MatchFinder_SET_DIRECT_INPUT_BUF(&MFB, src, srcLen)
LzmaEnc_SetDataSize(p, srcLen);
p->seqBufInStream.funcTable.Read = MyRead;
p->seqBufInStream.data = src;
p->seqBufInStream.rem = srcLen;
}
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
LzmaEnc_SetInputBuf(p, src, srcLen);
p->inStream = &p->seqBufInStream.funcTable;
return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
}
void LzmaEnc_Finish(CLzmaEncHandle p)
void LzmaEnc_Finish(CLzmaEncHandle pp)
{
#ifndef Z7_ST
// GET_CLzmaEnc_p
#ifdef COMPRESS_MF_MT
CLzmaEnc *p = (CLzmaEnc *)pp;
if (p->mtMode)
MatchFinderMt_ReleaseStream(&p->matchFinderMt);
#else
UNUSED_VAR(p)
pp = pp;
#endif
}
typedef struct
typedef struct _CSeqOutStreamBuf
{
ISeqOutStream vt;
ISeqOutStream funcTable;
Byte *data;
size_t rem;
BoolInt overflow;
} CLzmaEnc_SeqOutStreamBuf;
SizeT rem;
Bool overflow;
} CSeqOutStreamBuf;
static size_t SeqOutStreamBuf_Write(ISeqOutStreamPtr pp, const void *data, size_t size)
static size_t MyWrite(void *pp, const void *data, size_t size)
{
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLzmaEnc_SeqOutStreamBuf)
CSeqOutStreamBuf *p = (CSeqOutStreamBuf *)pp;
if (p->rem < size)
{
size = p->rem;
p->overflow = True;
}
if (size != 0)
{
memcpy(p->data, data, size);
p->rem -= size;
p->data += size;
}
memcpy(p->data, data, size);
p->rem -= size;
p->data += size;
return size;
}
/*
UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p)
UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
{
GET_const_CLzmaEnc_p
const CLzmaEnc *p = (CLzmaEnc *)pp;
return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
}
*/
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p)
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
{
// GET_const_CLzmaEnc_p
const CLzmaEnc *p = (CLzmaEnc *)pp;
return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
}
// (desiredPackSize == 0) is not allowed
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit,
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
{
// GET_CLzmaEnc_p
CLzmaEnc *p = (CLzmaEnc *)pp;
UInt64 nowPos64;
SRes res;
CLzmaEnc_SeqOutStreamBuf outStream;
CSeqOutStreamBuf outStream;
outStream.vt.Write = SeqOutStreamBuf_Write;
outStream.funcTable.Write = MyWrite;
outStream.data = dest;
outStream.rem = *destLen;
outStream.overflow = False;
@@ -2971,11 +2155,11 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
if (reInit)
LzmaEnc_Init(p);
LzmaEnc_InitPrices(p);
RangeEnc_Init(&p->rc);
p->rc.outStream = &outStream.vt;
nowPos64 = p->nowPos64;
res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize);
RangeEnc_Init(&p->rc);
p->rc.outStream = &outStream.funcTable;
res = LzmaEnc_CodeOneBlock(p, True, desiredPackSize, *unpackSize);
*unpackSize = (UInt32)(p->nowPos64 - nowPos64);
*destLen -= outStream.rem;
@@ -2985,26 +2169,29 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
return res;
}
Z7_NO_INLINE
static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgressPtr progress)
SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress,
ISzAlloc *alloc, ISzAlloc *allocBig)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
SRes res = SZ_OK;
#ifndef Z7_ST
#ifdef COMPRESS_MF_MT
Byte allocaDummy[0x300];
allocaDummy[0] = 0;
allocaDummy[1] = allocaDummy[0];
int i = 0;
for (i = 0; i < 16; i++)
allocaDummy[i] = (Byte)i;
#endif
RINOK(LzmaEnc_Prepare(pp, inStream, outStream, alloc, allocBig));
for (;;)
{
res = LzmaEnc_CodeOneBlock(p, 0, 0);
if (res != SZ_OK || p->finished)
res = LzmaEnc_CodeOneBlock(p, False, 0, 0);
if (res != SZ_OK || p->finished != 0)
break;
if (progress)
if (progress != 0)
{
res = ICompressProgress_Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc));
res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc));
if (res != SZ_OK)
{
res = SZ_ERROR_PROGRESS;
@@ -3012,110 +2199,71 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgressPtr progress)
}
}
}
LzmaEnc_Finish((CLzmaEncHandle)(void *)p);
/*
if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB))
res = SZ_ERROR_FAIL;
}
*/
LzmaEnc_Finish(pp);
return res;
}
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress,
ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
// GET_CLzmaEnc_p
RINOK(LzmaEnc_Prepare(p, outStream, inStream, alloc, allocBig))
return LzmaEnc_Encode2(p, progress);
}
SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *props, SizeT *size)
SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
int i;
UInt32 dictSize = p->dictSize;
if (*size < LZMA_PROPS_SIZE)
return SZ_ERROR_PARAM;
*size = LZMA_PROPS_SIZE;
props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
for (i = 11; i <= 30; i++)
{
// GET_CLzmaEnc_p
const UInt32 dictSize = p->dictSize;
UInt32 v;
props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
// we write aligned dictionary value to properties for lzma decoder
if (dictSize >= ((UInt32)1 << 21))
if (dictSize <= ((UInt32)2 << i))
{
const UInt32 kDictMask = ((UInt32)1 << 20) - 1;
v = (dictSize + kDictMask) & ~kDictMask;
if (v < dictSize)
v = dictSize;
dictSize = (2 << i);
break;
}
else
if (dictSize <= ((UInt32)3 << i))
{
unsigned i = 11 * 2;
do
{
v = (UInt32)(2 + (i & 1)) << (i >> 1);
i++;
}
while (v < dictSize);
dictSize = (3 << i);
break;
}
SetUi32(props + 1, v)
return SZ_OK;
}
for (i = 0; i < 4; i++)
props[1 + i] = (Byte)(dictSize >> (8 * i));
return SZ_OK;
}
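/* Sketch (not part of the SDK): decoding the 5-byte properties blob that
   LzmaEnc_WriteProperties() produces. Both versions in this diff agree on the
   layout: props[0] = (pb * 5 + lp) * 9 + lc, props[1..4] = dictSize,
   little-endian. The function name is illustrative. */
static void LzmaProps_Decode_Sketch(const Byte props[LZMA_PROPS_SIZE],
    unsigned *lc, unsigned *lp, unsigned *pb, UInt32 *dictSize)
{
  unsigned d = props[0];  /* valid props have d < 9 * 5 * 5 == 225 */
  *lc = d % 9; d /= 9;
  *lp = d % 5;
  *pb = d / 5;
  *dictSize = (UInt32)props[1]
      | ((UInt32)props[2] << 8)
      | ((UInt32)props[3] << 16)
      | ((UInt32)props[4] << 24);
}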
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p)
{
// GET_CLzmaEnc_p
return (unsigned)p->writeEndMark;
}
SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig)
{
SRes res;
// GET_CLzmaEnc_p
CLzmaEnc *p = (CLzmaEnc *)pp;
CLzmaEnc_SeqOutStreamBuf outStream;
CSeqOutStreamBuf outStream;
outStream.vt.Write = SeqOutStreamBuf_Write;
LzmaEnc_SetInputBuf(p, src, srcLen);
outStream.funcTable.Write = MyWrite;
outStream.data = dest;
outStream.rem = *destLen;
outStream.overflow = False;
p->writeEndMark = writeEndMark;
p->rc.outStream = &outStream.vt;
res = LzmaEnc_Encode(pp, &outStream.funcTable, &p->seqBufInStream.funcTable,
progress, alloc, allocBig);
res = LzmaEnc_MemPrepare(p, src, srcLen, 0, alloc, allocBig);
if (res == SZ_OK)
{
res = LzmaEnc_Encode2(p, progress);
if (res == SZ_OK && p->nowPos64 != srcLen)
res = SZ_ERROR_FAIL;
}
*destLen -= (SizeT)outStream.rem;
*destLen -= outStream.rem;
if (outStream.overflow)
return SZ_ERROR_OUTPUT_EOF;
return res;
}
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig)
{
CLzmaEncHandle p = LzmaEnc_Create(alloc);
CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc);
SRes res;
if (!p)
if (p == 0)
return SZ_ERROR_MEM;
res = LzmaEnc_SetProps(p, props);
@@ -3130,15 +2278,3 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
LzmaEnc_Destroy(p, alloc, allocBig);
return res;
}
/*
#ifndef Z7_ST
void LzmaEnc_GetLzThreads(CLzmaEncHandle p, HANDLE lz_threads[2])
{
GET_const_CLzmaEnc_p
lz_threads[0] = p->matchFinderMt.hashSync.thread;
lz_threads[1] = p->matchFinderMt.btSync.thread;
}
#endif
*/

extern/lzma/LzmaEnc.h vendored

@@ -1,21 +1,19 @@
/* LzmaEnc.h -- LZMA Encoder
2023-04-13 : Igor Pavlov : Public domain */
2008-10-04 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_LZMA_ENC_H
#define ZIP7_INC_LZMA_ENC_H
#ifndef __LZMAENC_H
#define __LZMAENC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#include "Types.h"
#define LZMA_PROPS_SIZE 5
typedef struct
typedef struct _CLzmaEncProps
{
int level; /* 0 <= level <= 9 */
int level; /* 0 <= level <= 9 */
UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
(1 << 12) <= dictSize <= (3 << 29) for 64-bit version
default = (1 << 24) */
(1 << 12) <= dictSize <= (1 << 30) for 64-bit version
default = (1 << 24) */
int lc; /* 0 <= lc <= 8, default = 3 */
int lp; /* 0 <= lp <= 4, default = 0 */
int pb; /* 0 <= pb <= 4, default = 2 */
@@ -23,17 +21,9 @@ typedef struct
int fb; /* 5 <= fb <= 273, default = 32 */
int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
int numHashBytes; /* 2, 3 or 4, default = 4 */
unsigned numHashOutBits; /* default = ? */
UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
int numThreads; /* 1 or 2, default = 2 */
// int _pad;
UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
Encoder uses this value to reduce dictionary size */
UInt64 affinity;
} CLzmaEncProps;
void LzmaEncProps_Init(CLzmaEncProps *p);
@@ -43,41 +33,40 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
/* ---------- CLzmaEncHandle Interface ---------- */
/* LzmaEnc* functions can return the following exit codes:
SRes:
/* LzmaEnc_* functions can return the following exit codes:
Returns:
SZ_OK - OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_PARAM - Incorrect parameter in props
SZ_ERROR_WRITE - ISeqOutStream write callback error
SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output
SZ_ERROR_WRITE - Write callback error.
SZ_ERROR_PROGRESS - some break from progress callback
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
*/
typedef struct CLzmaEnc CLzmaEnc;
typedef CLzmaEnc * CLzmaEncHandle;
// Z7_DECLARE_HANDLE(CLzmaEncHandle)
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
typedef void * CLzmaEncHandle;
CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc);
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig);
SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream,
ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
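/* Sketch (not from the SDK): the typical CLzmaEncHandle call sequence, written
   against the 4.65-era signatures this revert restores (ISzAlloc * allocators).
   The wrapper name and allocator argument are illustrative assumptions. */
static SRes EncodeBuf_Sketch(Byte *dest, SizeT *destLen,
    const Byte *src, SizeT srcLen,
    Byte *props, SizeT *propsSize, ISzAlloc *alloc)
{
  CLzmaEncProps encProps;
  SRes res;
  CLzmaEncHandle enc = LzmaEnc_Create(alloc);
  if (enc == 0)
    return SZ_ERROR_MEM;
  LzmaEncProps_Init(&encProps);
  encProps.level = 5;                       /* documented default */
  res = LzmaEnc_SetProps(enc, &encProps);
  if (res == SZ_OK)
    res = LzmaEnc_WriteProperties(enc, props, propsSize);
  if (res == SZ_OK)
    res = LzmaEnc_MemEncode(enc, dest, destLen, src, srcLen,
        0 /* writeEndMark */, 0 /* progress */, alloc, alloc);
  LzmaEnc_Destroy(enc, alloc, alloc);
  return res;
}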
/* ---------- One Call Interface ---------- */
/* LzmaEncode
Return code:
SZ_OK - OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_PARAM - Incorrect parameter
SZ_ERROR_OUTPUT_EOF - output buffer overflow
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
*/
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
EXTERN_C_END
ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
#endif

extern/lzma/LzmaLib.c vendored

@@ -1,14 +1,18 @@
/* LzmaLib.c -- LZMA library wrapper
2023-04-02 : Igor Pavlov : Public domain */
2008-08-05
Igor Pavlov
Public domain */
#include "Precomp.h"
#include "Alloc.h"
#include "LzmaDec.h"
#include "LzmaEnc.h"
#include "LzmaDec.h"
#include "Alloc.h"
#include "LzmaLib.h"
Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); }
static void SzFree(void *p, void *address) { p = p; MyFree(address); }
static ISzAlloc g_Alloc = { SzAlloc, SzFree };
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
unsigned char *outProps, size_t *outPropsSize,
int level, /* 0 <= level <= 9, default = 5 */
unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
@@ -34,7 +38,7 @@ Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char
}
Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
const unsigned char *props, size_t propsSize)
{
ELzmaStatus status;

extern/lzma/LzmaLib.h vendored

@@ -1,14 +1,20 @@
/* LzmaLib.h -- LZMA library interface
2023-04-02 : Igor Pavlov : Public domain */
2008-08-05
Igor Pavlov
Public domain */
#ifndef ZIP7_INC_LZMA_LIB_H
#define ZIP7_INC_LZMA_LIB_H
#ifndef __LZMALIB_H
#define __LZMALIB_H
#include "7zTypes.h"
#include "Types.h"
EXTERN_C_BEGIN
#ifdef __cplusplus
#define MY_EXTERN_C extern "C"
#else
#define MY_EXTERN_C extern
#endif
#define Z7_STDAPI int Z7_STDCALL
#define MY_STDAPI MY_EXTERN_C int MY_STD_CALL
#define LZMA_PROPS_SIZE 5
@@ -40,16 +46,14 @@ outPropsSize -
level - compression level: 0 <= level <= 9;
level dictSize algo fb
0: 64 KB 0 32
1: 256 KB 0 32
2: 1 MB 0 32
3: 4 MB 0 32
4: 16 MB 0 32
0: 16 KB 0 32
1: 64 KB 0 32
2: 256 KB 0 32
3: 1 MB 0 32
4: 4 MB 0 32
5: 16 MB 1 32
6: 32 MB 1 32
7: 32 MB 1 64
8: 64 MB 1 64
9: 64 MB 1 64
7+: 64 MB 1 64
The default value for "level" is 5.
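/* Sketch (not part of this header): invoking LzmaCompress() with the
   documented defaults from the table above. The wrapper name is an
   illustrative assumption; the parameter values are the defaults this
   header documents. */
static int CompressWithDefaults_Sketch(unsigned char *dest, size_t *destLen,
    const unsigned char *src, size_t srcLen,
    unsigned char *outProps, size_t *outPropsSize)
{
  return LzmaCompress(dest, destLen, src, srcLen, outProps, outPropsSize,
      5,        /* level: default */
      1 << 24,  /* dictSize: 16 MB default */
      3,        /* lc */
      0,        /* lp */
      2,        /* pb */
      32,       /* fb */
      2);       /* numThreads: default; fast mode uses only 1 */
}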
@@ -85,11 +89,6 @@ fb - Word size (the number of fast bytes).
numThreads - The number of threads. 1 or 2. The default value is 2.
Fast mode (algo = 0) can use only 1 thread.
In:
dest - output data buffer
destLen - output data buffer size
src - input data
srcLen - input data size
Out:
destLen - processed output size
Returns:
@@ -100,7 +99,7 @@ Returns:
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
*/
Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
int level, /* 0 <= level <= 9, default = 5 */
unsigned dictSize, /* default = (1 << 24) */
@@ -115,8 +114,8 @@ Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char
LzmaUncompress
--------------
In:
dest - output data buffer
destLen - output data buffer size
dest - output data
destLen - output data size
src - input data
srcLen - input data size
Out:
@@ -130,9 +129,7 @@ Returns:
SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src)
*/
Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
const unsigned char *props, size_t propsSize);
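/* Sketch (not part of this header): one-call decompression. The caller must
   know the unpacked size from its container, since this wrapper stores no
   size field; the wrapper name is illustrative. */
static int Uncompress_Sketch(unsigned char *dest, size_t unpackedSize,
    const unsigned char *src, size_t packedSize,
    const unsigned char *props)
{
  size_t destLen = unpackedSize;
  SizeT srcLen = packedSize;
  /* on SZ_OK, destLen holds bytes written and srcLen bytes consumed */
  int res = LzmaUncompress(dest, &destLen, src, &srcLen, props, LZMA_PROPS_SIZE);
  (void)destLen; (void)srcLen;
  return res;
}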
EXTERN_C_END
#endif

extern/lzma/Precomp.h vendored

@@ -1,10 +0,0 @@
/* Precomp.h -- StdAfx
2023-04-02 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_PRECOMP_H
#define ZIP7_INC_PRECOMP_H
#include "Compiler.h"
/* #include "7zTypes.h" */
#endif


@@ -1,8 +1,5 @@
Project: LZMA SDK
URL: https://www.7-zip.org/sdk.html
License: Public Domain
Upstream version: 23.01
Local modifications: No code changes
- Took only files needed for Blender: C source for raw LZMA1 encoder/decoder.
- CMakeLists.txt was written for the Blender codebase
Upstream version: 4.65
Local modifications: None

extern/lzma/Threads.c vendored

@@ -1,562 +0,0 @@
/* Threads.c -- multithreading library
2023-03-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
#ifdef _WIN32
#ifndef USE_THREADS_CreateThread
#include <process.h>
#endif
#include "Threads.h"
static WRes GetError(void)
{
const DWORD res = GetLastError();
return res ? (WRes)res : 1;
}
static WRes HandleToWRes(HANDLE h) { return (h != NULL) ? 0 : GetError(); }
static WRes BOOLToWRes(BOOL v) { return v ? 0 : GetError(); }
WRes HandlePtr_Close(HANDLE *p)
{
if (*p != NULL)
{
if (!CloseHandle(*p))
return GetError();
*p = NULL;
}
return 0;
}
WRes Handle_WaitObject(HANDLE h)
{
DWORD dw = WaitForSingleObject(h, INFINITE);
/*
(dw) result:
WAIT_OBJECT_0 // 0
WAIT_ABANDONED // 0x00000080 : is not compatible with Win32 Error space
WAIT_TIMEOUT // 0x00000102 : is compatible with Win32 Error space
WAIT_FAILED // 0xFFFFFFFF
*/
if (dw == WAIT_FAILED)
{
dw = GetLastError();
if (dw == 0)
return WAIT_FAILED;
}
return (WRes)dw;
}
#define Thread_Wait(p) Handle_WaitObject(*(p))
WRes Thread_Wait_Close(CThread *p)
{
WRes res = Thread_Wait(p);
WRes res2 = Thread_Close(p);
return (res != 0 ? res : res2);
}
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
{
/* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
#ifdef USE_THREADS_CreateThread
DWORD threadId;
*p = CreateThread(NULL, 0, func, param, 0, &threadId);
#else
unsigned threadId;
*p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId));
#endif
/* maybe we must use errno here, but probably GetLastError() is also OK. */
return HandleToWRes(*p);
}
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
{
#ifdef USE_THREADS_CreateThread
UNUSED_VAR(affinity)
return Thread_Create(p, func, param);
#else
/* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
HANDLE h;
WRes wres;
unsigned threadId;
h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId));
*p = h;
wres = HandleToWRes(h);
if (h)
{
{
// DWORD_PTR prevMask =
SetThreadAffinityMask(h, (DWORD_PTR)affinity);
/*
if (prevMask == 0)
{
// affinity change is non-critical error, so we can ignore it
// wres = GetError();
}
*/
}
{
DWORD prevSuspendCount = ResumeThread(h);
/* ResumeThread() returns:
0 : was_not_suspended
1 : was_resumed
-1 : error
*/
if (prevSuspendCount == (DWORD)-1)
wres = GetError();
}
}
/* maybe we must use errno here, but probably GetLastError() is also OK. */
return wres;
#endif
}
static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled)
{
*p = CreateEvent(NULL, manualReset, (signaled ? TRUE : FALSE), NULL);
return HandleToWRes(*p);
}
WRes Event_Set(CEvent *p) { return BOOLToWRes(SetEvent(*p)); }
WRes Event_Reset(CEvent *p) { return BOOLToWRes(ResetEvent(*p)); }
WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled) { return Event_Create(p, TRUE, signaled); }
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled) { return Event_Create(p, FALSE, signaled); }
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) { return ManualResetEvent_Create(p, 0); }
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEvent_Create(p, 0); }
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
// negative ((LONG)maxCount) is not supported in WIN32::CreateSemaphore()
*p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL);
return HandleToWRes(*p);
}
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
// if (Semaphore_IsCreated(p))
{
WRes wres = Semaphore_Close(p);
if (wres != 0)
return wres;
}
return Semaphore_Create(p, initCount, maxCount);
}
static WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount)
{ return BOOLToWRes(ReleaseSemaphore(*p, releaseCount, previousCount)); }
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num)
{ return Semaphore_Release(p, (LONG)num, NULL); }
WRes Semaphore_Release1(CSemaphore *p) { return Semaphore_ReleaseN(p, 1); }
WRes CriticalSection_Init(CCriticalSection *p)
{
/* InitializeCriticalSection() can raise exception:
Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception
Windows Vista+ : no exceptions */
#ifdef _MSC_VER
#ifdef __clang__
#pragma GCC diagnostic ignored "-Wlanguage-extension-token"
#endif
__try
#endif
{
InitializeCriticalSection(p);
/* InitializeCriticalSectionAndSpinCount(p, 0); */
}
#ifdef _MSC_VER
__except (EXCEPTION_EXECUTE_HANDLER) { return ERROR_NOT_ENOUGH_MEMORY; }
#endif
return 0;
}
#else // _WIN32
// ---------- POSIX ----------
#ifndef __APPLE__
#ifndef Z7_AFFINITY_DISABLE
// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET
// clang < 3.6 : unknown warning group '-Wreserved-id-macro'
// clang 3.6 - 12.01 : gives warning "macro name is a reserved identifier"
// clang >= 13 : do not give warning
#if !defined(_GNU_SOURCE)
#if defined(__clang__) && (__clang_major__ >= 4) && (__clang_major__ <= 12)
#pragma GCC diagnostic ignored "-Wreserved-id-macro"
#endif
#define _GNU_SOURCE
#endif // !defined(_GNU_SOURCE)
#endif // Z7_AFFINITY_DISABLE
#endif // __APPLE__
#include "Threads.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#ifdef Z7_AFFINITY_SUPPORTED
// #include <sched.h>
#endif
// #include <stdio.h>
// #define PRF(p) p
#define PRF(p)
#define Print(s) PRF(printf("\n%s\n", s);)
WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet)
{
// a new thread in POSIX probably inherits affinity from the parent thread
Print("Thread_Create_With_CpuSet")
pthread_attr_t attr;
int ret;
// int ret2;
p->_created = 0;
RINOK(pthread_attr_init(&attr))
ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
if (!ret)
{
if (cpuSet)
{
#ifdef Z7_AFFINITY_SUPPORTED
/*
printf("\n affinity :");
unsigned i;
for (i = 0; i < sizeof(*cpuSet) && i < 8; i++)
{
Byte b = *((const Byte *)cpuSet + i);
char temp[32];
#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
temp[0] = GET_HEX_CHAR((b & 0xF));
temp[1] = GET_HEX_CHAR((b >> 4));
// temp[0] = GET_HEX_CHAR((b >> 4)); // big-endian
// temp[1] = GET_HEX_CHAR((b & 0xF)); // big-endian
temp[2] = 0;
printf("%s", temp);
}
printf("\n");
*/
// ret2 =
pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet);
// if (ret2) ret = ret2;
#endif
}
ret = pthread_create(&p->_tid, &attr, func, param);
if (!ret)
{
p->_created = 1;
/*
if (cpuSet)
{
// ret2 =
pthread_setaffinity_np(p->_tid, sizeof(*cpuSet), cpuSet);
// if (ret2) ret = ret2;
}
*/
}
}
// ret2 =
pthread_attr_destroy(&attr);
// if (ret2 != 0) ret = ret2;
return ret;
}
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
{
return Thread_Create_With_CpuSet(p, func, param, NULL);
}
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
{
Print("Thread_Create_WithAffinity")
CCpuSet cs;
unsigned i;
CpuSet_Zero(&cs);
for (i = 0; i < sizeof(affinity) * 8; i++)
{
if (affinity == 0)
break;
if (affinity & 1)
{
CpuSet_Set(&cs, i);
}
affinity >>= 1;
}
return Thread_Create_With_CpuSet(p, func, param, &cs);
}
WRes Thread_Close(CThread *p)
{
// Print("Thread_Close")
int ret;
if (!p->_created)
return 0;
ret = pthread_detach(p->_tid);
p->_tid = 0;
p->_created = 0;
return ret;
}
WRes Thread_Wait_Close(CThread *p)
{
// Print("Thread_Wait_Close")
void *thread_return;
int ret;
if (!p->_created)
return EINVAL;
ret = pthread_join(p->_tid, &thread_return);
// probably we can't use that (_tid) after pthread_join(), so we close thread here
p->_created = 0;
p->_tid = 0;
return ret;
}
static WRes Event_Create(CEvent *p, int manualReset, int signaled)
{
RINOK(pthread_mutex_init(&p->_mutex, NULL))
RINOK(pthread_cond_init(&p->_cond, NULL))
p->_manual_reset = manualReset;
p->_state = (signaled ? True : False);
p->_created = 1;
return 0;
}
WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled)
{ return Event_Create(p, True, signaled); }
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p)
{ return ManualResetEvent_Create(p, 0); }
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled)
{ return Event_Create(p, False, signaled); }
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p)
{ return AutoResetEvent_Create(p, 0); }
WRes Event_Set(CEvent *p)
{
RINOK(pthread_mutex_lock(&p->_mutex))
p->_state = True;
int res1 = pthread_cond_broadcast(&p->_cond);
int res2 = pthread_mutex_unlock(&p->_mutex);
return (res2 ? res2 : res1);
}
WRes Event_Reset(CEvent *p)
{
RINOK(pthread_mutex_lock(&p->_mutex))
p->_state = False;
return pthread_mutex_unlock(&p->_mutex);
}
WRes Event_Wait(CEvent *p)
{
RINOK(pthread_mutex_lock(&p->_mutex))
while (p->_state == False)
{
// ETIMEDOUT
// ret =
pthread_cond_wait(&p->_cond, &p->_mutex);
// if (ret != 0) break;
}
if (p->_manual_reset == False)
{
p->_state = False;
}
return pthread_mutex_unlock(&p->_mutex);
}
WRes Event_Close(CEvent *p)
{
if (!p->_created)
return 0;
p->_created = 0;
{
int res1 = pthread_mutex_destroy(&p->_mutex);
int res2 = pthread_cond_destroy(&p->_cond);
return (res1 ? res1 : res2);
}
}
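/* Example (editor's sketch, not part of the upstream file): auto-reset event
   round trip. Normally Event_Set() is called from another thread while this
   one blocks in Event_Wait(); it is shown single-threaded here only to
   illustrate the call sequence. */
static WRes Example_EventRoundTrip(void)
{
  CAutoResetEvent ev;
  WRes wres, wres2;
  Event_Construct(&ev);
  wres = AutoResetEvent_CreateNotSignaled(&ev);
  if (wres != 0)
    return wres;
  wres = Event_Set(&ev); // signal the event
  if (wres == 0)
    wres = Event_Wait(&ev); // returns at once and consumes the signal (auto-reset)
  wres2 = Event_Close(&ev);
  return (wres ? wres : wres2);
}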
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
if (initCount > maxCount || maxCount < 1)
return EINVAL;
RINOK(pthread_mutex_init(&p->_mutex, NULL))
RINOK(pthread_cond_init(&p->_cond, NULL))
p->_count = initCount;
p->_maxCount = maxCount;
p->_created = 1;
return 0;
}
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
if (Semaphore_IsCreated(p))
{
/*
WRes wres = Semaphore_Close(p);
if (wres != 0)
return wres;
*/
if (initCount > maxCount || maxCount < 1)
return EINVAL;
// return EINVAL; // for debug
p->_count = initCount;
p->_maxCount = maxCount;
return 0;
}
return Semaphore_Create(p, initCount, maxCount);
}
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount)
{
UInt32 newCount;
int ret;
if (releaseCount < 1)
return EINVAL;
RINOK(pthread_mutex_lock(&p->_mutex))
newCount = p->_count + releaseCount;
if (newCount > p->_maxCount)
ret = ERROR_TOO_MANY_POSTS; // EINVAL;
else
{
p->_count = newCount;
ret = pthread_cond_broadcast(&p->_cond);
}
RINOK(pthread_mutex_unlock(&p->_mutex))
return ret;
}
WRes Semaphore_Wait(CSemaphore *p)
{
RINOK(pthread_mutex_lock(&p->_mutex))
while (p->_count < 1)
{
pthread_cond_wait(&p->_cond, &p->_mutex);
}
p->_count--;
return pthread_mutex_unlock(&p->_mutex);
}
WRes Semaphore_Close(CSemaphore *p)
{
if (!p->_created)
return 0;
p->_created = 0;
{
int res1 = pthread_mutex_destroy(&p->_mutex);
int res2 = pthread_cond_destroy(&p->_cond);
return (res1 ? res1 : res2);
}
}
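/* Example (editor's sketch, not part of the upstream file): counting
   semaphore round trip. Semaphore_Wait() blocks until the count is nonzero
   and decrements it; Semaphore_ReleaseN() increments the count and wakes
   waiters, failing with ERROR_TOO_MANY_POSTS above maxCount. */
static WRes Example_SemaphoreRoundTrip(void)
{
  CSemaphore sem;
  WRes wres, wres2;
  Semaphore_Construct(&sem);
  wres = Semaphore_Create(&sem, 1, 4); // initCount = 1, maxCount = 4
  if (wres != 0)
    return wres;
  wres = Semaphore_Wait(&sem); // count: 1 -> 0
  if (wres == 0)
    wres = Semaphore_Release1(&sem); // count: 0 -> 1
  wres2 = Semaphore_Close(&sem);
  return (wres ? wres : wres2);
}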
WRes CriticalSection_Init(CCriticalSection *p)
{
// Print("CriticalSection_Init")
if (!p)
return EINTR;
return pthread_mutex_init(&p->_mutex, NULL);
}
void CriticalSection_Enter(CCriticalSection *p)
{
// Print("CriticalSection_Enter")
if (p)
{
// int ret =
pthread_mutex_lock(&p->_mutex);
}
}
void CriticalSection_Leave(CCriticalSection *p)
{
// Print("CriticalSection_Leave")
if (p)
{
// int ret =
pthread_mutex_unlock(&p->_mutex);
}
}
void CriticalSection_Delete(CCriticalSection *p)
{
// Print("CriticalSection_Delete")
if (p)
{
// int ret =
pthread_mutex_destroy(&p->_mutex);
}
}
LONG InterlockedIncrement(LONG volatile *addend)
{
// Print("InterlockedIncrement")
#ifdef USE_HACK_UNSAFE_ATOMIC
LONG val = *addend + 1;
*addend = val;
return val;
#else
#if defined(__clang__) && (__clang_major__ >= 8)
#pragma GCC diagnostic ignored "-Watomic-implicit-seq-cst"
#endif
return __sync_add_and_fetch(addend, 1);
#endif
}
#endif // _WIN32
WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p)
{
if (Event_IsCreated(p))
return Event_Reset(p);
return AutoResetEvent_CreateNotSignaled(p);
}
#undef PRF
#undef Print

240
extern/lzma/Threads.h vendored

@@ -1,240 +0,0 @@
/* Threads.h -- multithreading library
2023-04-02 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_THREADS_H
#define ZIP7_INC_THREADS_H
#ifdef _WIN32
#include "7zWindows.h"
#else
#if defined(__linux__)
#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)
#ifndef Z7_AFFINITY_DISABLE
#define Z7_AFFINITY_SUPPORTED
// #pragma message(" ==== Z7_AFFINITY_SUPPORTED")
// #define _GNU_SOURCE
#endif
#endif
#endif
#include <pthread.h>
#endif
#include "7zTypes.h"
EXTERN_C_BEGIN
#ifdef _WIN32
WRes HandlePtr_Close(HANDLE *h);
WRes Handle_WaitObject(HANDLE h);
typedef HANDLE CThread;
#define Thread_CONSTRUCT(p) { *(p) = NULL; }
#define Thread_WasCreated(p) (*(p) != NULL)
#define Thread_Close(p) HandlePtr_Close(p)
// #define Thread_Wait(p) Handle_WaitObject(*(p))
#ifdef UNDER_CE
// if (USE_THREADS_CreateThread is defined), we use _beginthreadex()
// if (USE_THREADS_CreateThread is not defined), we use CreateThread()
#define USE_THREADS_CreateThread
#endif
typedef
#ifdef USE_THREADS_CreateThread
DWORD
#else
unsigned
#endif
THREAD_FUNC_RET_TYPE;
#define THREAD_FUNC_RET_ZERO 0
typedef DWORD_PTR CAffinityMask;
typedef DWORD_PTR CCpuSet;
#define CpuSet_Zero(p) *(p) = (0)
#define CpuSet_Set(p, cpu) *(p) |= ((DWORD_PTR)1 << (cpu))
#else // _WIN32
typedef struct
{
pthread_t _tid;
int _created;
} CThread;
#define Thread_CONSTRUCT(p) { (p)->_tid = 0; (p)->_created = 0; }
#define Thread_WasCreated(p) ((p)->_created != 0)
WRes Thread_Close(CThread *p);
// #define Thread_Wait Thread_Wait_Close
typedef void * THREAD_FUNC_RET_TYPE;
#define THREAD_FUNC_RET_ZERO NULL
typedef UInt64 CAffinityMask;
#ifdef Z7_AFFINITY_SUPPORTED
typedef cpu_set_t CCpuSet;
#define CpuSet_Zero(p) CPU_ZERO(p)
#define CpuSet_Set(p, cpu) CPU_SET(cpu, p)
#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p)
#else
typedef UInt64 CCpuSet;
#define CpuSet_Zero(p) *(p) = (0)
#define CpuSet_Set(p, cpu) *(p) |= ((UInt64)1 << (cpu))
#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0)
#endif
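// Example (editor's note): with this UInt64 fallback, a CPU set is a plain
// bit mask. For instance:
//   CCpuSet cs;
//   CpuSet_Zero(&cs);       // cs == 0
//   CpuSet_Set(&cs, 2);     // cs == 0x4
//   CpuSet_IsSet(&cs, 2)    // nonzero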
#endif // _WIN32
#define THREAD_FUNC_CALL_TYPE Z7_STDCALL
#if defined(_WIN32) && defined(__GNUC__)
/* GCC compiler for x86 32-bit uses the rule:
   the stack is 16-byte aligned before a CALL instruction for function calling.
   But only the root function main() contains instructions that
   set 16-byte alignment for the stack pointer. Other functions
   just keep the alignment, if it was set in some parent function.
   The problem:
   if we create a new thread in MinGW (GCC) 32-bit x86 via _beginthreadex() or CreateThread(),
   the root function of the thread doesn't set 16-byte alignment.
   Stack frames in all child functions will also be unaligned in that case.
   Here we set the (force_align_arg_pointer) attribute for the root function of a new thread.
   Do we need (force_align_arg_pointer) also for other systems? */
#define THREAD_FUNC_ATTRIB_ALIGN_ARG __attribute__((force_align_arg_pointer))
// #define THREAD_FUNC_ATTRIB_ALIGN_ARG // for debug : bad alignment in SSE functions
#else
#define THREAD_FUNC_ATTRIB_ALIGN_ARG
#endif
#define THREAD_FUNC_DECL THREAD_FUNC_ATTRIB_ALIGN_ARG THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE
typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *);
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param);
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity);
WRes Thread_Wait_Close(CThread *p);
#ifdef _WIN32
#define Thread_Create_With_CpuSet(p, func, param, cs) \
Thread_Create_With_Affinity(p, func, param, *cs)
#else
WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet);
#endif
#ifdef _WIN32
typedef HANDLE CEvent;
typedef CEvent CAutoResetEvent;
typedef CEvent CManualResetEvent;
#define Event_Construct(p) *(p) = NULL
#define Event_IsCreated(p) (*(p) != NULL)
#define Event_Close(p) HandlePtr_Close(p)
#define Event_Wait(p) Handle_WaitObject(*(p))
WRes Event_Set(CEvent *p);
WRes Event_Reset(CEvent *p);
WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);
typedef HANDLE CSemaphore;
#define Semaphore_Construct(p) *(p) = NULL
#define Semaphore_IsCreated(p) (*(p) != NULL)
#define Semaphore_Close(p) HandlePtr_Close(p)
#define Semaphore_Wait(p) Handle_WaitObject(*(p))
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
WRes Semaphore_Release1(CSemaphore *p);
typedef CRITICAL_SECTION CCriticalSection;
WRes CriticalSection_Init(CCriticalSection *p);
#define CriticalSection_Delete(p) DeleteCriticalSection(p)
#define CriticalSection_Enter(p) EnterCriticalSection(p)
#define CriticalSection_Leave(p) LeaveCriticalSection(p)
#else // _WIN32
typedef struct _CEvent
{
int _created;
int _manual_reset;
int _state;
pthread_mutex_t _mutex;
pthread_cond_t _cond;
} CEvent;
typedef CEvent CAutoResetEvent;
typedef CEvent CManualResetEvent;
#define Event_Construct(p) (p)->_created = 0
#define Event_IsCreated(p) ((p)->_created)
WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);
WRes Event_Set(CEvent *p);
WRes Event_Reset(CEvent *p);
WRes Event_Wait(CEvent *p);
WRes Event_Close(CEvent *p);
typedef struct _CSemaphore
{
int _created;
UInt32 _count;
UInt32 _maxCount;
pthread_mutex_t _mutex;
pthread_cond_t _cond;
} CSemaphore;
#define Semaphore_Construct(p) (p)->_created = 0
#define Semaphore_IsCreated(p) ((p)->_created)
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1)
WRes Semaphore_Wait(CSemaphore *p);
WRes Semaphore_Close(CSemaphore *p);
typedef struct _CCriticalSection
{
pthread_mutex_t _mutex;
} CCriticalSection;
WRes CriticalSection_Init(CCriticalSection *p);
void CriticalSection_Delete(CCriticalSection *cs);
void CriticalSection_Enter(CCriticalSection *cs);
void CriticalSection_Leave(CCriticalSection *cs);
LONG InterlockedIncrement(LONG volatile *addend);
#endif // _WIN32
WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p);
EXTERN_C_END
#endif

208
extern/lzma/Types.h vendored Normal file

@@ -0,0 +1,208 @@
/* Types.h -- Basic types
2008-11-23 : Igor Pavlov : Public domain */
#ifndef __7Z_TYPES_H
#define __7Z_TYPES_H
#include <stddef.h>
#ifdef _WIN32
#include <windows.h>
#endif
#define SZ_OK 0
#define SZ_ERROR_DATA 1
#define SZ_ERROR_MEM 2
#define SZ_ERROR_CRC 3
#define SZ_ERROR_UNSUPPORTED 4
#define SZ_ERROR_PARAM 5
#define SZ_ERROR_INPUT_EOF 6
#define SZ_ERROR_OUTPUT_EOF 7
#define SZ_ERROR_READ 8
#define SZ_ERROR_WRITE 9
#define SZ_ERROR_PROGRESS 10
#define SZ_ERROR_FAIL 11
#define SZ_ERROR_THREAD 12
#define SZ_ERROR_ARCHIVE 16
#define SZ_ERROR_NO_ARCHIVE 17
typedef int SRes;
#ifdef _WIN32
typedef DWORD WRes;
#else
typedef int WRes;
#endif
#ifndef RINOK
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
#endif
typedef unsigned char Byte;
typedef short Int16;
typedef unsigned short UInt16;
#ifdef _LZMA_UINT32_IS_ULONG
typedef long Int32;
typedef unsigned long UInt32;
#else
typedef int Int32;
typedef unsigned int UInt32;
#endif
#ifdef _SZ_NO_INT_64
/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
NOTES: Some code will work incorrectly in that case! */
typedef long Int64;
typedef unsigned long UInt64;
#else
#if defined(_MSC_VER) || defined(__BORLANDC__)
typedef __int64 Int64;
typedef unsigned __int64 UInt64;
#else
typedef long long int Int64;
typedef unsigned long long int UInt64;
#endif
#endif
#ifdef _LZMA_NO_SYSTEM_SIZE_T
typedef UInt32 SizeT;
#else
typedef size_t SizeT;
#endif
typedef int Bool;
#define True 1
#define False 0
#ifdef _MSC_VER
#if _MSC_VER >= 1300
#define MY_NO_INLINE __declspec(noinline)
#else
#define MY_NO_INLINE
#endif
#define MY_CDECL __cdecl
#define MY_STD_CALL __stdcall
#define MY_FAST_CALL MY_NO_INLINE __fastcall
#else
#define MY_CDECL
#define MY_STD_CALL
#define MY_FAST_CALL
#endif
/* The following interfaces use first parameter as pointer to structure */
typedef struct
{
SRes (*Read)(void *p, void *buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) < input(*size)) is allowed */
} ISeqInStream;
/* it can return SZ_ERROR_INPUT_EOF */
SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size);
SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType);
SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf);
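/* Example (editor's sketch, not part of the upstream header): implementing
   ISeqInStream over a FILE *. The interface struct is placed first in the
   container so the callback can recover it with a simple cast:
#include <stdio.h>
typedef struct
{
  ISeqInStream s;
  FILE *file;
} CExampleFileInStream;

static SRes ExampleFile_Read(void *p, void *buf, size_t *size)
{
  CExampleFileInStream *self = (CExampleFileInStream *)p;
  *size = fread(buf, 1, *size, self->file); // 0 bytes out = end of stream
  return SZ_OK;
}

// usage: CExampleFileInStream in; in.s.Read = ExampleFile_Read; in.file = f;
*/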
typedef struct
{
size_t (*Write)(void *p, const void *buf, size_t size);
/* Returns: result - the number of actually written bytes.
(result < size) means error */
} ISeqOutStream;
typedef enum
{
SZ_SEEK_SET = 0,
SZ_SEEK_CUR = 1,
SZ_SEEK_END = 2
} ESzSeek;
typedef struct
{
SRes (*Read)(void *p, void *buf, size_t *size); /* same as ISeqInStream::Read */
SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
} ISeekInStream;
typedef struct
{
SRes (*Look)(void *p, void **buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) > input(*size)) is not allowed
(output(*size) < input(*size)) is allowed */
SRes (*Skip)(void *p, size_t offset);
/* offset must be <= output(*size) of Look */
SRes (*Read)(void *p, void *buf, size_t *size);
/* reads directly (without buffer). It's same as ISeqInStream::Read */
SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
} ILookInStream;
SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size);
SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset);
/* reads via ILookInStream::Read */
SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType);
SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size);
#define LookToRead_BUF_SIZE (1 << 14)
typedef struct
{
ILookInStream s;
ISeekInStream *realStream;
size_t pos;
size_t size;
Byte buf[LookToRead_BUF_SIZE];
} CLookToRead;
void LookToRead_CreateVTable(CLookToRead *p, int lookahead);
void LookToRead_Init(CLookToRead *p);
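/* Example (editor's sketch): typical wiring of CLookToRead over an
   ISeekInStream, as in the SDK's 7z example code:
     CLookToRead lookStream;
     LookToRead_CreateVTable(&lookStream, False);
     lookStream.realStream = &mySeekInStream;
     LookToRead_Init(&lookStream);
   After this, &lookStream.s can be passed wherever an ILookInStream is
   expected. */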
typedef struct
{
ISeqInStream s;
ILookInStream *realStream;
} CSecToLook;
void SecToLook_CreateVTable(CSecToLook *p);
typedef struct
{
ISeqInStream s;
ILookInStream *realStream;
} CSecToRead;
void SecToRead_CreateVTable(CSecToRead *p);
typedef struct
{
SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize);
/* Returns: result. (result != SZ_OK) means break.
Value (UInt64)(Int64)-1 for size means unknown value. */
} ICompressProgress;
typedef struct
{
void *(*Alloc)(void *p, size_t size);
void (*Free)(void *p, void *address); /* address can be 0 */
} ISzAlloc;
#define IAlloc_Alloc(p, size) (p)->Alloc((p), size)
#define IAlloc_Free(p, a) (p)->Free((p), a)
#endif

236
extern/lzma/history.txt vendored Normal file

@@ -0,0 +1,236 @@
HISTORY of the LZMA SDK
-----------------------
4.65 2009-02-03
-------------------------
- Some minor fixes
4.63 2008-12-31
-------------------------
- Some minor fixes
4.61 beta 2008-11-23
-------------------------
- The bug in ANSI-C LZMA Decoder was fixed:
If encoded stream was corrupted, decoder could access memory
outside of allocated range.
- Some changes in ANSI-C 7z Decoder interfaces.
- LZMA SDK is placed in the public domain.
4.60 beta 2008-08-19
-------------------------
- Some minor fixes.
4.59 beta 2008-08-13
-------------------------
- The bug was fixed:
LZMA Encoder in fast compression mode could access memory outside of
allocated range in some rare cases.
4.58 beta 2008-05-05
-------------------------
- ANSI-C LZMA Decoder was rewritten for speed optimizations.
- ANSI-C LZMA Encoder was included to LZMA SDK.
- C++ LZMA code now is just wrapper over ANSI-C code.
4.57 2007-12-12
-------------------------
- Speed optimizations in C++ LZMA Decoder.
- Small changes for more compatibility with some C/C++ compilers.
4.49 beta 2007-07-05
-------------------------
- .7z ANSI-C Decoder:
- now it supports BCJ and BCJ2 filters
- now it supports files larger than 4 GB.
- now it supports "Last Write Time" field for files.
- C++ code for .7z archives compressing/decompressing from 7-zip
was included to LZMA SDK.
4.43 2006-06-04
-------------------------
- Small changes for more compatibility with some C/C++ compilers.
4.42 2006-05-15
-------------------------
- Small changes in .h files in ANSI-C version.
4.39 beta 2006-04-14
-------------------------
- The bug in versions 4.33b:4.38b was fixed:
C++ version of LZMA encoder could not correctly compress
files larger than 2 GB with HC4 match finder (-mfhc4).
4.37 beta 2006-04-06
-------------------------
- Fixes in C++ code: code could not be compiled if _NO_EXCEPTIONS was defined.
4.35 beta 2006-03-02
-------------------------
- The bug was fixed in C++ version of LZMA Decoder:
If encoded stream was corrupted, decoder could access memory
outside of allocated range.
4.34 beta 2006-02-27
-------------------------
- Compressing speed and memory requirements for compressing were increased
- LZMA now can use only these match finders: HC4, BT2, BT3, BT4
4.32 2005-12-09
-------------------------
- Java version of LZMA SDK was included
4.30 2005-11-20
-------------------------
- Compression ratio was improved in -a2 mode
- Speed optimizations for compressing in -a2 mode
- -fb switch now supports values up to 273
- The bug in 7z_C (7zIn.c) was fixed:
It used Alloc/Free functions from different memory pools.
So if program used two memory pools, it worked incorrectly.
- 7z_C: .7z format supporting was improved
- LZMA# SDK (C#.NET version) was included
4.27 (Updated) 2005-09-21
-------------------------
- Some GUIDs/interfaces in C++ were changed.
IStream.h:
ISequentialInStream::Read now works as old ReadPart
ISequentialOutStream::Write now works as old WritePart
4.27 2005-08-07
-------------------------
- The bug in LzmaDecodeSize.c was fixed:
if _LZMA_IN_CB and _LZMA_OUT_READ were defined,
decompressing worked incorrectly.
4.26 2005-08-05
-------------------------
- Fixes in 7z_C code and LzmaTest.c:
previous versions could work incorrectly,
if malloc(0) returns 0
4.23 2005-06-29
-------------------------
- Small fixes in C++ code
4.22 2005-06-10
-------------------------
- Small fixes
4.21 2005-06-08
-------------------------
- Interfaces for ANSI-C LZMA Decoder (LzmaDecode.c) were changed
- New additional version of ANSI-C LZMA Decoder with zlib-like interface:
- LzmaStateDecode.h
- LzmaStateDecode.c
- LzmaStateTest.c
- ANSI-C LZMA Decoder now can decompress files larger than 4 GB
4.17 2005-04-18
-------------------------
- New example for RAM->RAM compressing/decompressing:
LZMA + BCJ (filter for x86 code):
- LzmaRam.h
- LzmaRam.cpp
- LzmaRamDecode.h
- LzmaRamDecode.c
- -f86 switch for lzma.exe
4.16 2005-03-29
-------------------------
- The bug was fixed in LzmaDecode.c (ANSI-C LZMA Decoder):
If _LZMA_OUT_READ was defined, and if encoded stream was corrupted,
decoder could access memory outside of allocated range.
- Speed optimization of ANSI-C LZMA Decoder (now it's about 20% faster).
Old version of LZMA Decoder now is in file LzmaDecodeSize.c.
LzmaDecodeSize.c can provide slightly smaller code than LzmaDecode.c
- Small speed optimization in LZMA C++ code
- filter for SPARC's code was added
- Simplified version of .7z ANSI-C Decoder was included
4.06 2004-09-05
-------------------------
- The bug in v4.05 was fixed:
LZMA-Encoder didn't release output stream in some cases.
4.05 2004-08-25
-------------------------
- Source code of filters for x86, IA-64, ARM, ARM-Thumb
and PowerPC code was included to SDK
- Some internal minor changes
4.04 2004-07-28
-------------------------
- More compatibility with some C++ compilers
4.03 2004-06-18
-------------------------
- "Benchmark" command was added. It measures compressing
and decompressing speed and shows rating values.
Also it checks hardware errors.
4.02 2004-06-10
-------------------------
- C++ LZMA Encoder/Decoder code now is more portable
and it can be compiled by GCC on Linux.
4.01 2004-02-15
-------------------------
- Some detection of data corruption was enabled.
LzmaDecode.c / RangeDecoderReadByte
.....
{
rd->ExtraBytes = 1;
return 0xFF;
}
4.00 2004-02-13
-------------------------
- Original version of LZMA SDK
HISTORY of the LZMA
-------------------
2001-2008: Improvements to LZMA compressing/decompressing code,
keeping compatibility with original LZMA format
1996-2001: Development of LZMA compression format
Some milestones:
2001-08-30: LZMA compression was added to 7-Zip
1999-01-02: First version of 7-Zip was released
End of document

594
extern/lzma/lzma.txt vendored Normal file

@@ -0,0 +1,594 @@
LZMA SDK 4.65
-------------
LZMA SDK provides the documentation, samples, header files, libraries,
and tools you need to develop applications that use LZMA compression.
LZMA is the default and general compression method of the 7z format
in the 7-Zip compression program (www.7-zip.org). LZMA provides a high
compression ratio and very fast decompression.
LZMA is an improved version of the famous LZ77 compression algorithm.
It was improved to maximize the compression ratio while keeping high
decompression speed and low memory requirements for decompression.
LICENSE
-------
LZMA SDK is written and placed in the public domain by Igor Pavlov.
LZMA SDK Contents
-----------------
LZMA SDK includes:
- ANSI-C/C++/C#/Java source code for LZMA compressing and decompressing
- Compiled file->file LZMA compressing/decompressing program for Windows system
UNIX/Linux version
------------------
To compile C++ version of file->file LZMA encoding, go to directory
C++/7zip/Compress/LZMA_Alone
and call make to recompile it:
make -f makefile.gcc clean all
In some UNIX/Linux versions you must compile LZMA with static libraries.
To compile with static libraries, you can use
LIB = -lm -static
Files
---------------------
lzma.txt - LZMA SDK description (this file)
7zFormat.txt - 7z Format description
7zC.txt - 7z ANSI-C Decoder description
methods.txt - Compression method IDs for .7z
lzma.exe - Compiled file->file LZMA encoder/decoder for Windows
history.txt - history of the LZMA SDK
Source code structure
---------------------
C/ - C files
7zCrc*.* - CRC code
Alloc.* - Memory allocation functions
Bra*.* - Filters for x86, IA-64, ARM, ARM-Thumb, PowerPC and SPARC code
LzFind.* - Match finder for LZ (LZMA) encoders
LzFindMt.* - Match finder for LZ (LZMA) encoders for multithreading encoding
LzHash.h - Additional file for LZ match finder
LzmaDec.* - LZMA decoding
LzmaEnc.* - LZMA encoding
LzmaLib.* - LZMA Library for DLL calling
Types.h - Basic types for other .c files
Threads.* - The code for multithreading.
LzmaLib - LZMA Library (.DLL for Windows)
LzmaUtil - LZMA Utility (file->file LZMA encoder/decoder).
Archive - files related to archiving
7z - 7z ANSI-C Decoder
CPP/ -- CPP files
Common - common files for C++ projects
Windows - common files for Windows related code
7zip - files related to 7-Zip Project
Common - common files for 7-Zip
Compress - files related to compression/decompression
Copy - Copy coder
RangeCoder - Range Coder (special code of compression/decompression)
LZMA - LZMA compression/decompression on C++
LZMA_Alone - file->file LZMA compression/decompression
Branch - Filters for x86, IA-64, ARM, ARM-Thumb, PowerPC and SPARC code
Archive - files related to archiving
Common - common files for archive handling
7z - 7z C++ Encoder/Decoder
Bundles - Modules that are bundles of other modules
Alone7z - 7zr.exe: Standalone version of 7z.exe that supports only 7z/LZMA/BCJ/BCJ2
Format7zR - 7zr.dll: Reduced version of 7za.dll: extracting/compressing to 7z/LZMA/BCJ/BCJ2
Format7zExtractR - 7zxr.dll: Reduced version of 7zxa.dll: extracting from 7z/LZMA/BCJ/BCJ2.
UI - User Interface files
Client7z - Test application for 7za.dll, 7zr.dll, 7zxr.dll
Common - Common UI files
Console - Code for console archiver
CS/ - C# files
7zip
Common - some common files for 7-Zip
Compress - files related to compression/decompression
LZ - files related to LZ (Lempel-Ziv) compression algorithm
LZMA - LZMA compression/decompression
LzmaAlone - file->file LZMA compression/decompression
RangeCoder - Range Coder (special code of compression/decompression)
Java/ - Java files
SevenZip
Compression - files related to compression/decompression
LZ - files related to LZ (Lempel-Ziv) compression algorithm
LZMA - LZMA compression/decompression
RangeCoder - Range Coder (special code of compression/decompression)
C/C++ source code of LZMA SDK is part of 7-Zip project.
7-Zip source code can be downloaded from 7-Zip's SourceForge page:
http://sourceforge.net/projects/sevenzip/
LZMA features
-------------
- Variable dictionary size (up to 1 GB)
- Estimated compressing speed: about 2 MB/s on 2 GHz CPU
- Estimated decompressing speed:
- 20-30 MB/s on 2 GHz Core 2 or AMD Athlon 64
- 1-2 MB/s on 200 MHz ARM, MIPS, PowerPC or other simple RISC
- Small memory requirements for decompressing (16 KB + DictionarySize)
- Small code size for decompressing: 5-8 KB
The LZMA decoder uses only integer operations and can be
implemented on any modern 32-bit CPU (or on a 16-bit CPU with some conditions).
Some critical operations that affect the speed of LZMA decompression:
1) 32*16-bit integer multiply
2) Mispredicted branches (the penalty mostly depends on pipeline length)
3) 32-bit shift and arithmetic operations
The speed of LZMA decompression mostly depends on CPU speed.
Memory speed matters little, but if your CPU has a small data cache,
the overall weight of memory speed will slightly increase.
How To Use
----------
Using LZMA encoder/decoder executable
--------------------------------------
Usage: LZMA <e|d> inputFile outputFile [<switches>...]
e: encode file
d: decode file
b: Benchmark. There are two tests: compressing and decompressing
with LZMA method. Benchmark shows rating in MIPS (million
instructions per second). Rating value is calculated from
measured speed and it is normalized with Intel's Core 2 results.
Also Benchmark checks possible hardware errors (RAM
errors in most cases). Benchmark uses these settings:
(-a1, -d21, -fb32, -mfbt4). You can change only -d parameter.
Also you can change the number of iterations. Example for 30 iterations:
LZMA b 30
Default number of iterations is 10.
<Switches>
-a{N}: set compression mode 0 = fast, 1 = normal
default: 1 (normal)
-d{N}: set dictionary size - [0, 30], default: 23 (8MB)
The maximum value for dictionary size is 1 GB = 2^30 bytes.
Dictionary size is calculated as DictionarySize = 2^N bytes.
For decompressing file compressed by LZMA method with dictionary
size D = 2^N you need about D bytes of memory (RAM).
-fb{N}: set number of fast bytes - [5, 273], default: 128
Usually, a bigger number gives a slightly better compression ratio
and a slower compression process.
-lc{N}: set number of literal context bits - [0, 8], default: 3
Sometimes lc=4 gives gain for big files.
-lp{N}: set number of literal pos bits - [0, 4], default: 0
The lp switch is intended for periodic data whose period is
equal to 2^N. For example, for 32-bit (4-byte)
periodic data you can use lp=2. It's often better to set lc=0
if you change the lp switch.
-pb{N}: set number of pos bits - [0, 4], default: 2
The pb switch is intended for periodic data
whose period is equal to 2^N.
-mf{MF_ID}: set Match Finder. Default: bt4.
Algorithms from the hc* group don't provide a good compression
ratio, but they often work pretty fast in combination with
fast mode (-a0).
Memory requirements depend on the dictionary size
(parameter "d" in the table below).
MF_ID Memory Description
bt2 d * 9.5 + 4MB Binary Tree with 2 bytes hashing.
bt3 d * 11.5 + 4MB Binary Tree with 3 bytes hashing.
bt4 d * 11.5 + 4MB Binary Tree with 4 bytes hashing.
hc4 d * 7.5 + 4MB Hash Chain with 4 bytes hashing.
-eos: write End Of Stream marker. By default LZMA doesn't write
the eos marker, since the LZMA decoder knows the uncompressed size
stored in the .lzma file header.
-si: Read data from stdin (it will write End Of Stream marker).
-so: Write data to stdout
Examples:
1) LZMA e file.bin file.lzma -d16 -lc0
compresses file.bin to file.lzma with 64 KB dictionary (2^16=64K)
and 0 literal context bits. -lc0 reduces the memory requirements
for decompression.
2) LZMA e file.bin file.lzma -lc0 -lp2
compresses file.bin to file.lzma with settings suitable
for 32-bit periodical data (for example, ARM or MIPS code).
3) LZMA d file.lzma file.bin
decompresses file.lzma to file.bin.
Compression ratio hints
-----------------------
Recommendations
---------------
To increase the compression ratio for LZMA compression it's desirable
to have aligned data (if possible), and to arrange the data so that
code is grouped in one place and data is grouped in another
(this is better than mixing: code, data, code, data, ...).
Filters
-------
You can increase the compression ratio for some data types by using
special filters before compressing. For example, it's possible to
increase the compression ratio by 5-10% for code for these CPU ISAs:
x86, IA-64, ARM, ARM-Thumb, PowerPC, SPARC.
You can find the C source code of such filters in the C/Bra*.* files.
You can check the compression ratio gain of these filters with the
following 7-Zip commands (example for ARM code):
No filter:
7z a a1.7z a.bin -m0=lzma
With filter for little-endian ARM code:
7z a a2.7z a.bin -m0=arm -m1=lzma
It works in the following manner:
Compressing = Filter_encoding + LZMA_encoding
Decompressing = LZMA_decoding + Filter_decoding
The compressing and decompressing speed of such filters is very high,
so they will not increase decompression time much.
Moreover, filtering reduces the time for LZMA_decoding,
since the compression ratio with filtering is higher.
These filters convert CALL (calling procedure) instructions
from relative offsets to absolute addresses, so such data becomes more
compressible.
For some ISAs (for example, MIPS) it's impossible to get a gain from such a filter.
LZMA compressed file format
---------------------------
Offset Size Description
0 1 Special LZMA properties (lc,lp, pb in encoded form)
1 4 Dictionary size (little endian)
5 8 Uncompressed size (little endian). -1 means unknown size
13 Compressed data
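As an illustration (editor's sketch, not part of the SDK), the following
parses that 13-byte header; the properties byte packs the lc, lp and pb
parameters as props = (pb * 5 + lp) * 9 + lc:
  #include <stdint.h>
  typedef struct
  {
    unsigned lc, lp, pb;
    uint32_t dictSize;
    uint64_t uncompSize;  /* all bits set means unknown size */
  } LzmaHdr;
  static int ParseLzmaHeader(const uint8_t h[13], LzmaHdr *out)
  {
    unsigned i, d = h[0];
    if (d >= 9 * 5 * 5)
      return 1;  /* invalid properties byte */
    out->lc = d % 9; d /= 9;
    out->lp = d % 5;
    out->pb = d / 5;
    out->dictSize = 0;
    out->uncompSize = 0;
    for (i = 0; i < 4; i++)  /* both size fields are little-endian */
      out->dictSize |= (uint32_t)h[1 + i] << (8 * i);
    for (i = 0; i < 8; i++)
      out->uncompSize |= (uint64_t)h[5 + i] << (8 * i);
    return 0;
  }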
ANSI-C LZMA Decoder
~~~~~~~~~~~~~~~~~~~
Please note that the interfaces for ANSI-C code were changed in LZMA SDK 4.58.
If you want to use the old interfaces, you can download a previous version
of the LZMA SDK from the sourceforge.net site.
To use ANSI-C LZMA Decoder you need the following files:
1) LzmaDec.h + LzmaDec.c + Types.h
LzmaUtil/LzmaUtil.c is an example application that uses these files.
Memory requirements for LZMA decoding
-------------------------------------
Stack usage of the LZMA decoding function for local variables is not
larger than 200-400 bytes.
The LZMA decoder uses a dictionary buffer and an internal state structure.
The internal state structure consumes
  state_size = (4 + (1.5 << (lc + lp))) KB
By default (lc=3, lp=0), 1.5 << 3 = 12, so state_size = 16 KB.
How To decompress data
----------------------
LZMA Decoder (ANSI-C version) now supports 2 interfaces:
1) Single-call Decompressing
2) Multi-call State Decompressing (zlib-like interface)
You must use an external allocator:
Example:
void *SzAlloc(void *p, size_t size) { p = p; return malloc(size); }
void SzFree(void *p, void *address) { p = p; free(address); }
ISzAlloc alloc = { SzAlloc, SzFree };
You can use the p = p; statement to suppress compiler warnings about unused parameters.
Single-call Decompressing
-------------------------
When to use: RAM->RAM decompressing
Compile files: LzmaDec.h + LzmaDec.c + Types.h
Compile defines: no defines
Memory Requirements:
- Input buffer: compressed size
- Output buffer: uncompressed size
- LZMA Internal Structures: state_size (16 KB for default settings)
Interface:
int LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
ELzmaStatus *status, ISzAlloc *alloc);
In:
dest - output data
destLen - output data size
src - input data
srcLen - input data size
propData - LZMA properties (5 bytes)
propSize - size of propData buffer (5 bytes)
finishMode - It has meaning only if the decoding reaches output limit (*destLen).
LZMA_FINISH_ANY - Decode just destLen bytes.
LZMA_FINISH_END - Stream must be finished after (*destLen).
You can use LZMA_FINISH_END, when you know that
current output buffer covers last bytes of stream.
alloc - Memory allocator.
Out:
destLen - processed output size
srcLen - processed input size
Output:
SZ_OK
status:
LZMA_STATUS_FINISHED_WITH_MARK
LZMA_STATUS_NOT_FINISHED
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
SZ_ERROR_DATA - Data error
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
If the LZMA decoder sees the end marker before reaching the output limit, it returns SZ_OK,
and the output value of destLen will be less than the output buffer size limit.
You can use multiple checks to test data integrity after full decompression:
1) Check Result and "status" variable.
2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
You must use the correct finish mode in that case.
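As a minimal illustration (editor's sketch, error checks omitted), decoding
an in-memory .lzma image whose 13-byte header precedes the compressed data:
  #include <stdlib.h>
  #include "LzmaDec.h"
  static void *SzAlloc(void *p, size_t size) { p = p; return malloc(size); }
  static void SzFree(void *p, void *address) { p = p; free(address); }
  static SRes DecodeImage(Byte *dest, SizeT *destLen,
      const Byte *image, SizeT imageSize)
  {
    ISzAlloc alloc = { SzAlloc, SzFree };
    ELzmaStatus status;
    SizeT srcLen = imageSize - (LZMA_PROPS_SIZE + 8);  /* skip the header */
    return LzmaDecode(dest, destLen,
        image + LZMA_PROPS_SIZE + 8, &srcLen,
        image, LZMA_PROPS_SIZE,
        LZMA_FINISH_END, &status, &alloc);
  }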
Multi-call State Decompressing (zlib-like interface)
----------------------------------------------------
When to use: file->file decompressing
Compile files: LzmaDec.h + LzmaDec.c + Types.h
Memory Requirements:
- Buffer for input stream: any size (for example, 16 KB)
- Buffer for output stream: any size (for example, 16 KB)
- LZMA Internal Structures: state_size (16 KB for default settings)
- LZMA dictionary (dictionary size is encoded in LZMA properties header)
1) read LZMA properties (5 bytes) and uncompressed size (8 bytes, little-endian) to header:
unsigned char header[LZMA_PROPS_SIZE + 8];
ReadFile(inFile, header, sizeof(header));
2) Allocate CLzmaDec structures (state + dictionary) using LZMA properties
CLzmaDec state;
LzmaDec_Constr(&state);
res = LzmaDec_Allocate(&state, header, LZMA_PROPS_SIZE, &g_Alloc);
if (res != SZ_OK)
return res;
3) Init the LzmaDec structure before any new LZMA stream, and call LzmaDec_DecodeToBuf in a loop
LzmaDec_Init(&state);
for (;;)
{
...
int res = LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode);
...
}
4) Free all allocated structures
LzmaDec_Free(&state, &g_Alloc);
For full code example, look at C/LzmaUtil/LzmaUtil.c code.
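A condensed version of that loop (editor's sketch; assumes the LzmaDec.h
declaration of LzmaDec_DecodeToBuf, which also takes an ELzmaStatus
out-parameter, and omits most error handling):
  #include <stdio.h>
  #define EX_BUF_SIZE (1 << 14)  /* 16 KB buffers, as suggested above */
  static SRes DecodeLoop(CLzmaDec *state, FILE *inFile, FILE *outFile)
  {
    Byte inBuf[EX_BUF_SIZE], outBuf[EX_BUF_SIZE];
    size_t inPos = 0, inSize = 0;
    for (;;)
    {
      SizeT inLen, outLen;
      ELzmaStatus status;
      SRes res;
      if (inPos == inSize)
      {
        inSize = fread(inBuf, 1, EX_BUF_SIZE, inFile);
        inPos = 0;
        if (inSize == 0)
          return SZ_ERROR_INPUT_EOF;  /* stream ended without a marker */
      }
      inLen = inSize - inPos;
      outLen = EX_BUF_SIZE;
      res = LzmaDec_DecodeToBuf(state, outBuf, &outLen,
          inBuf + inPos, &inLen, LZMA_FINISH_ANY, &status);
      inPos += inLen;
      if (outLen != 0)
        fwrite(outBuf, 1, outLen, outFile);
      if (res != SZ_OK)
        return res;
      if (status == LZMA_STATUS_FINISHED_WITH_MARK)
        return SZ_OK;
    }
  }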
How To compress data
--------------------
Compile files: LzmaEnc.h + LzmaEnc.c + Types.h +
LzFind.c + LzFind.h + LzFindMt.c + LzFindMt.h + LzHash.h
Memory Requirements:
- (dictSize * 11.5 + 6 MB) + state_size
Lzma Encoder can use two memory allocators:
1) alloc - for small arrays.
2) allocBig - for big arrays.
For example, you can use Large RAM Pages (2 MB) in the allocBig allocator for
better compression speed. Note that Windows has a bad implementation of
Large RAM Pages.
It's OK to use the same allocator for alloc and allocBig.
Single-call Compression with callbacks
--------------------------------------
Check C/LzmaUtil/LzmaUtil.c as an example.
When to use: file->file compressing
1) you must implement callback structures for interfaces:
ISeqInStream
ISeqOutStream
ICompressProgress
ISzAlloc
static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); }
static void SzFree(void *p, void *address) { p = p; MyFree(address); }
static ISzAlloc g_Alloc = { SzAlloc, SzFree };
CFileSeqInStream inStream;
CFileSeqOutStream outStream;
inStream.funcTable.Read = MyRead;
inStream.file = inFile;
outStream.funcTable.Write = MyWrite;
outStream.file = outFile;
2) Create CLzmaEncHandle object;
CLzmaEncHandle enc;
enc = LzmaEnc_Create(&g_Alloc);
if (enc == 0)
return SZ_ERROR_MEM;
3) initialize CLzmaEncProps properties;
LzmaEncProps_Init(&props);
Then you can change some properties in that structure.
4) Send LZMA properties to LZMA Encoder
res = LzmaEnc_SetProps(enc, &props);
5) Write encoded properties to header
Byte header[LZMA_PROPS_SIZE + 8];
size_t headerSize = LZMA_PROPS_SIZE;
UInt64 fileSize;
int i;
res = LzmaEnc_WriteProperties(enc, header, &headerSize);
fileSize = MyGetFileLength(inFile);
for (i = 0; i < 8; i++)
header[headerSize++] = (Byte)(fileSize >> (8 * i));
MyWriteFileAndCheck(outFile, header, headerSize)
6) Call encoding function:
res = LzmaEnc_Encode(enc, &outStream.funcTable, &inStream.funcTable,
NULL, &g_Alloc, &g_Alloc);
7) Destroy LZMA Encoder Object
LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc);
If a callback function returns an error code, LzmaEnc_Encode also returns that code.
Single-call RAM->RAM Compression
--------------------------------
Single-call RAM->RAM Compression is similar to Compression with callbacks,
but you provide pointers to buffers instead of pointers to stream callbacks:
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
Return code:
SZ_OK - OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_PARAM - Incorrect parameter
SZ_ERROR_OUTPUT_EOF - output buffer overflow
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
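For instance (editor's sketch, no error checks), compressing one buffer into
another with default properties:
  #include <stdlib.h>
  #include "LzmaEnc.h"
  static void *SzAlloc(void *p, size_t size) { p = p; return malloc(size); }
  static void SzFree(void *p, void *address) { p = p; free(address); }
  /* propsEncoded must point to LZMA_PROPS_SIZE (5) bytes and *propsSize
     must be set to LZMA_PROPS_SIZE on input. */
  static SRes CompressBuffer(Byte *dest, SizeT *destLen,
      const Byte *src, SizeT srcLen,
      Byte *propsEncoded, SizeT *propsSize)
  {
    ISzAlloc alloc = { SzAlloc, SzFree };
    CLzmaEncProps props;
    LzmaEncProps_Init(&props);  /* default settings */
    return LzmaEncode(dest, destLen, src, srcLen,
        &props, propsEncoded, propsSize, 0 /* no end mark */,
        NULL /* no progress */, &alloc, &alloc);
  }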
LZMA Defines
------------
_LZMA_SIZE_OPT - Enable some optimizations in LZMA Decoder to get smaller executable code.
_LZMA_PROB32 - It can increase the speed on some 32-bit CPUs, but memory usage for
some structures will be doubled in that case.
_LZMA_UINT32_IS_ULONG - Define it if int is 16-bit on your compiler and long is 32-bit.
_LZMA_NO_SYSTEM_SIZE_T - Define it if you don't want to use size_t type.
C++ LZMA Encoder/Decoder
~~~~~~~~~~~~~~~~~~~~~~~~
C++ LZMA code uses COM-like interfaces, so if you want to use it,
you may want to study the basics of COM/OLE.
The C++ LZMA code is just a wrapper over the ANSI-C code.
C++ Notes
~~~~~~~~~~~~~~~~~~~~~~~~
If you use some C++ code folders in 7-Zip (for example, C++ code for .7z handling),
you must check that you work correctly with the "new" operator.
7-Zip can be compiled with MSVC 6.0, which doesn't throw an exception from the "new" operator.
So 7-Zip uses "CPP\Common\NewHandler.cpp", which redefines the "new" operator:
operator new(size_t size)
{
void *p = ::malloc(size);
if (p == 0)
throw CNewException();
return p;
}
If you use an MSVC version that throws an exception from the "new" operator, you can compile without
"NewHandler.cpp", so the standard exception will be used. Actually, some 7-Zip code
catches any exception in internal code and converts it to an HRESULT code,
so you don't need to catch CNewException if you call the COM interfaces of 7-Zip.
---
http://www.7-zip.org
http://www.7-zip.org/sdk.html
http://www.7-zip.org/support.html