External: update LZMA SDK to latest version
Update from version 4.65 (2009) to latest 23.01 (2023). LZMA is only used for pointcache "Heavy" compression option. With updated SDK, testing in a 100k particle system point cache (Windows/Ryzen5950X), average times to read or write a cached frame: - Read a bit faster, 24.3 -> 23.2ms - Write/bake about 2x faster, 237 -> 103ms Pull Request: https://projects.blender.org/blender/blender/pulls/118433
This commit is contained in:
committed by
Aras Pranckevicius
parent
207861af2b
commit
3f0b9e4a58
597
extern/lzma/7zTypes.h
vendored
Normal file
597
extern/lzma/7zTypes.h
vendored
Normal file
@@ -0,0 +1,597 @@
|
||||
/* 7zTypes.h -- Basic types
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef ZIP7_7Z_TYPES_H
|
||||
#define ZIP7_7Z_TYPES_H
|
||||
|
||||
#ifdef _WIN32
|
||||
/* #include <windows.h> */
|
||||
#else
|
||||
#include <errno.h>
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifndef EXTERN_C_BEGIN
|
||||
#ifdef __cplusplus
|
||||
#define EXTERN_C_BEGIN extern "C" {
|
||||
#define EXTERN_C_END }
|
||||
#else
|
||||
#define EXTERN_C_BEGIN
|
||||
#define EXTERN_C_END
|
||||
#endif
|
||||
#endif
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
#define SZ_OK 0
|
||||
|
||||
#define SZ_ERROR_DATA 1
|
||||
#define SZ_ERROR_MEM 2
|
||||
#define SZ_ERROR_CRC 3
|
||||
#define SZ_ERROR_UNSUPPORTED 4
|
||||
#define SZ_ERROR_PARAM 5
|
||||
#define SZ_ERROR_INPUT_EOF 6
|
||||
#define SZ_ERROR_OUTPUT_EOF 7
|
||||
#define SZ_ERROR_READ 8
|
||||
#define SZ_ERROR_WRITE 9
|
||||
#define SZ_ERROR_PROGRESS 10
|
||||
#define SZ_ERROR_FAIL 11
|
||||
#define SZ_ERROR_THREAD 12
|
||||
|
||||
#define SZ_ERROR_ARCHIVE 16
|
||||
#define SZ_ERROR_NO_ARCHIVE 17
|
||||
|
||||
typedef int SRes;
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#if _MSC_VER > 1200
|
||||
#define MY_ALIGN(n) __declspec(align(n))
|
||||
#else
|
||||
#define MY_ALIGN(n)
|
||||
#endif
|
||||
#else
|
||||
/*
|
||||
// C11/C++11:
|
||||
#include <stdalign.h>
|
||||
#define MY_ALIGN(n) alignas(n)
|
||||
*/
|
||||
#define MY_ALIGN(n) __attribute__ ((aligned(n)))
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
/* typedef DWORD WRes; */
|
||||
typedef unsigned WRes;
|
||||
#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
|
||||
|
||||
// #define MY_HRES_ERROR_INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
|
||||
|
||||
#else // _WIN32
|
||||
|
||||
// #define ENV_HAVE_LSTAT
|
||||
typedef int WRes;
|
||||
|
||||
// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
|
||||
#define MY_FACILITY_ERRNO 0x800
|
||||
#define MY_FACILITY_WIN32 7
|
||||
#define MY_FACILITY_WRes MY_FACILITY_ERRNO
|
||||
|
||||
#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
|
||||
( (HRESULT)(x) & 0x0000FFFF) \
|
||||
| (MY_FACILITY_WRes << 16) \
|
||||
| (HRESULT)0x80000000 ))
|
||||
|
||||
#define MY_SRes_HRESULT_FROM_WRes(x) \
|
||||
((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x))
|
||||
|
||||
// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno)
|
||||
#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x)
|
||||
|
||||
/*
|
||||
#define ERROR_FILE_NOT_FOUND 2L
|
||||
#define ERROR_ACCESS_DENIED 5L
|
||||
#define ERROR_NO_MORE_FILES 18L
|
||||
#define ERROR_LOCK_VIOLATION 33L
|
||||
#define ERROR_FILE_EXISTS 80L
|
||||
#define ERROR_DISK_FULL 112L
|
||||
#define ERROR_NEGATIVE_SEEK 131L
|
||||
#define ERROR_ALREADY_EXISTS 183L
|
||||
#define ERROR_DIRECTORY 267L
|
||||
#define ERROR_TOO_MANY_POSTS 298L
|
||||
|
||||
#define ERROR_INTERNAL_ERROR 1359L
|
||||
#define ERROR_INVALID_REPARSE_DATA 4392L
|
||||
#define ERROR_REPARSE_TAG_INVALID 4393L
|
||||
#define ERROR_REPARSE_TAG_MISMATCH 4394L
|
||||
*/
|
||||
|
||||
// we use errno equivalents for some WIN32 errors:
|
||||
|
||||
#define ERROR_INVALID_PARAMETER EINVAL
|
||||
#define ERROR_INVALID_FUNCTION EINVAL
|
||||
#define ERROR_ALREADY_EXISTS EEXIST
|
||||
#define ERROR_FILE_EXISTS EEXIST
|
||||
#define ERROR_PATH_NOT_FOUND ENOENT
|
||||
#define ERROR_FILE_NOT_FOUND ENOENT
|
||||
#define ERROR_DISK_FULL ENOSPC
|
||||
// #define ERROR_INVALID_HANDLE EBADF
|
||||
|
||||
// we use FACILITY_WIN32 for errors that has no errno equivalent
|
||||
// Too many posts were made to a semaphore.
|
||||
#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL)
|
||||
#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L)
|
||||
#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L)
|
||||
|
||||
// if (MY_FACILITY_WRes != FACILITY_WIN32),
|
||||
// we use FACILITY_WIN32 for COM errors:
|
||||
#define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
|
||||
#define E_INVALIDARG ((HRESULT)0x80070057L)
|
||||
#define MY_E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
|
||||
|
||||
/*
|
||||
// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
|
||||
#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
|
||||
#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
|
||||
#define MY_E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
|
||||
*/
|
||||
|
||||
#define TEXT(quote) quote
|
||||
|
||||
#define FILE_ATTRIBUTE_READONLY 0x0001
|
||||
#define FILE_ATTRIBUTE_HIDDEN 0x0002
|
||||
#define FILE_ATTRIBUTE_SYSTEM 0x0004
|
||||
#define FILE_ATTRIBUTE_DIRECTORY 0x0010
|
||||
#define FILE_ATTRIBUTE_ARCHIVE 0x0020
|
||||
#define FILE_ATTRIBUTE_DEVICE 0x0040
|
||||
#define FILE_ATTRIBUTE_NORMAL 0x0080
|
||||
#define FILE_ATTRIBUTE_TEMPORARY 0x0100
|
||||
#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200
|
||||
#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400
|
||||
#define FILE_ATTRIBUTE_COMPRESSED 0x0800
|
||||
#define FILE_ATTRIBUTE_OFFLINE 0x1000
|
||||
#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000
|
||||
#define FILE_ATTRIBUTE_ENCRYPTED 0x4000
|
||||
|
||||
#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef RINOK
|
||||
#define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; }
|
||||
#endif
|
||||
|
||||
#ifndef RINOK_WRes
|
||||
#define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; }
|
||||
#endif
|
||||
|
||||
typedef unsigned char Byte;
|
||||
typedef short Int16;
|
||||
typedef unsigned short UInt16;
|
||||
|
||||
#ifdef Z7_DECL_Int32_AS_long
|
||||
typedef long Int32;
|
||||
typedef unsigned long UInt32;
|
||||
#else
|
||||
typedef int Int32;
|
||||
typedef unsigned int UInt32;
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef _WIN32
|
||||
|
||||
typedef int INT;
|
||||
typedef Int32 INT32;
|
||||
typedef unsigned int UINT;
|
||||
typedef UInt32 UINT32;
|
||||
typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility
|
||||
typedef UINT32 ULONG;
|
||||
|
||||
#undef DWORD
|
||||
typedef UINT32 DWORD;
|
||||
|
||||
#define VOID void
|
||||
|
||||
#define HRESULT LONG
|
||||
|
||||
typedef void *LPVOID;
|
||||
// typedef void VOID;
|
||||
// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
|
||||
// gcc / clang on Unix : sizeof(long==sizeof(void*) in 32 or 64 bits)
|
||||
typedef long INT_PTR;
|
||||
typedef unsigned long UINT_PTR;
|
||||
typedef long LONG_PTR;
|
||||
typedef unsigned long DWORD_PTR;
|
||||
|
||||
typedef size_t SIZE_T;
|
||||
|
||||
#endif // _WIN32
|
||||
|
||||
|
||||
#define MY_HRES_ERROR_INTERNAL_ERROR ((HRESULT)0x8007054FL)
|
||||
|
||||
|
||||
#ifdef Z7_DECL_Int64_AS_long
|
||||
|
||||
typedef long Int64;
|
||||
typedef unsigned long UInt64;
|
||||
|
||||
#else
|
||||
|
||||
#if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__)
|
||||
typedef __int64 Int64;
|
||||
typedef unsigned __int64 UInt64;
|
||||
#else
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#include <stdint.h>
|
||||
typedef int64_t Int64;
|
||||
typedef uint64_t UInt64;
|
||||
#else
|
||||
typedef long long int Int64;
|
||||
typedef unsigned long long int UInt64;
|
||||
// #define UINT64_CONST(n) n ## ULL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#define UINT64_CONST(n) n
|
||||
|
||||
|
||||
#ifdef Z7_DECL_SizeT_AS_unsigned_int
|
||||
typedef unsigned int SizeT;
|
||||
#else
|
||||
typedef size_t SizeT;
|
||||
#endif
|
||||
|
||||
/*
|
||||
#if (defined(_MSC_VER) && _MSC_VER <= 1200)
|
||||
typedef size_t MY_uintptr_t;
|
||||
#else
|
||||
#include <stdint.h>
|
||||
typedef uintptr_t MY_uintptr_t;
|
||||
#endif
|
||||
*/
|
||||
|
||||
typedef int BoolInt;
|
||||
/* typedef BoolInt Bool; */
|
||||
#define True 1
|
||||
#define False 0
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
#define Z7_STDCALL __stdcall
|
||||
#else
|
||||
#define Z7_STDCALL
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
#if _MSC_VER >= 1300
|
||||
#define Z7_NO_INLINE __declspec(noinline)
|
||||
#else
|
||||
#define Z7_NO_INLINE
|
||||
#endif
|
||||
|
||||
#define Z7_FORCE_INLINE __forceinline
|
||||
|
||||
#define Z7_CDECL __cdecl
|
||||
#define Z7_FASTCALL __fastcall
|
||||
|
||||
#else // _MSC_VER
|
||||
|
||||
#if (defined(__GNUC__) && (__GNUC__ >= 4)) \
|
||||
|| (defined(__clang__) && (__clang_major__ >= 4)) \
|
||||
|| defined(__INTEL_COMPILER) \
|
||||
|| defined(__xlC__)
|
||||
#define Z7_NO_INLINE __attribute__((noinline))
|
||||
#define Z7_FORCE_INLINE __attribute__((always_inline)) inline
|
||||
#else
|
||||
#define Z7_NO_INLINE
|
||||
#define Z7_FORCE_INLINE
|
||||
#endif
|
||||
|
||||
#define Z7_CDECL
|
||||
|
||||
#if defined(_M_IX86) \
|
||||
|| defined(__i386__)
|
||||
// #define Z7_FASTCALL __attribute__((fastcall))
|
||||
// #define Z7_FASTCALL __attribute__((cdecl))
|
||||
#define Z7_FASTCALL
|
||||
#elif defined(MY_CPU_AMD64)
|
||||
// #define Z7_FASTCALL __attribute__((ms_abi))
|
||||
#define Z7_FASTCALL
|
||||
#else
|
||||
#define Z7_FASTCALL
|
||||
#endif
|
||||
|
||||
#endif // _MSC_VER
|
||||
|
||||
|
||||
/* The following interfaces use first parameter as pointer to structure */
|
||||
|
||||
// #define Z7_C_IFACE_CONST_QUAL
|
||||
#define Z7_C_IFACE_CONST_QUAL const
|
||||
|
||||
#define Z7_C_IFACE_DECL(a) \
|
||||
struct a ## _; \
|
||||
typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \
|
||||
typedef struct a ## _ a; \
|
||||
struct a ## _
|
||||
|
||||
|
||||
Z7_C_IFACE_DECL (IByteIn)
|
||||
{
|
||||
Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */
|
||||
};
|
||||
#define IByteIn_Read(p) (p)->Read(p)
|
||||
|
||||
|
||||
Z7_C_IFACE_DECL (IByteOut)
|
||||
{
|
||||
void (*Write)(IByteOutPtr p, Byte b);
|
||||
};
|
||||
#define IByteOut_Write(p, b) (p)->Write(p, b)
|
||||
|
||||
|
||||
Z7_C_IFACE_DECL (ISeqInStream)
|
||||
{
|
||||
SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size);
|
||||
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
|
||||
(output(*size) < input(*size)) is allowed */
|
||||
};
|
||||
#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
|
||||
|
||||
/* try to read as much as avail in stream and limited by (*processedSize) */
|
||||
SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize);
|
||||
/* it can return SZ_ERROR_INPUT_EOF */
|
||||
// SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size);
|
||||
// SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType);
|
||||
SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf);
|
||||
|
||||
|
||||
Z7_C_IFACE_DECL (ISeqOutStream)
|
||||
{
|
||||
size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size);
|
||||
/* Returns: result - the number of actually written bytes.
|
||||
(result < size) means error */
|
||||
};
|
||||
#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size)
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SZ_SEEK_SET = 0,
|
||||
SZ_SEEK_CUR = 1,
|
||||
SZ_SEEK_END = 2
|
||||
} ESzSeek;
|
||||
|
||||
|
||||
Z7_C_IFACE_DECL (ISeekInStream)
|
||||
{
|
||||
SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size); /* same as ISeqInStream::Read */
|
||||
SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin);
|
||||
};
|
||||
#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
|
||||
#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
|
||||
|
||||
|
||||
Z7_C_IFACE_DECL (ILookInStream)
|
||||
{
|
||||
SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size);
|
||||
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
|
||||
(output(*size) > input(*size)) is not allowed
|
||||
(output(*size) < input(*size)) is allowed */
|
||||
SRes (*Skip)(ILookInStreamPtr p, size_t offset);
|
||||
/* offset must be <= output(*size) of Look */
|
||||
SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size);
|
||||
/* reads directly (without buffer). It's same as ISeqInStream::Read */
|
||||
SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin);
|
||||
};
|
||||
|
||||
#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
|
||||
#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset)
|
||||
#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size)
|
||||
#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
|
||||
|
||||
|
||||
SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size);
|
||||
SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset);
|
||||
|
||||
/* reads via ILookInStream::Read */
|
||||
SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType);
|
||||
SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size);
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ILookInStream vt;
|
||||
ISeekInStreamPtr realStream;
|
||||
|
||||
size_t pos;
|
||||
size_t size; /* it's data size */
|
||||
|
||||
/* the following variables must be set outside */
|
||||
Byte *buf;
|
||||
size_t bufSize;
|
||||
} CLookToRead2;
|
||||
|
||||
void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
|
||||
|
||||
#define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; }
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ISeqInStream vt;
|
||||
ILookInStreamPtr realStream;
|
||||
} CSecToLook;
|
||||
|
||||
void SecToLook_CreateVTable(CSecToLook *p);
|
||||
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ISeqInStream vt;
|
||||
ILookInStreamPtr realStream;
|
||||
} CSecToRead;
|
||||
|
||||
void SecToRead_CreateVTable(CSecToRead *p);
|
||||
|
||||
|
||||
Z7_C_IFACE_DECL (ICompressProgress)
|
||||
{
|
||||
SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize);
|
||||
/* Returns: result. (result != SZ_OK) means break.
|
||||
Value (UInt64)(Int64)-1 for size means unknown value. */
|
||||
};
|
||||
|
||||
#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
|
||||
|
||||
|
||||
|
||||
typedef struct ISzAlloc ISzAlloc;
|
||||
typedef const ISzAlloc * ISzAllocPtr;
|
||||
|
||||
struct ISzAlloc
|
||||
{
|
||||
void *(*Alloc)(ISzAllocPtr p, size_t size);
|
||||
void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */
|
||||
};
|
||||
|
||||
#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size)
|
||||
#define ISzAlloc_Free(p, a) (p)->Free(p, a)
|
||||
|
||||
/* deprecated */
|
||||
#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size)
|
||||
#define IAlloc_Free(p, a) ISzAlloc_Free(p, a)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#ifndef MY_offsetof
|
||||
#ifdef offsetof
|
||||
#define MY_offsetof(type, m) offsetof(type, m)
|
||||
/*
|
||||
#define MY_offsetof(type, m) FIELD_OFFSET(type, m)
|
||||
*/
|
||||
#else
|
||||
#define MY_offsetof(type, m) ((size_t)&(((type *)0)->m))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#ifndef Z7_container_of
|
||||
|
||||
/*
|
||||
#define Z7_container_of(ptr, type, m) container_of(ptr, type, m)
|
||||
#define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
|
||||
#define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
|
||||
#define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
|
||||
*/
|
||||
|
||||
/*
|
||||
GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly"
|
||||
GCC 3.4.4 : classes with constructor
|
||||
GCC 4.8.1 : classes with non-public variable members"
|
||||
*/
|
||||
|
||||
#define Z7_container_of(ptr, type, m) \
|
||||
((type *)(void *)((char *)(void *) \
|
||||
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
|
||||
|
||||
#define Z7_container_of_CONST(ptr, type, m) \
|
||||
((const type *)(const void *)((const char *)(const void *) \
|
||||
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
|
||||
|
||||
/*
|
||||
#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \
|
||||
((type *)(void *)(const void *)((const char *)(const void *) \
|
||||
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
|
||||
*/
|
||||
|
||||
#endif
|
||||
|
||||
#define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
|
||||
|
||||
// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
|
||||
#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m)
|
||||
// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m)
|
||||
|
||||
#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m)
|
||||
|
||||
#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
|
||||
/*
|
||||
#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m)
|
||||
*/
|
||||
#if defined (__clang__) || defined(__GNUC__)
|
||||
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Wcast-qual\"")
|
||||
#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL \
|
||||
_Pragma("GCC diagnostic pop")
|
||||
#else
|
||||
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL
|
||||
#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL
|
||||
#endif
|
||||
|
||||
#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \
|
||||
Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \
|
||||
type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \
|
||||
Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL
|
||||
|
||||
#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \
|
||||
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p)
|
||||
|
||||
|
||||
// #define ZIP7_DECLARE_HANDLE(name) typedef void *name;
|
||||
#define Z7_DECLARE_HANDLE(name) struct name##_dummy{int unused;}; typedef struct name##_dummy *name;
|
||||
|
||||
|
||||
#define Z7_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
|
||||
|
||||
#ifndef Z7_ARRAY_SIZE
|
||||
#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#define CHAR_PATH_SEPARATOR '\\'
|
||||
#define WCHAR_PATH_SEPARATOR L'\\'
|
||||
#define STRING_PATH_SEPARATOR "\\"
|
||||
#define WSTRING_PATH_SEPARATOR L"\\"
|
||||
|
||||
#else
|
||||
|
||||
#define CHAR_PATH_SEPARATOR '/'
|
||||
#define WCHAR_PATH_SEPARATOR L'/'
|
||||
#define STRING_PATH_SEPARATOR "/"
|
||||
#define WSTRING_PATH_SEPARATOR L"/"
|
||||
|
||||
#endif
|
||||
|
||||
#define k_PropVar_TimePrec_0 0
|
||||
#define k_PropVar_TimePrec_Unix 1
|
||||
#define k_PropVar_TimePrec_DOS 2
|
||||
#define k_PropVar_TimePrec_HighPrec 3
|
||||
#define k_PropVar_TimePrec_Base 16
|
||||
#define k_PropVar_TimePrec_100ns (k_PropVar_TimePrec_Base + 7)
|
||||
#define k_PropVar_TimePrec_1ns (k_PropVar_TimePrec_Base + 9)
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
#ifndef Z7_ST
|
||||
#ifdef _7ZIP_ST
|
||||
#define Z7_ST
|
||||
#endif
|
||||
#endif
|
||||
*/
|
||||
101
extern/lzma/7zWindows.h
vendored
Normal file
101
extern/lzma/7zWindows.h
vendored
Normal file
@@ -0,0 +1,101 @@
|
||||
/* 7zWindows.h -- StdAfx
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef ZIP7_INC_7Z_WINDOWS_H
|
||||
#define ZIP7_INC_7Z_WINDOWS_H
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#if defined(__clang__)
|
||||
# pragma clang diagnostic push
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4668) // '_WIN32_WINNT' is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
|
||||
|
||||
#if _MSC_VER == 1900
|
||||
// for old kit10 versions
|
||||
// #pragma warning(disable : 4255) // winuser.h(13979): warning C4255: 'GetThreadDpiAwarenessContext':
|
||||
#endif
|
||||
// win10 Windows Kit:
|
||||
#endif // _MSC_VER
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
|
||||
// for msvc6 without sdk2003
|
||||
#define RPC_NO_WINDOWS_H
|
||||
#endif
|
||||
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
// #if defined(__GNUC__) && !defined(__clang__)
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <Windows.h>
|
||||
#endif
|
||||
// #include <basetsd.h>
|
||||
// #include <wtypes.h>
|
||||
|
||||
// but if precompiled with clang-cl then we need
|
||||
// #include <windows.h>
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
|
||||
#ifndef _W64
|
||||
|
||||
typedef long LONG_PTR, *PLONG_PTR;
|
||||
typedef unsigned long ULONG_PTR, *PULONG_PTR;
|
||||
typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
|
||||
|
||||
#define Z7_OLD_WIN_SDK
|
||||
#endif // _W64
|
||||
#endif // _MSC_VER == 1200
|
||||
|
||||
#ifdef Z7_OLD_WIN_SDK
|
||||
|
||||
#ifndef INVALID_FILE_ATTRIBUTES
|
||||
#define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
|
||||
#endif
|
||||
#ifndef INVALID_SET_FILE_POINTER
|
||||
#define INVALID_SET_FILE_POINTER ((DWORD)-1)
|
||||
#endif
|
||||
#ifndef FILE_SPECIAL_ACCESS
|
||||
#define FILE_SPECIAL_ACCESS (FILE_ANY_ACCESS)
|
||||
#endif
|
||||
|
||||
// ShlObj.h:
|
||||
// #define BIF_NEWDIALOGSTYLE 0x0040
|
||||
|
||||
#pragma warning(disable : 4201)
|
||||
// #pragma warning(disable : 4115)
|
||||
|
||||
#undef VARIANT_TRUE
|
||||
#define VARIANT_TRUE ((VARIANT_BOOL)-1)
|
||||
#endif
|
||||
|
||||
#endif // Z7_OLD_WIN_SDK
|
||||
|
||||
#ifdef UNDER_CE
|
||||
#undef VARIANT_TRUE
|
||||
#define VARIANT_TRUE ((VARIANT_BOOL)-1)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if _MSC_VER >= 1400 && _MSC_VER <= 1600
|
||||
// BaseTsd.h(148) : 'HandleToULong' : unreferenced inline function has been removed
|
||||
// string.h
|
||||
// #pragma warning(disable : 4514)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/* #include "7zTypes.h" */
|
||||
|
||||
#endif
|
||||
528
extern/lzma/Alloc.c
vendored
528
extern/lzma/Alloc.c
vendored
@@ -1,33 +1,182 @@
|
||||
/* Alloc.c -- Memory allocation functions
|
||||
2008-09-24
|
||||
Igor Pavlov
|
||||
Public domain */
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#include "7zWindows.h"
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "Alloc.h"
|
||||
|
||||
/* #define _SZ_ALLOC_DEBUG */
|
||||
|
||||
/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
#include <stdio.h>
|
||||
int g_allocCount = 0;
|
||||
int g_allocCountMid = 0;
|
||||
int g_allocCountBig = 0;
|
||||
#ifdef _WIN32
|
||||
#ifdef Z7_LARGE_PAGES
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
typedef void (*Z7_voidFunction)(void);
|
||||
#define MY_CAST_FUNC (Z7_voidFunction)
|
||||
#elif defined(_MSC_VER) && _MSC_VER > 1920
|
||||
#define MY_CAST_FUNC (void *)
|
||||
// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
|
||||
#else
|
||||
#define MY_CAST_FUNC
|
||||
#endif
|
||||
#endif // Z7_LARGE_PAGES
|
||||
#endif // _WIN32
|
||||
|
||||
// #define SZ_ALLOC_DEBUG
|
||||
/* #define SZ_ALLOC_DEBUG */
|
||||
|
||||
/* use SZ_ALLOC_DEBUG to debug alloc/free operations */
|
||||
#ifdef SZ_ALLOC_DEBUG
|
||||
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
static int g_allocCount = 0;
|
||||
#ifdef _WIN32
|
||||
static int g_allocCountMid = 0;
|
||||
static int g_allocCountBig = 0;
|
||||
#endif
|
||||
|
||||
|
||||
#define CONVERT_INT_TO_STR(charType, tempSize) \
|
||||
char temp[tempSize]; unsigned i = 0; \
|
||||
while (val >= 10) { temp[i++] = (char)('0' + (unsigned)(val % 10)); val /= 10; } \
|
||||
*s++ = (charType)('0' + (unsigned)val); \
|
||||
while (i != 0) { i--; *s++ = temp[i]; } \
|
||||
*s = 0;
|
||||
|
||||
static void ConvertUInt64ToString(UInt64 val, char *s)
|
||||
{
|
||||
CONVERT_INT_TO_STR(char, 24)
|
||||
}
|
||||
|
||||
#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
|
||||
|
||||
static void ConvertUInt64ToHex(UInt64 val, char *s)
|
||||
{
|
||||
UInt64 v = val;
|
||||
unsigned i;
|
||||
for (i = 1;; i++)
|
||||
{
|
||||
v >>= 4;
|
||||
if (v == 0)
|
||||
break;
|
||||
}
|
||||
s[i] = 0;
|
||||
do
|
||||
{
|
||||
unsigned t = (unsigned)(val & 0xF);
|
||||
val >>= 4;
|
||||
s[--i] = GET_HEX_CHAR(t);
|
||||
}
|
||||
while (i);
|
||||
}
|
||||
|
||||
#define DEBUG_OUT_STREAM stderr
|
||||
|
||||
static void Print(const char *s)
|
||||
{
|
||||
fputs(s, DEBUG_OUT_STREAM);
|
||||
}
|
||||
|
||||
static void PrintAligned(const char *s, size_t align)
|
||||
{
|
||||
size_t len = strlen(s);
|
||||
for(;;)
|
||||
{
|
||||
fputc(' ', DEBUG_OUT_STREAM);
|
||||
if (len >= align)
|
||||
break;
|
||||
++len;
|
||||
}
|
||||
Print(s);
|
||||
}
|
||||
|
||||
static void PrintLn(void)
|
||||
{
|
||||
Print("\n");
|
||||
}
|
||||
|
||||
static void PrintHex(UInt64 v, size_t align)
|
||||
{
|
||||
char s[32];
|
||||
ConvertUInt64ToHex(v, s);
|
||||
PrintAligned(s, align);
|
||||
}
|
||||
|
||||
static void PrintDec(int v, size_t align)
|
||||
{
|
||||
char s[32];
|
||||
ConvertUInt64ToString((unsigned)v, s);
|
||||
PrintAligned(s, align);
|
||||
}
|
||||
|
||||
static void PrintAddr(void *p)
|
||||
{
|
||||
PrintHex((UInt64)(size_t)(ptrdiff_t)p, 12);
|
||||
}
|
||||
|
||||
|
||||
#define PRINT_REALLOC(name, cnt, size, ptr) { \
|
||||
Print(name " "); \
|
||||
if (!ptr) PrintDec(cnt++, 10); \
|
||||
PrintHex(size, 10); \
|
||||
PrintAddr(ptr); \
|
||||
PrintLn(); }
|
||||
|
||||
#define PRINT_ALLOC(name, cnt, size, ptr) { \
|
||||
Print(name " "); \
|
||||
PrintDec(cnt++, 10); \
|
||||
PrintHex(size, 10); \
|
||||
PrintAddr(ptr); \
|
||||
PrintLn(); }
|
||||
|
||||
#define PRINT_FREE(name, cnt, ptr) if (ptr) { \
|
||||
Print(name " "); \
|
||||
PrintDec(--cnt, 10); \
|
||||
PrintAddr(ptr); \
|
||||
PrintLn(); }
|
||||
|
||||
#else
|
||||
|
||||
#ifdef _WIN32
|
||||
#define PRINT_ALLOC(name, cnt, size, ptr)
|
||||
#endif
|
||||
#define PRINT_FREE(name, cnt, ptr)
|
||||
#define Print(s)
|
||||
#define PrintLn()
|
||||
#define PrintHex(v, align)
|
||||
#define PrintAddr(p)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
by specification:
|
||||
malloc(non_NULL, 0) : returns NULL or a unique pointer value that can later be successfully passed to free()
|
||||
realloc(NULL, size) : the call is equivalent to malloc(size)
|
||||
realloc(non_NULL, 0) : the call is equivalent to free(ptr)
|
||||
|
||||
in main compilers:
|
||||
malloc(0) : returns non_NULL
|
||||
realloc(NULL, 0) : returns non_NULL
|
||||
realloc(non_NULL, 0) : returns NULL
|
||||
*/
|
||||
|
||||
|
||||
void *MyAlloc(size_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
return 0;
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
return NULL;
|
||||
// PRINT_ALLOC("Alloc ", g_allocCount, size, NULL)
|
||||
#ifdef SZ_ALLOC_DEBUG
|
||||
{
|
||||
void *p = malloc(size);
|
||||
fprintf(stderr, "\nAlloc %10d bytes, count = %10d, addr = %8X", size, g_allocCount++, (unsigned)p);
|
||||
if (p)
|
||||
{
|
||||
PRINT_ALLOC("Alloc ", g_allocCount, size, p)
|
||||
}
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
@@ -37,91 +186,350 @@ void *MyAlloc(size_t size)
|
||||
|
||||
void MyFree(void *address)
|
||||
{
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
if (address != 0)
|
||||
fprintf(stderr, "\nFree; count = %10d, addr = %8X", --g_allocCount, (unsigned)address);
|
||||
#endif
|
||||
PRINT_FREE("Free ", g_allocCount, address)
|
||||
|
||||
free(address);
|
||||
}
|
||||
|
||||
void *MyRealloc(void *address, size_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
{
|
||||
MyFree(address);
|
||||
return NULL;
|
||||
}
|
||||
// PRINT_REALLOC("Realloc ", g_allocCount, size, address)
|
||||
#ifdef SZ_ALLOC_DEBUG
|
||||
{
|
||||
void *p = realloc(address, size);
|
||||
if (p)
|
||||
{
|
||||
PRINT_REALLOC("Realloc ", g_allocCount, size, address)
|
||||
}
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
return realloc(address, size);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
void *MidAlloc(size_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
return 0;
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
fprintf(stderr, "\nAlloc_Mid %10d bytes; count = %10d", size, g_allocCountMid++);
|
||||
return NULL;
|
||||
#ifdef SZ_ALLOC_DEBUG
|
||||
{
|
||||
void *p = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
|
||||
if (p)
|
||||
{
|
||||
PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, p)
|
||||
}
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
|
||||
#endif
|
||||
return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE);
|
||||
}
|
||||
|
||||
void MidFree(void *address)
|
||||
{
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
if (address != 0)
|
||||
fprintf(stderr, "\nFree_Mid; count = %10d", --g_allocCountMid);
|
||||
#endif
|
||||
if (address == 0)
|
||||
PRINT_FREE("Free-Mid", g_allocCountMid, address)
|
||||
|
||||
if (!address)
|
||||
return;
|
||||
VirtualFree(address, 0, MEM_RELEASE);
|
||||
}
|
||||
|
||||
#ifndef MEM_LARGE_PAGES
|
||||
#undef _7ZIP_LARGE_PAGES
|
||||
#ifdef Z7_LARGE_PAGES
|
||||
|
||||
#ifdef MEM_LARGE_PAGES
|
||||
#define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES
|
||||
#else
|
||||
#define MY__MEM_LARGE_PAGES 0x20000000
|
||||
#endif
|
||||
|
||||
#ifdef _7ZIP_LARGE_PAGES
|
||||
extern
|
||||
SIZE_T g_LargePageSize;
|
||||
SIZE_T g_LargePageSize = 0;
|
||||
typedef SIZE_T (WINAPI *GetLargePageMinimumP)();
|
||||
#endif
|
||||
typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID);
|
||||
|
||||
void SetLargePageSize()
|
||||
void SetLargePageSize(void)
|
||||
{
|
||||
#ifdef _7ZIP_LARGE_PAGES
|
||||
SIZE_T size = 0;
|
||||
GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP)
|
||||
GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum");
|
||||
if (largePageMinimum == 0)
|
||||
#ifdef Z7_LARGE_PAGES
|
||||
SIZE_T size;
|
||||
const
|
||||
Func_GetLargePageMinimum fn =
|
||||
(Func_GetLargePageMinimum) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
|
||||
"GetLargePageMinimum");
|
||||
if (!fn)
|
||||
return;
|
||||
size = largePageMinimum();
|
||||
size = fn();
|
||||
if (size == 0 || (size & (size - 1)) != 0)
|
||||
return;
|
||||
g_LargePageSize = size;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // Z7_LARGE_PAGES
|
||||
|
||||
void *BigAlloc(size_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
return 0;
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
fprintf(stderr, "\nAlloc_Big %10d bytes; count = %10d", size, g_allocCountBig++);
|
||||
#endif
|
||||
|
||||
#ifdef _7ZIP_LARGE_PAGES
|
||||
if (g_LargePageSize != 0 && g_LargePageSize <= (1 << 30) && size >= (1 << 18))
|
||||
return NULL;
|
||||
|
||||
PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL)
|
||||
|
||||
#ifdef Z7_LARGE_PAGES
|
||||
{
|
||||
void *res = VirtualAlloc(0, (size + g_LargePageSize - 1) & (~(g_LargePageSize - 1)),
|
||||
MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
|
||||
if (res != 0)
|
||||
return res;
|
||||
SIZE_T ps = g_LargePageSize;
|
||||
if (ps != 0 && ps <= (1 << 30) && size > (ps / 2))
|
||||
{
|
||||
size_t size2;
|
||||
ps--;
|
||||
size2 = (size + ps) & ~ps;
|
||||
if (size2 >= size)
|
||||
{
|
||||
void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE);
|
||||
if (p)
|
||||
{
|
||||
PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p)
|
||||
return p;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE);
|
||||
|
||||
return MidAlloc(size);
|
||||
}
|
||||
|
||||
void BigFree(void *address)
|
||||
{
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
if (address != 0)
|
||||
fprintf(stderr, "\nFree_Big; count = %10d", --g_allocCountBig);
|
||||
#endif
|
||||
|
||||
if (address == 0)
|
||||
return;
|
||||
VirtualFree(address, 0, MEM_RELEASE);
|
||||
PRINT_FREE("Free-Big", g_allocCountBig, address)
|
||||
MidFree(address);
|
||||
}
|
||||
|
||||
#endif // _WIN32
|
||||
|
||||
|
||||
static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MyAlloc(size); }
|
||||
static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MyFree(address); }
|
||||
const ISzAlloc g_Alloc = { SzAlloc, SzFree };
|
||||
|
||||
#ifdef _WIN32
|
||||
static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MidAlloc(size); }
|
||||
static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MidFree(address); }
|
||||
static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return BigAlloc(size); }
|
||||
static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) BigFree(address); }
|
||||
const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
|
||||
const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
|
||||
#endif
|
||||
|
||||
/*
|
||||
uintptr_t : <stdint.h> C99 (optional)
|
||||
: unsupported in VS6
|
||||
*/
|
||||
|
||||
#ifdef _WIN32
|
||||
typedef UINT_PTR UIntPtr;
|
||||
#else
|
||||
/*
|
||||
typedef uintptr_t UIntPtr;
|
||||
*/
|
||||
typedef ptrdiff_t UIntPtr;
|
||||
#endif
|
||||
|
||||
|
||||
#define ADJUST_ALLOC_SIZE 0
|
||||
/*
|
||||
#define ADJUST_ALLOC_SIZE (sizeof(void *) - 1)
|
||||
*/
|
||||
/*
|
||||
Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if
|
||||
MyAlloc() can return address that is NOT multiple of sizeof(void *).
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1))))
|
||||
*/
|
||||
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1))))
|
||||
|
||||
|
||||
#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)
|
||||
#define USE_posix_memalign
|
||||
#endif
|
||||
|
||||
#ifndef USE_posix_memalign
|
||||
#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
|
||||
#endif
|
||||
|
||||
/*
|
||||
This posix_memalign() is for test purposes only.
|
||||
We also need special Free() function instead of free(),
|
||||
if this posix_memalign() is used.
|
||||
*/
|
||||
|
||||
/*
|
||||
static int posix_memalign(void **ptr, size_t align, size_t size)
|
||||
{
|
||||
size_t newSize = size + align;
|
||||
void *p;
|
||||
void *pAligned;
|
||||
*ptr = NULL;
|
||||
if (newSize < size)
|
||||
return 12; // ENOMEM
|
||||
p = MyAlloc(newSize);
|
||||
if (!p)
|
||||
return 12; // ENOMEM
|
||||
pAligned = MY_ALIGN_PTR_UP_PLUS(p, align);
|
||||
((void **)pAligned)[-1] = p;
|
||||
*ptr = pAligned;
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
ALLOC_ALIGN_SIZE >= sizeof(void *)
|
||||
ALLOC_ALIGN_SIZE >= cache_line_size
|
||||
*/
|
||||
|
||||
#define ALLOC_ALIGN_SIZE ((size_t)1 << 7)
|
||||
|
||||
static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
|
||||
{
|
||||
#ifndef USE_posix_memalign
|
||||
|
||||
void *p;
|
||||
void *pAligned;
|
||||
size_t newSize;
|
||||
UNUSED_VAR(pp)
|
||||
|
||||
/* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
|
||||
block to prevent cache line sharing with another allocated blocks */
|
||||
|
||||
newSize = size + ALLOC_ALIGN_SIZE * 1 + ADJUST_ALLOC_SIZE;
|
||||
if (newSize < size)
|
||||
return NULL;
|
||||
|
||||
p = MyAlloc(newSize);
|
||||
|
||||
if (!p)
|
||||
return NULL;
|
||||
pAligned = MY_ALIGN_PTR_UP_PLUS(p, ALLOC_ALIGN_SIZE);
|
||||
|
||||
Print(" size="); PrintHex(size, 8);
|
||||
Print(" a_size="); PrintHex(newSize, 8);
|
||||
Print(" ptr="); PrintAddr(p);
|
||||
Print(" a_ptr="); PrintAddr(pAligned);
|
||||
PrintLn();
|
||||
|
||||
((void **)pAligned)[-1] = p;
|
||||
|
||||
return pAligned;
|
||||
|
||||
#else
|
||||
|
||||
void *p;
|
||||
UNUSED_VAR(pp)
|
||||
if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size))
|
||||
return NULL;
|
||||
|
||||
Print(" posix_memalign="); PrintAddr(p);
|
||||
PrintLn();
|
||||
|
||||
return p;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static void SzAlignedFree(ISzAllocPtr pp, void *address)
|
||||
{
|
||||
UNUSED_VAR(pp)
|
||||
#ifndef USE_posix_memalign
|
||||
if (address)
|
||||
MyFree(((void **)address)[-1]);
|
||||
#else
|
||||
free(address);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };
|
||||
|
||||
|
||||
|
||||
#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
|
||||
|
||||
/* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */
|
||||
#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
|
||||
/*
|
||||
#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1]
|
||||
*/
|
||||
|
||||
static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
|
||||
{
|
||||
const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
|
||||
void *adr;
|
||||
void *pAligned;
|
||||
size_t newSize;
|
||||
size_t extra;
|
||||
size_t alignSize = (size_t)1 << p->numAlignBits;
|
||||
|
||||
if (alignSize < sizeof(void *))
|
||||
alignSize = sizeof(void *);
|
||||
|
||||
if (p->offset >= alignSize)
|
||||
return NULL;
|
||||
|
||||
/* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
|
||||
block to prevent cache line sharing with another allocated blocks */
|
||||
extra = p->offset & (sizeof(void *) - 1);
|
||||
newSize = size + alignSize + extra + ADJUST_ALLOC_SIZE;
|
||||
if (newSize < size)
|
||||
return NULL;
|
||||
|
||||
adr = ISzAlloc_Alloc(p->baseAlloc, newSize);
|
||||
|
||||
if (!adr)
|
||||
return NULL;
|
||||
|
||||
pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr +
|
||||
alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset;
|
||||
|
||||
PrintLn();
|
||||
Print("- Aligned: ");
|
||||
Print(" size="); PrintHex(size, 8);
|
||||
Print(" a_size="); PrintHex(newSize, 8);
|
||||
Print(" ptr="); PrintAddr(adr);
|
||||
Print(" a_ptr="); PrintAddr(pAligned);
|
||||
PrintLn();
|
||||
|
||||
REAL_BLOCK_PTR_VAR(pAligned) = adr;
|
||||
|
||||
return pAligned;
|
||||
}
|
||||
|
||||
|
||||
static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
|
||||
{
|
||||
if (address)
|
||||
{
|
||||
const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
|
||||
PrintLn();
|
||||
Print("- Aligned Free: ");
|
||||
PrintLn();
|
||||
ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p)
|
||||
{
|
||||
p->vt.Alloc = AlignOffsetAlloc_Alloc;
|
||||
p->vt.Free = AlignOffsetAlloc_Free;
|
||||
}
|
||||
|
||||
53
extern/lzma/Alloc.h
vendored
53
extern/lzma/Alloc.h
vendored
@@ -1,19 +1,32 @@
|
||||
/* Alloc.h -- Memory allocation functions
|
||||
2008-03-13
|
||||
Igor Pavlov
|
||||
Public domain */
|
||||
2023-03-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __COMMON_ALLOC_H
|
||||
#define __COMMON_ALLOC_H
|
||||
#ifndef ZIP7_INC_ALLOC_H
|
||||
#define ZIP7_INC_ALLOC_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include "7zTypes.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
/*
|
||||
MyFree(NULL) : is allowed, as free(NULL)
|
||||
MyAlloc(0) : returns NULL : but malloc(0) is allowed to return NULL or non_NULL
|
||||
MyRealloc(NULL, 0) : returns NULL : but realloc(NULL, 0) is allowed to return NULL or non_NULL
|
||||
MyRealloc() is similar to realloc() for the following cases:
|
||||
MyRealloc(non_NULL, 0) : returns NULL and always calls MyFree(ptr)
|
||||
MyRealloc(NULL, non_ZERO) : returns NULL, if allocation failed
|
||||
MyRealloc(non_NULL, non_ZERO) : returns NULL, if reallocation failed
|
||||
*/
|
||||
|
||||
void *MyAlloc(size_t size);
|
||||
void MyFree(void *address);
|
||||
void *MyRealloc(void *address, size_t size);
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
void SetLargePageSize();
|
||||
#ifdef Z7_LARGE_PAGES
|
||||
void SetLargePageSize(void);
|
||||
#endif
|
||||
|
||||
void *MidAlloc(size_t size);
|
||||
void MidFree(void *address);
|
||||
@@ -29,4 +42,30 @@ void BigFree(void *address);
|
||||
|
||||
#endif
|
||||
|
||||
extern const ISzAlloc g_Alloc;
|
||||
|
||||
#ifdef _WIN32
|
||||
extern const ISzAlloc g_BigAlloc;
|
||||
extern const ISzAlloc g_MidAlloc;
|
||||
#else
|
||||
#define g_BigAlloc g_AlignedAlloc
|
||||
#define g_MidAlloc g_AlignedAlloc
|
||||
#endif
|
||||
|
||||
extern const ISzAlloc g_AlignedAlloc;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ISzAlloc vt;
|
||||
ISzAllocPtr baseAlloc;
|
||||
unsigned numAlignBits; /* ((1 << numAlignBits) >= sizeof(void *)) */
|
||||
size_t offset; /* (offset == (k * sizeof(void *)) && offset < (1 << numAlignBits) */
|
||||
} CAlignOffsetAlloc;
|
||||
|
||||
void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p);
|
||||
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
#endif
|
||||
|
||||
14
extern/lzma/CMakeLists.txt
vendored
14
extern/lzma/CMakeLists.txt
vendored
@@ -1,4 +1,4 @@
|
||||
# SPDX-FileCopyrightText: 2006 Blender Foundation
|
||||
# SPDX-FileCopyrightText: 2024 Blender Foundation
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
@@ -19,18 +19,28 @@ set(INC_SYS
|
||||
|
||||
set(SRC
|
||||
Alloc.c
|
||||
CpuArch.c
|
||||
LzFind.c
|
||||
LzFindMt.c
|
||||
LzFindOpt.c
|
||||
LzmaDec.c
|
||||
LzmaEnc.c
|
||||
LzmaLib.c
|
||||
Threads.c
|
||||
|
||||
7zTypes.h
|
||||
7zWindows.h
|
||||
Alloc.h
|
||||
Compiler.h
|
||||
CpuArch.h
|
||||
LzFind.h
|
||||
LzFindMt.h
|
||||
LzHash.h
|
||||
LzmaDec.h
|
||||
LzmaEnc.h
|
||||
LzmaLib.h
|
||||
Types.h
|
||||
Precomp.h
|
||||
Threads.h
|
||||
)
|
||||
|
||||
set(LIB
|
||||
|
||||
159
extern/lzma/Compiler.h
vendored
Normal file
159
extern/lzma/Compiler.h
vendored
Normal file
@@ -0,0 +1,159 @@
|
||||
/* Compiler.h : Compiler specific defines and pragmas
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef ZIP7_INC_COMPILER_H
|
||||
#define ZIP7_INC_COMPILER_H
|
||||
|
||||
#if defined(__clang__)
|
||||
# define Z7_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
|
||||
#endif
|
||||
#if defined(__clang__) && defined(__apple_build_version__)
|
||||
# define Z7_APPLE_CLANG_VERSION Z7_CLANG_VERSION
|
||||
#elif defined(__clang__)
|
||||
# define Z7_LLVM_CLANG_VERSION Z7_CLANG_VERSION
|
||||
#elif defined(__GNUC__)
|
||||
# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#if !defined(__clang__) && !defined(__GNUC__)
|
||||
#define Z7_MSC_VER_ORIGINAL _MSC_VER
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
#define Z7_MINGW
|
||||
#endif
|
||||
|
||||
// #pragma GCC diagnostic ignored "-Wunknown-pragmas"
|
||||
|
||||
#ifdef __clang__
|
||||
// padding size of '' with 4 bytes to alignment boundary
|
||||
#pragma GCC diagnostic ignored "-Wpadded"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
#ifdef UNDER_CE
|
||||
#define RPC_NO_WINDOWS_H
|
||||
/* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
|
||||
#pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
|
||||
#pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1800
|
||||
#pragma warning(disable : 4464) // relative include path contains '..'
|
||||
#endif
|
||||
|
||||
// == 1200 : -O1 : for __forceinline
|
||||
// >= 1900 : -O1 : for printf
|
||||
#pragma warning(disable : 4710) // function not inlined
|
||||
|
||||
#if _MSC_VER < 1900
|
||||
// winnt.h: 'Int64ShllMod32'
|
||||
#pragma warning(disable : 4514) // unreferenced inline function has been removed
|
||||
#endif
|
||||
|
||||
#if _MSC_VER < 1300
|
||||
// #pragma warning(disable : 4702) // unreachable code
|
||||
// Bra.c : -O1:
|
||||
#pragma warning(disable : 4714) // function marked as __forceinline not inlined
|
||||
#endif
|
||||
|
||||
/*
|
||||
#if _MSC_VER > 1400 && _MSC_VER <= 1900
|
||||
// strcat: This function or variable may be unsafe
|
||||
// sysinfoapi.h: kit10: GetVersion was declared deprecated
|
||||
#pragma warning(disable : 4996)
|
||||
#endif
|
||||
*/
|
||||
|
||||
#if _MSC_VER > 1200
|
||||
// -Wall warnings
|
||||
|
||||
#pragma warning(disable : 4711) // function selected for automatic inline expansion
|
||||
#pragma warning(disable : 4820) // '2' bytes padding added after data member
|
||||
|
||||
#if _MSC_VER >= 1400 && _MSC_VER < 1920
|
||||
// 1400: string.h: _DBG_MEMCPY_INLINE_
|
||||
// 1600 - 191x : smmintrin.h __cplusplus'
|
||||
// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
|
||||
#pragma warning(disable : 4668)
|
||||
|
||||
// 1400 - 1600 : WinDef.h : 'FARPROC' :
|
||||
// 1900 - 191x : immintrin.h: _readfsbase_u32
|
||||
// no function prototype given : converting '()' to '(void)'
|
||||
#pragma warning(disable : 4255)
|
||||
#endif
|
||||
|
||||
#if _MSC_VER >= 1914
|
||||
// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified
|
||||
#pragma warning(disable : 5045)
|
||||
#endif
|
||||
|
||||
#endif // _MSC_VER > 1200
|
||||
#endif // _MSC_VER
|
||||
|
||||
|
||||
#if defined(__clang__) && (__clang_major__ >= 4)
|
||||
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
|
||||
_Pragma("clang loop unroll(disable)") \
|
||||
_Pragma("clang loop vectorize(disable)")
|
||||
#define Z7_ATTRIB_NO_VECTORIZE
|
||||
#elif defined(__GNUC__) && (__GNUC__ >= 5)
|
||||
#define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
|
||||
// __attribute__((optimize("no-unroll-loops")));
|
||||
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1920)
|
||||
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
|
||||
_Pragma("loop( no_vector )")
|
||||
#define Z7_ATTRIB_NO_VECTORIZE
|
||||
#else
|
||||
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
|
||||
#define Z7_ATTRIB_NO_VECTORIZE
|
||||
#endif
|
||||
|
||||
#if defined(MY_CPU_X86_OR_AMD64) && ( \
|
||||
defined(__clang__) && (__clang_major__ >= 4) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 5))
|
||||
#define Z7_ATTRIB_NO_SSE __attribute__((__target__("no-sse")))
|
||||
#else
|
||||
#define Z7_ATTRIB_NO_SSE
|
||||
#endif
|
||||
|
||||
#define Z7_ATTRIB_NO_VECTOR \
|
||||
Z7_ATTRIB_NO_VECTORIZE \
|
||||
Z7_ATTRIB_NO_SSE
|
||||
|
||||
|
||||
#if defined(__clang__) && (__clang_major__ >= 8) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 1000) \
|
||||
/* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
|
||||
// GCC is not good for __builtin_expect()
|
||||
#define Z7_LIKELY(x) (__builtin_expect((x), 1))
|
||||
#define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
|
||||
// #define Z7_unlikely [[unlikely]]
|
||||
// #define Z7_likely [[likely]]
|
||||
#else
|
||||
#define Z7_LIKELY(x) (x)
|
||||
#define Z7_UNLIKELY(x) (x)
|
||||
// #define Z7_likely
|
||||
#endif
|
||||
|
||||
|
||||
#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 36000))
|
||||
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"")
|
||||
#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \
|
||||
_Pragma("GCC diagnostic pop")
|
||||
#else
|
||||
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
|
||||
#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
|
||||
#endif
|
||||
|
||||
#define UNUSED_VAR(x) (void)x;
|
||||
/* #define UNUSED_VAR(x) x=x; */
|
||||
|
||||
#endif
|
||||
823
extern/lzma/CpuArch.c
vendored
Normal file
823
extern/lzma/CpuArch.c
vendored
Normal file
@@ -0,0 +1,823 @@
|
||||
/* CpuArch.c -- CPU specific code
|
||||
2023-05-18 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
// #include <stdio.h>
|
||||
|
||||
#include "CpuArch.h"
|
||||
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
|
||||
#undef NEED_CHECK_FOR_CPUID
|
||||
#if !defined(MY_CPU_AMD64)
|
||||
#define NEED_CHECK_FOR_CPUID
|
||||
#endif
|
||||
|
||||
/*
|
||||
cpuid instruction supports (subFunction) parameter in ECX,
|
||||
that is used only with some specific (function) parameter values.
|
||||
But we always use only (subFunction==0).
|
||||
*/
|
||||
/*
|
||||
__cpuid(): MSVC and GCC/CLANG use same function/macro name
|
||||
but parameters are different.
|
||||
We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function.
|
||||
*/
|
||||
|
||||
#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \
|
||||
|| defined(__clang__) /* && (__clang_major__ >= 10) */
|
||||
|
||||
/* there was some CLANG/GCC compilers that have issues with
|
||||
rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined).
|
||||
compiler's <cpuid.h> contains the macro __cpuid() that is similar to our code.
|
||||
The history of __cpuid() changes in CLANG/GCC:
|
||||
GCC:
|
||||
2007: it preserved ebx for (__PIC__ && __i386__)
|
||||
2013: it preserved rbx and ebx for __PIC__
|
||||
2014: it doesn't preserves rbx and ebx anymore
|
||||
we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem.
|
||||
CLANG:
|
||||
2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check.
|
||||
Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)?
|
||||
Do we need __PIC__ test for CLANG or we must care about rbx even if
|
||||
__PIC__ is not defined?
|
||||
*/
|
||||
|
||||
#define ASM_LN "\n"
|
||||
|
||||
#if defined(MY_CPU_AMD64) && defined(__PIC__) \
|
||||
&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
|
||||
|
||||
#define x86_cpuid_MACRO(p, func) { \
|
||||
__asm__ __volatile__ ( \
|
||||
ASM_LN "mov %%rbx, %q1" \
|
||||
ASM_LN "cpuid" \
|
||||
ASM_LN "xchg %%rbx, %q1" \
|
||||
: "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
|
||||
|
||||
/* "=&r" selects free register. It can select even rbx, if that register is free.
|
||||
"=&D" for (RDI) also works, but the code can be larger with "=&D"
|
||||
"2"(0) means (subFunction = 0),
|
||||
2 is (zero-based) index in the output constraint list "=c" (ECX). */
|
||||
|
||||
#elif defined(MY_CPU_X86) && defined(__PIC__) \
|
||||
&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
|
||||
|
||||
#define x86_cpuid_MACRO(p, func) { \
|
||||
__asm__ __volatile__ ( \
|
||||
ASM_LN "mov %%ebx, %k1" \
|
||||
ASM_LN "cpuid" \
|
||||
ASM_LN "xchg %%ebx, %k1" \
|
||||
: "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
|
||||
|
||||
#else
|
||||
|
||||
#define x86_cpuid_MACRO(p, func) { \
|
||||
__asm__ __volatile__ ( \
|
||||
ASM_LN "cpuid" \
|
||||
: "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
|
||||
{
|
||||
x86_cpuid_MACRO(p, func)
|
||||
}
|
||||
|
||||
|
||||
Z7_NO_INLINE
|
||||
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
|
||||
{
|
||||
#if defined(NEED_CHECK_FOR_CPUID)
|
||||
#define EFALGS_CPUID_BIT 21
|
||||
UInt32 a;
|
||||
__asm__ __volatile__ (
|
||||
ASM_LN "pushf"
|
||||
ASM_LN "pushf"
|
||||
ASM_LN "pop %0"
|
||||
// ASM_LN "movl %0, %1"
|
||||
// ASM_LN "xorl $0x200000, %0"
|
||||
ASM_LN "btc %1, %0"
|
||||
ASM_LN "push %0"
|
||||
ASM_LN "popf"
|
||||
ASM_LN "pushf"
|
||||
ASM_LN "pop %0"
|
||||
ASM_LN "xorl (%%esp), %0"
|
||||
|
||||
ASM_LN "popf"
|
||||
ASM_LN
|
||||
: "=&r" (a) // "=a"
|
||||
: "i" (EFALGS_CPUID_BIT)
|
||||
);
|
||||
if ((a & (1 << EFALGS_CPUID_BIT)) == 0)
|
||||
return 0;
|
||||
#endif
|
||||
{
|
||||
UInt32 p[4];
|
||||
x86_cpuid_MACRO(p, 0)
|
||||
return p[0];
|
||||
}
|
||||
}
|
||||
|
||||
#undef ASM_LN
|
||||
|
||||
#elif !defined(_MSC_VER)
|
||||
|
||||
/*
|
||||
// for gcc/clang and other: we can try to use __cpuid macro:
|
||||
#include <cpuid.h>
|
||||
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
|
||||
{
|
||||
__cpuid(func, p[0], p[1], p[2], p[3]);
|
||||
}
|
||||
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
|
||||
{
|
||||
return (UInt32)__get_cpuid_max(0, NULL);
|
||||
}
|
||||
*/
|
||||
// for unsupported cpuid:
|
||||
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
|
||||
{
|
||||
UNUSED_VAR(func)
|
||||
p[0] = p[1] = p[2] = p[3] = 0;
|
||||
}
|
||||
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else // _MSC_VER
|
||||
|
||||
#if !defined(MY_CPU_AMD64)
|
||||
|
||||
UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
|
||||
{
|
||||
#if defined(NEED_CHECK_FOR_CPUID)
|
||||
#define EFALGS_CPUID_BIT 21
|
||||
__asm pushfd
|
||||
__asm pushfd
|
||||
/*
|
||||
__asm pop eax
|
||||
// __asm mov edx, eax
|
||||
__asm btc eax, EFALGS_CPUID_BIT
|
||||
__asm push eax
|
||||
*/
|
||||
__asm btc dword ptr [esp], EFALGS_CPUID_BIT
|
||||
__asm popfd
|
||||
__asm pushfd
|
||||
__asm pop eax
|
||||
// __asm xor eax, edx
|
||||
__asm xor eax, [esp]
|
||||
// __asm push edx
|
||||
__asm popfd
|
||||
__asm and eax, (1 shl EFALGS_CPUID_BIT)
|
||||
__asm jz end_func
|
||||
#endif
|
||||
__asm push ebx
|
||||
__asm xor eax, eax // func
|
||||
__asm xor ecx, ecx // subFunction (optional) for (func == 0)
|
||||
__asm cpuid
|
||||
__asm pop ebx
|
||||
#if defined(NEED_CHECK_FOR_CPUID)
|
||||
end_func:
|
||||
#endif
|
||||
__asm ret 0
|
||||
}
|
||||
|
||||
void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
|
||||
{
|
||||
UNUSED_VAR(p)
|
||||
UNUSED_VAR(func)
|
||||
__asm push ebx
|
||||
__asm push edi
|
||||
__asm mov edi, ecx // p
|
||||
__asm mov eax, edx // func
|
||||
__asm xor ecx, ecx // subfunction (optional) for (func == 0)
|
||||
__asm cpuid
|
||||
__asm mov [edi ], eax
|
||||
__asm mov [edi + 4], ebx
|
||||
__asm mov [edi + 8], ecx
|
||||
__asm mov [edi + 12], edx
|
||||
__asm pop edi
|
||||
__asm pop ebx
|
||||
__asm ret 0
|
||||
}
|
||||
|
||||
#else // MY_CPU_AMD64
|
||||
|
||||
#if _MSC_VER >= 1600
|
||||
#include <intrin.h>
|
||||
#define MY_cpuidex __cpuidex
|
||||
#else
|
||||
/*
|
||||
__cpuid (func == (0 or 7)) requires subfunction number in ECX.
|
||||
MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
|
||||
__cpuid() in new MSVC clears ECX.
|
||||
__cpuid() in old MSVC (14.00) x64 doesn't clear ECX
|
||||
We still can use __cpuid for low (func) values that don't require ECX,
|
||||
but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
|
||||
So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
|
||||
where ECX value is first parameter for FASTCALL / NO_INLINE func,
|
||||
So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
|
||||
old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
|
||||
|
||||
DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!!
|
||||
*/
|
||||
static
|
||||
Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo)
|
||||
{
|
||||
UNUSED_VAR(subFunction)
|
||||
__cpuid(CPUInfo, func);
|
||||
}
|
||||
#define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info)
|
||||
#pragma message("======== MY_cpuidex_HACK WAS USED ========")
|
||||
#endif // _MSC_VER >= 1600
|
||||
|
||||
#if !defined(MY_CPU_AMD64)
|
||||
/* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code,
|
||||
so we disable inlining here */
|
||||
Z7_NO_INLINE
|
||||
#endif
|
||||
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
|
||||
{
|
||||
MY_cpuidex((int *)p, (int)func, 0);
|
||||
}
|
||||
|
||||
Z7_NO_INLINE
|
||||
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
|
||||
{
|
||||
int a[4];
|
||||
MY_cpuidex(a, 0, 0);
|
||||
return a[0];
|
||||
}
|
||||
|
||||
#endif // MY_CPU_AMD64
|
||||
#endif // _MSC_VER
|
||||
|
||||
#if defined(NEED_CHECK_FOR_CPUID)
|
||||
#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; }
|
||||
#else
|
||||
#define CHECK_CPUID_IS_SUPPORTED
|
||||
#endif
|
||||
#undef NEED_CHECK_FOR_CPUID
|
||||
|
||||
|
||||
static
|
||||
BoolInt x86cpuid_Func_1(UInt32 *p)
|
||||
{
|
||||
CHECK_CPUID_IS_SUPPORTED
|
||||
z7_x86_cpuid(p, 1);
|
||||
return True;
|
||||
}
|
||||
|
||||
/*
|
||||
static const UInt32 kVendors[][1] =
|
||||
{
|
||||
{ 0x756E6547 }, // , 0x49656E69, 0x6C65746E },
|
||||
{ 0x68747541 }, // , 0x69746E65, 0x444D4163 },
|
||||
{ 0x746E6543 } // , 0x48727561, 0x736C7561 }
|
||||
};
|
||||
*/
|
||||
|
||||
/*
|
||||
typedef struct
|
||||
{
|
||||
UInt32 maxFunc;
|
||||
UInt32 vendor[3];
|
||||
UInt32 ver;
|
||||
UInt32 b;
|
||||
UInt32 c;
|
||||
UInt32 d;
|
||||
} Cx86cpuid;
|
||||
|
||||
enum
|
||||
{
|
||||
CPU_FIRM_INTEL,
|
||||
CPU_FIRM_AMD,
|
||||
CPU_FIRM_VIA
|
||||
};
|
||||
int x86cpuid_GetFirm(const Cx86cpuid *p);
|
||||
#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf))
|
||||
#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf))
|
||||
#define x86cpuid_ver_GetStepping(ver) (ver & 0xf)
|
||||
|
||||
int x86cpuid_GetFirm(const Cx86cpuid *p)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++)
|
||||
{
|
||||
const UInt32 *v = kVendors[i];
|
||||
if (v[0] == p->vendor[0]
|
||||
// && v[1] == p->vendor[1]
|
||||
// && v[2] == p->vendor[2]
|
||||
)
|
||||
return (int)i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
BoolInt CPU_Is_InOrder()
|
||||
{
|
||||
Cx86cpuid p;
|
||||
UInt32 family, model;
|
||||
if (!x86cpuid_CheckAndRead(&p))
|
||||
return True;
|
||||
|
||||
family = x86cpuid_ver_GetFamily(p.ver);
|
||||
model = x86cpuid_ver_GetModel(p.ver);
|
||||
|
||||
switch (x86cpuid_GetFirm(&p))
|
||||
{
|
||||
case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
|
||||
// In-Order Atom CPU
|
||||
model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330
|
||||
|| model == 0x26 // 45 nm, Z6xx
|
||||
|| model == 0x27 // 32 nm, Z2460
|
||||
|| model == 0x35 // 32 nm, Z2760
|
||||
|| model == 0x36 // 32 nm, N2xxx, D2xxx
|
||||
)));
|
||||
case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
|
||||
case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
|
||||
}
|
||||
return False; // v23 : unknown processors are not In-Order
|
||||
}
|
||||
*/
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "7zWindows.h"
|
||||
#endif
|
||||
|
||||
#if !defined(MY_CPU_AMD64) && defined(_WIN32)
|
||||
|
||||
/* for legacy SSE ia32: there is no user-space cpu instruction to check
|
||||
that OS supports SSE register storing/restoring on context switches.
|
||||
So we need some OS-specific function to check that it's safe to use SSE registers.
|
||||
*/
|
||||
|
||||
Z7_FORCE_INLINE
|
||||
static BoolInt CPU_Sys_Is_SSE_Supported(void)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4996) // `GetVersion': was declared deprecated
|
||||
#endif
|
||||
/* low byte is major version of Windows
|
||||
We suppose that any Windows version since
|
||||
Windows2000 (major == 5) supports SSE registers */
|
||||
return (Byte)GetVersion() >= 5;
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
}
|
||||
#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
|
||||
#else
|
||||
#define CHECK_SYS_SSE_SUPPORT
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(MY_CPU_AMD64)
|
||||
|
||||
BoolInt CPU_IsSupported_CMOV(void)
|
||||
{
|
||||
UInt32 a[4];
|
||||
if (!x86cpuid_Func_1(&a[0]))
|
||||
return 0;
|
||||
return (a[3] >> 15) & 1;
|
||||
}
|
||||
|
||||
BoolInt CPU_IsSupported_SSE(void)
|
||||
{
|
||||
UInt32 a[4];
|
||||
CHECK_SYS_SSE_SUPPORT
|
||||
if (!x86cpuid_Func_1(&a[0]))
|
||||
return 0;
|
||||
return (a[3] >> 25) & 1;
|
||||
}
|
||||
|
||||
BoolInt CPU_IsSupported_SSE2(void)
|
||||
{
|
||||
UInt32 a[4];
|
||||
CHECK_SYS_SSE_SUPPORT
|
||||
if (!x86cpuid_Func_1(&a[0]))
|
||||
return 0;
|
||||
return (a[3] >> 26) & 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
static UInt32 x86cpuid_Func_1_ECX(void)
|
||||
{
|
||||
UInt32 a[4];
|
||||
CHECK_SYS_SSE_SUPPORT
|
||||
if (!x86cpuid_Func_1(&a[0]))
|
||||
return 0;
|
||||
return a[2];
|
||||
}
|
||||
|
||||
BoolInt CPU_IsSupported_AES(void)
|
||||
{
|
||||
return (x86cpuid_Func_1_ECX() >> 25) & 1;
|
||||
}
|
||||
|
||||
BoolInt CPU_IsSupported_SSSE3(void)
|
||||
{
|
||||
return (x86cpuid_Func_1_ECX() >> 9) & 1;
|
||||
}
|
||||
|
||||
BoolInt CPU_IsSupported_SSE41(void)
|
||||
{
|
||||
return (x86cpuid_Func_1_ECX() >> 19) & 1;
|
||||
}
|
||||
|
||||
BoolInt CPU_IsSupported_SHA(void)
|
||||
{
|
||||
CHECK_SYS_SSE_SUPPORT
|
||||
|
||||
if (z7_x86_cpuid_GetMaxFunc() < 7)
|
||||
return False;
|
||||
{
|
||||
UInt32 d[4];
|
||||
z7_x86_cpuid(d, 7);
|
||||
return (d[1] >> 29) & 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
MSVC: _xgetbv() intrinsic is available since VS2010SP1.
|
||||
MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in
|
||||
<immintrin.h> that we can use or check.
|
||||
For any 32-bit x86 we can use asm code in MSVC,
|
||||
but MSVC asm code is huge after compilation.
|
||||
So _xgetbv() is better
|
||||
|
||||
ICC: _xgetbv() intrinsic is available (in what version of ICC?)
|
||||
ICC defines (__GNUC___) and it supports gnu assembler
|
||||
also ICC supports MASM style code with -use-msasm switch.
|
||||
but ICC doesn't support __attribute__((__target__))
|
||||
|
||||
GCC/CLANG 9:
|
||||
_xgetbv() is macro that works via __builtin_ia32_xgetbv()
|
||||
and we need __attribute__((__target__("xsave")).
|
||||
But with __target__("xsave") the function will be not
|
||||
inlined to function that has no __target__("xsave") attribute.
|
||||
If we want _xgetbv() call inlining, then we should use asm version
|
||||
instead of calling _xgetbv().
|
||||
Note:intrinsic is broke before GCC 8.2:
|
||||
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684
|
||||
*/
|
||||
|
||||
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \
|
||||
|| defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 9) \
|
||||
|| defined(__clang__) && (__clang_major__ >= 9)
|
||||
// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#define ATTRIB_XGETBV
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
// we don't define ATTRIB_XGETBV here, because asm version is better for inlining.
|
||||
// #define ATTRIB_XGETBV __attribute__((__target__("xsave")))
|
||||
#else
|
||||
#define ATTRIB_XGETBV
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(ATTRIB_XGETBV)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
// XFEATURE_ENABLED_MASK/XCR0
|
||||
#define MY_XCR_XFEATURE_ENABLED_MASK 0
|
||||
|
||||
#if defined(ATTRIB_XGETBV)
|
||||
ATTRIB_XGETBV
|
||||
#endif
|
||||
static UInt64 x86_xgetbv_0(UInt32 num)
|
||||
{
|
||||
#if defined(ATTRIB_XGETBV)
|
||||
{
|
||||
return
|
||||
#if (defined(_MSC_VER))
|
||||
_xgetbv(num);
|
||||
#else
|
||||
__builtin_ia32_xgetbv(
|
||||
#if !defined(__clang__)
|
||||
(int)
|
||||
#endif
|
||||
num);
|
||||
#endif
|
||||
}
|
||||
|
||||
#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC)
|
||||
|
||||
UInt32 a, d;
|
||||
#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
|
||||
__asm__
|
||||
(
|
||||
"xgetbv"
|
||||
: "=a"(a), "=d"(d) : "c"(num) : "cc"
|
||||
);
|
||||
#else // is old gcc
|
||||
__asm__
|
||||
(
|
||||
".byte 0x0f, 0x01, 0xd0" "\n\t"
|
||||
: "=a"(a), "=d"(d) : "c"(num) : "cc"
|
||||
);
|
||||
#endif
|
||||
return ((UInt64)d << 32) | a;
|
||||
// return a;
|
||||
|
||||
#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64)
|
||||
|
||||
UInt32 a, d;
|
||||
__asm {
|
||||
push eax
|
||||
push edx
|
||||
push ecx
|
||||
mov ecx, num;
|
||||
// xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK
|
||||
_emit 0x0f
|
||||
_emit 0x01
|
||||
_emit 0xd0
|
||||
mov a, eax
|
||||
mov d, edx
|
||||
pop ecx
|
||||
pop edx
|
||||
pop eax
|
||||
}
|
||||
return ((UInt64)d << 32) | a;
|
||||
// return a;
|
||||
|
||||
#else // it's unknown compiler
|
||||
// #error "Need xgetbv function"
|
||||
UNUSED_VAR(num)
|
||||
// for MSVC-X64 we could call external function from external file.
|
||||
/* Actually we had checked OSXSAVE/AVX in cpuid before.
|
||||
So it's expected that OS supports at least AVX and below. */
|
||||
// if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0
|
||||
return
|
||||
// (1 << 0) | // x87
|
||||
(1 << 1) // SSE
|
||||
| (1 << 2); // AVX
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
/*
|
||||
Windows versions do not know about new ISA extensions that
|
||||
can be introduced. But we still can use new extensions,
|
||||
even if Windows doesn't report about supporting them,
|
||||
But we can use new extensions, only if Windows knows about new ISA extension
|
||||
that changes the number or size of registers: SSE, AVX/XSAVE, AVX512
|
||||
So it's enough to check
|
||||
MY_PF_AVX_INSTRUCTIONS_AVAILABLE
|
||||
instead of
|
||||
MY_PF_AVX2_INSTRUCTIONS_AVAILABLE
|
||||
*/
|
||||
#define MY_PF_XSAVE_ENABLED 17
|
||||
// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36
|
||||
// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37
|
||||
// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38
|
||||
// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39
|
||||
// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40
|
||||
// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41
|
||||
#endif
|
||||
|
||||
BoolInt CPU_IsSupported_AVX(void)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))
|
||||
return False;
|
||||
/* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from
|
||||
some latest Win10 revisions. But we need AVX in older Windows also.
|
||||
So we don't use the following check: */
|
||||
/*
|
||||
if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))
|
||||
return False;
|
||||
*/
|
||||
#endif
|
||||
|
||||
/*
|
||||
OS must use new special XSAVE/XRSTOR instructions to save
|
||||
AVX registers when it required for context switching.
|
||||
At OS statring:
|
||||
OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions.
|
||||
Also OS sets bitmask in XCR0 register that defines what
|
||||
registers will be processed by XSAVE instruction:
|
||||
XCR0.SSE[bit 0] - x87 registers and state
|
||||
XCR0.SSE[bit 1] - SSE registers and state
|
||||
XCR0.AVX[bit 2] - AVX registers and state
|
||||
CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27].
|
||||
So we can read that bit in user-space.
|
||||
XCR0 is available for reading in user-space by new XGETBV instruction.
|
||||
*/
|
||||
{
|
||||
const UInt32 c = x86cpuid_Func_1_ECX();
|
||||
if (0 == (1
|
||||
& (c >> 28) // AVX instructions are supported by hardware
|
||||
& (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS.
|
||||
return False;
|
||||
}
|
||||
|
||||
/* also we can check
|
||||
CPUID.1:ECX.XSAVE [bit 26] : that shows that
|
||||
XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware.
|
||||
But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */
|
||||
|
||||
/* If OS have enabled XSAVE extension instructions (OSXSAVE == 1),
|
||||
in most cases we expect that OS also will support storing/restoring
|
||||
for AVX and SSE states at least.
|
||||
But to be ensure for that we call user-space instruction
|
||||
XGETBV(0) to get XCR0 value that contains bitmask that defines
|
||||
what exact states(registers) OS have enabled for storing/restoring.
|
||||
*/
|
||||
|
||||
{
|
||||
const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
|
||||
// printf("\n=== XGetBV=%d\n", bm);
|
||||
return 1
|
||||
& (bm >> 1) // SSE state is supported (set by OS) for storing/restoring
|
||||
& (bm >> 2); // AVX state is supported (set by OS) for storing/restoring
|
||||
}
|
||||
// since Win7SP1: we can use GetEnabledXStateFeatures();
|
||||
}
|
||||
|
||||
|
||||
BoolInt CPU_IsSupported_AVX2(void)
|
||||
{
|
||||
if (!CPU_IsSupported_AVX())
|
||||
return False;
|
||||
if (z7_x86_cpuid_GetMaxFunc() < 7)
|
||||
return False;
|
||||
{
|
||||
UInt32 d[4];
|
||||
z7_x86_cpuid(d, 7);
|
||||
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
|
||||
return 1
|
||||
& (d[1] >> 5); // avx2
|
||||
}
|
||||
}
|
||||
|
||||
BoolInt CPU_IsSupported_VAES_AVX2(void)
|
||||
{
|
||||
if (!CPU_IsSupported_AVX())
|
||||
return False;
|
||||
if (z7_x86_cpuid_GetMaxFunc() < 7)
|
||||
return False;
|
||||
{
|
||||
UInt32 d[4];
|
||||
z7_x86_cpuid(d, 7);
|
||||
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
|
||||
return 1
|
||||
& (d[1] >> 5) // avx2
|
||||
// & (d[1] >> 31) // avx512vl
|
||||
& (d[2] >> 9); // vaes // VEX-256/EVEX
|
||||
}
|
||||
}
|
||||
|
||||
BoolInt CPU_IsSupported_PageGB(void)
|
||||
{
|
||||
CHECK_CPUID_IS_SUPPORTED
|
||||
{
|
||||
UInt32 d[4];
|
||||
z7_x86_cpuid(d, 0x80000000);
|
||||
if (d[0] < 0x80000001)
|
||||
return False;
|
||||
z7_x86_cpuid(d, 0x80000001);
|
||||
return (d[3] >> 26) & 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#elif defined(MY_CPU_ARM_OR_ARM64)
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#include "7zWindows.h"
|
||||
|
||||
BoolInt CPU_IsSupported_CRC32(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
|
||||
BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
|
||||
BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
|
||||
|
||||
#else
|
||||
|
||||
#if defined(__APPLE__)
|
||||
|
||||
/*
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
static void Print_sysctlbyname(const char *name)
|
||||
{
|
||||
size_t bufSize = 256;
|
||||
char buf[256];
|
||||
int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
|
||||
{
|
||||
int i;
|
||||
printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
|
||||
for (i = 0; i < 20; i++)
|
||||
printf(" %2x", (unsigned)(Byte)buf[i]);
|
||||
|
||||
}
|
||||
}
|
||||
*/
|
||||
/*
|
||||
Print_sysctlbyname("hw.pagesize");
|
||||
Print_sysctlbyname("machdep.cpu.brand_string");
|
||||
*/
|
||||
|
||||
static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name)
|
||||
{
|
||||
UInt32 val = 0;
|
||||
if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
BoolInt CPU_IsSupported_CRC32(void)
|
||||
{
|
||||
return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
|
||||
}
|
||||
|
||||
BoolInt CPU_IsSupported_NEON(void)
|
||||
{
|
||||
return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
|
||||
}
|
||||
|
||||
#ifdef MY_CPU_ARM64
|
||||
#define APPLE_CRYPTO_SUPPORT_VAL 1
|
||||
#else
|
||||
#define APPLE_CRYPTO_SUPPORT_VAL 0
|
||||
#endif
|
||||
|
||||
BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
|
||||
BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
|
||||
BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
|
||||
|
||||
|
||||
#else // __APPLE__
|
||||
|
||||
#include <sys/auxv.h>
|
||||
|
||||
#define USE_HWCAP
|
||||
|
||||
#ifdef USE_HWCAP
|
||||
|
||||
#include <asm/hwcap.h>
|
||||
|
||||
#define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
|
||||
BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; }
|
||||
|
||||
#ifdef MY_CPU_ARM64
|
||||
#define MY_HWCAP_CHECK_FUNC(name) \
|
||||
MY_HWCAP_CHECK_FUNC_2(name, name)
|
||||
MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
|
||||
// MY_HWCAP_CHECK_FUNC (ASIMD)
|
||||
#elif defined(MY_CPU_ARM)
|
||||
#define MY_HWCAP_CHECK_FUNC(name) \
|
||||
BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
|
||||
MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
|
||||
#endif
|
||||
|
||||
#else // USE_HWCAP
|
||||
|
||||
#define MY_HWCAP_CHECK_FUNC(name) \
|
||||
BoolInt CPU_IsSupported_ ## name() { return 0; }
|
||||
MY_HWCAP_CHECK_FUNC(NEON)
|
||||
|
||||
#endif // USE_HWCAP
|
||||
|
||||
MY_HWCAP_CHECK_FUNC (CRC32)
|
||||
MY_HWCAP_CHECK_FUNC (SHA1)
|
||||
MY_HWCAP_CHECK_FUNC (SHA2)
|
||||
MY_HWCAP_CHECK_FUNC (AES)
|
||||
|
||||
#endif // __APPLE__
|
||||
#endif // _WIN32
|
||||
|
||||
#endif // MY_CPU_ARM_OR_ARM64
|
||||
|
||||
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
|
||||
{
|
||||
return sysctlbyname(name, buf, bufSize, NULL, 0);
|
||||
}
|
||||
|
||||
int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
|
||||
{
|
||||
size_t bufSize = sizeof(*val);
|
||||
const int res = z7_sysctlbyname_Get(name, val, &bufSize);
|
||||
if (res == 0 && bufSize != sizeof(*val))
|
||||
return EFAULT;
|
||||
return res;
|
||||
}
|
||||
|
||||
#endif
|
||||
523
extern/lzma/CpuArch.h
vendored
Normal file
523
extern/lzma/CpuArch.h
vendored
Normal file
@@ -0,0 +1,523 @@
|
||||
/* CpuArch.h -- CPU specific code
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef ZIP7_INC_CPU_ARCH_H
|
||||
#define ZIP7_INC_CPU_ARCH_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
/*
|
||||
MY_CPU_LE means that CPU is LITTLE ENDIAN.
|
||||
MY_CPU_BE means that CPU is BIG ENDIAN.
|
||||
If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform.
|
||||
|
||||
MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses.
|
||||
|
||||
MY_CPU_64BIT means that processor can work with 64-bit registers.
|
||||
MY_CPU_64BIT can be used to select fast code branch
|
||||
MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
|
||||
*/
|
||||
|
||||
#if defined(_M_X64) \
|
||||
|| defined(_M_AMD64) \
|
||||
|| defined(__x86_64__) \
|
||||
|| defined(__AMD64__) \
|
||||
|| defined(__amd64__)
|
||||
#define MY_CPU_AMD64
|
||||
#ifdef __ILP32__
|
||||
#define MY_CPU_NAME "x32"
|
||||
#define MY_CPU_SIZEOF_POINTER 4
|
||||
#else
|
||||
#define MY_CPU_NAME "x64"
|
||||
#define MY_CPU_SIZEOF_POINTER 8
|
||||
#endif
|
||||
#define MY_CPU_64BIT
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(_M_IX86) \
|
||||
|| defined(__i386__)
|
||||
#define MY_CPU_X86
|
||||
#define MY_CPU_NAME "x86"
|
||||
/* #define MY_CPU_32BIT */
|
||||
#define MY_CPU_SIZEOF_POINTER 4
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(_M_ARM64) \
|
||||
|| defined(__AARCH64EL__) \
|
||||
|| defined(__AARCH64EB__) \
|
||||
|| defined(__aarch64__)
|
||||
#define MY_CPU_ARM64
|
||||
#ifdef __ILP32__
|
||||
#define MY_CPU_NAME "arm64-32"
|
||||
#define MY_CPU_SIZEOF_POINTER 4
|
||||
#else
|
||||
#define MY_CPU_NAME "arm64"
|
||||
#define MY_CPU_SIZEOF_POINTER 8
|
||||
#endif
|
||||
#define MY_CPU_64BIT
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(_M_ARM) \
|
||||
|| defined(_M_ARM_NT) \
|
||||
|| defined(_M_ARMT) \
|
||||
|| defined(__arm__) \
|
||||
|| defined(__thumb__) \
|
||||
|| defined(__ARMEL__) \
|
||||
|| defined(__ARMEB__) \
|
||||
|| defined(__THUMBEL__) \
|
||||
|| defined(__THUMBEB__)
|
||||
#define MY_CPU_ARM
|
||||
|
||||
#if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
|
||||
#define MY_CPU_ARMT
|
||||
#define MY_CPU_NAME "armt"
|
||||
#else
|
||||
#define MY_CPU_ARM32
|
||||
#define MY_CPU_NAME "arm"
|
||||
#endif
|
||||
/* #define MY_CPU_32BIT */
|
||||
#define MY_CPU_SIZEOF_POINTER 4
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(_M_IA64) \
|
||||
|| defined(__ia64__)
|
||||
#define MY_CPU_IA64
|
||||
#define MY_CPU_NAME "ia64"
|
||||
#define MY_CPU_64BIT
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__mips64) \
|
||||
|| defined(__mips64__) \
|
||||
|| (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3))
|
||||
#define MY_CPU_NAME "mips64"
|
||||
#define MY_CPU_64BIT
|
||||
#elif defined(__mips__)
|
||||
#define MY_CPU_NAME "mips"
|
||||
/* #define MY_CPU_32BIT */
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__ppc64__) \
|
||||
|| defined(__powerpc64__) \
|
||||
|| defined(__ppc__) \
|
||||
|| defined(__powerpc__) \
|
||||
|| defined(__PPC__) \
|
||||
|| defined(_POWER)
|
||||
|
||||
#define MY_CPU_PPC_OR_PPC64
|
||||
|
||||
#if defined(__ppc64__) \
|
||||
|| defined(__powerpc64__) \
|
||||
|| defined(_LP64) \
|
||||
|| defined(__64BIT__)
|
||||
#ifdef __ILP32__
|
||||
#define MY_CPU_NAME "ppc64-32"
|
||||
#define MY_CPU_SIZEOF_POINTER 4
|
||||
#else
|
||||
#define MY_CPU_NAME "ppc64"
|
||||
#define MY_CPU_SIZEOF_POINTER 8
|
||||
#endif
|
||||
#define MY_CPU_64BIT
|
||||
#else
|
||||
#define MY_CPU_NAME "ppc"
|
||||
#define MY_CPU_SIZEOF_POINTER 4
|
||||
/* #define MY_CPU_32BIT */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__riscv) \
|
||||
|| defined(__riscv__)
|
||||
#if __riscv_xlen == 32
|
||||
#define MY_CPU_NAME "riscv32"
|
||||
#elif __riscv_xlen == 64
|
||||
#define MY_CPU_NAME "riscv64"
|
||||
#else
|
||||
#define MY_CPU_NAME "riscv"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
|
||||
#define MY_CPU_X86_OR_AMD64
|
||||
#endif
|
||||
|
||||
#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64)
|
||||
#define MY_CPU_ARM_OR_ARM64
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#ifdef MY_CPU_ARM
|
||||
#define MY_CPU_ARM_LE
|
||||
#endif
|
||||
|
||||
#ifdef MY_CPU_ARM64
|
||||
#define MY_CPU_ARM64_LE
|
||||
#endif
|
||||
|
||||
#ifdef _M_IA64
|
||||
#define MY_CPU_IA64_LE
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(MY_CPU_X86_OR_AMD64) \
|
||||
|| defined(MY_CPU_ARM_LE) \
|
||||
|| defined(MY_CPU_ARM64_LE) \
|
||||
|| defined(MY_CPU_IA64_LE) \
|
||||
|| defined(__LITTLE_ENDIAN__) \
|
||||
|| defined(__ARMEL__) \
|
||||
|| defined(__THUMBEL__) \
|
||||
|| defined(__AARCH64EL__) \
|
||||
|| defined(__MIPSEL__) \
|
||||
|| defined(__MIPSEL) \
|
||||
|| defined(_MIPSEL) \
|
||||
|| defined(__BFIN__) \
|
||||
|| (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
|
||||
#define MY_CPU_LE
|
||||
#endif
|
||||
|
||||
#if defined(__BIG_ENDIAN__) \
|
||||
|| defined(__ARMEB__) \
|
||||
|| defined(__THUMBEB__) \
|
||||
|| defined(__AARCH64EB__) \
|
||||
|| defined(__MIPSEB__) \
|
||||
|| defined(__MIPSEB) \
|
||||
|| defined(_MIPSEB) \
|
||||
|| defined(__m68k__) \
|
||||
|| defined(__s390__) \
|
||||
|| defined(__s390x__) \
|
||||
|| defined(__zarch__) \
|
||||
|| (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
|
||||
#define MY_CPU_BE
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(MY_CPU_LE) && defined(MY_CPU_BE)
|
||||
#error Stop_Compiling_Bad_Endian
|
||||
#endif
|
||||
|
||||
#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE)
|
||||
#error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time
|
||||
#endif
|
||||
|
||||
#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
|
||||
#error Stop_Compiling_Bad_32_64_BIT
|
||||
#endif
|
||||
|
||||
#ifdef __SIZEOF_POINTER__
|
||||
#ifdef MY_CPU_SIZEOF_POINTER
|
||||
#if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__
|
||||
#error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
|
||||
#endif
|
||||
#else
|
||||
#define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
|
||||
#if defined (_LP64)
|
||||
#error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#if _MSC_VER >= 1300
|
||||
#define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1))
|
||||
#define MY_CPU_pragma_pop __pragma(pack(pop))
|
||||
#else
|
||||
#define MY_CPU_pragma_pack_push_1
|
||||
#define MY_CPU_pragma_pop
|
||||
#endif
|
||||
#else
|
||||
#ifdef __xlC__
|
||||
#define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)")
|
||||
#define MY_CPU_pragma_pop _Pragma("pack()")
|
||||
#else
|
||||
#define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)")
|
||||
#define MY_CPU_pragma_pop _Pragma("pack(pop)")
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef MY_CPU_NAME
|
||||
#ifdef MY_CPU_LE
|
||||
#define MY_CPU_NAME "LE"
|
||||
#elif defined(MY_CPU_BE)
|
||||
#define MY_CPU_NAME "BE"
|
||||
#else
|
||||
/*
|
||||
#define MY_CPU_NAME ""
|
||||
*/
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#ifdef __has_builtin
|
||||
#define Z7_has_builtin(x) __has_builtin(x)
|
||||
#else
|
||||
#define Z7_has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
|
||||
#define Z7_BSWAP32_CONST(v) \
|
||||
( (((UInt32)(v) << 24) ) \
|
||||
| (((UInt32)(v) << 8) & (UInt32)0xff0000) \
|
||||
| (((UInt32)(v) >> 8) & (UInt32)0xff00 ) \
|
||||
| (((UInt32)(v) >> 24) ))
|
||||
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER >= 1300)
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Note: these macros will use bswap instruction (486), that is unsupported in 386 cpu */
|
||||
|
||||
#pragma intrinsic(_byteswap_ushort)
|
||||
#pragma intrinsic(_byteswap_ulong)
|
||||
#pragma intrinsic(_byteswap_uint64)
|
||||
|
||||
#define Z7_BSWAP16(v) _byteswap_ushort(v)
|
||||
#define Z7_BSWAP32(v) _byteswap_ulong (v)
|
||||
#define Z7_BSWAP64(v) _byteswap_uint64(v)
|
||||
#define Z7_CPU_FAST_BSWAP_SUPPORTED
|
||||
|
||||
#elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|
||||
|| (defined(__clang__) && Z7_has_builtin(__builtin_bswap16))
|
||||
|
||||
#define Z7_BSWAP16(v) __builtin_bswap16(v)
|
||||
#define Z7_BSWAP32(v) __builtin_bswap32(v)
|
||||
#define Z7_BSWAP64(v) __builtin_bswap64(v)
|
||||
#define Z7_CPU_FAST_BSWAP_SUPPORTED
|
||||
|
||||
#else
|
||||
|
||||
#define Z7_BSWAP16(v) ((UInt16) \
|
||||
( ((UInt32)(v) << 8) \
|
||||
| ((UInt32)(v) >> 8) \
|
||||
))
|
||||
|
||||
#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v)
|
||||
|
||||
#define Z7_BSWAP64(v) \
|
||||
( ( ( (UInt64)(v) ) << 8 * 7 ) \
|
||||
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \
|
||||
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \
|
||||
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \
|
||||
| ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \
|
||||
| ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \
|
||||
| ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \
|
||||
| ( ( (UInt64)(v) >> 8 * 7 ) ) \
|
||||
)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#ifdef MY_CPU_LE
|
||||
#if defined(MY_CPU_X86_OR_AMD64) \
|
||||
|| defined(MY_CPU_ARM64)
|
||||
#define MY_CPU_LE_UNALIGN
|
||||
#define MY_CPU_LE_UNALIGN_64
|
||||
#elif defined(__ARM_FEATURE_UNALIGNED)
|
||||
/* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment.
|
||||
So we can't use unaligned 64-bit operations. */
|
||||
#define MY_CPU_LE_UNALIGN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef MY_CPU_LE_UNALIGN
|
||||
|
||||
#define GetUi16(p) (*(const UInt16 *)(const void *)(p))
|
||||
#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
|
||||
#ifdef MY_CPU_LE_UNALIGN_64
|
||||
#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
|
||||
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
|
||||
#endif
|
||||
|
||||
#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
|
||||
#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
|
||||
|
||||
#else
|
||||
|
||||
#define GetUi16(p) ( (UInt16) ( \
|
||||
((const Byte *)(p))[0] | \
|
||||
((UInt16)((const Byte *)(p))[1] << 8) ))
|
||||
|
||||
#define GetUi32(p) ( \
|
||||
((const Byte *)(p))[0] | \
|
||||
((UInt32)((const Byte *)(p))[1] << 8) | \
|
||||
((UInt32)((const Byte *)(p))[2] << 16) | \
|
||||
((UInt32)((const Byte *)(p))[3] << 24))
|
||||
|
||||
#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
|
||||
_ppp_[0] = (Byte)_vvv_; \
|
||||
_ppp_[1] = (Byte)(_vvv_ >> 8); }
|
||||
|
||||
#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
|
||||
_ppp_[0] = (Byte)_vvv_; \
|
||||
_ppp_[1] = (Byte)(_vvv_ >> 8); \
|
||||
_ppp_[2] = (Byte)(_vvv_ >> 16); \
|
||||
_ppp_[3] = (Byte)(_vvv_ >> 24); }
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef GetUi64
|
||||
#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
|
||||
#endif
|
||||
|
||||
#ifndef SetUi64
|
||||
#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
|
||||
SetUi32(_ppp2_ , (UInt32)_vvv2_) \
|
||||
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) }
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED)
|
||||
|
||||
#define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p))
|
||||
#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); }
|
||||
|
||||
#if defined(MY_CPU_LE_UNALIGN_64)
|
||||
#define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p))
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#define GetBe32(p) ( \
|
||||
((UInt32)((const Byte *)(p))[0] << 24) | \
|
||||
((UInt32)((const Byte *)(p))[1] << 16) | \
|
||||
((UInt32)((const Byte *)(p))[2] << 8) | \
|
||||
((const Byte *)(p))[3] )
|
||||
|
||||
#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
|
||||
_ppp_[0] = (Byte)(_vvv_ >> 24); \
|
||||
_ppp_[1] = (Byte)(_vvv_ >> 16); \
|
||||
_ppp_[2] = (Byte)(_vvv_ >> 8); \
|
||||
_ppp_[3] = (Byte)_vvv_; }
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef GetBe64
|
||||
#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
|
||||
#endif
|
||||
|
||||
#ifndef GetBe16
|
||||
#define GetBe16(p) ( (UInt16) ( \
|
||||
((UInt16)((const Byte *)(p))[0] << 8) | \
|
||||
((const Byte *)(p))[1] ))
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(MY_CPU_BE)
|
||||
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v)
|
||||
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
|
||||
#define Z7_CONV_NATIVE_TO_BE_32(v) (v)
|
||||
#elif defined(MY_CPU_LE)
|
||||
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
|
||||
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v)
|
||||
#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v)
|
||||
#else
|
||||
#error Stop_Compiling_Unknown_Endian_CONV
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(MY_CPU_BE)
|
||||
|
||||
#define GetBe32a(p) (*(const UInt32 *)(const void *)(p))
|
||||
#define GetBe16a(p) (*(const UInt16 *)(const void *)(p))
|
||||
#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
|
||||
#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
|
||||
|
||||
#define GetUi32a(p) GetUi32(p)
|
||||
#define GetUi16a(p) GetUi16(p)
|
||||
#define SetUi32a(p, v) SetUi32(p, v)
|
||||
#define SetUi16a(p, v) SetUi16(p, v)
|
||||
|
||||
#elif defined(MY_CPU_LE)
|
||||
|
||||
#define GetUi32a(p) (*(const UInt32 *)(const void *)(p))
|
||||
#define GetUi16a(p) (*(const UInt16 *)(const void *)(p))
|
||||
#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
|
||||
#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
|
||||
|
||||
#define GetBe32a(p) GetBe32(p)
|
||||
#define GetBe16a(p) GetBe16(p)
|
||||
#define SetBe32a(p, v) SetBe32(p, v)
|
||||
#define SetBe16a(p, v) SetBe16(p, v)
|
||||
|
||||
#else
|
||||
#error Stop_Compiling_Unknown_Endian_CPU_a
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(MY_CPU_X86_OR_AMD64) \
|
||||
|| defined(MY_CPU_ARM_OR_ARM64) \
|
||||
|| defined(MY_CPU_PPC_OR_PPC64)
|
||||
#define Z7_CPU_FAST_ROTATE_SUPPORTED
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
|
||||
void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function);
|
||||
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void);
|
||||
#if defined(MY_CPU_AMD64)
|
||||
#define Z7_IF_X86_CPUID_SUPPORTED
|
||||
#else
|
||||
#define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc())
|
||||
#endif
|
||||
|
||||
BoolInt CPU_IsSupported_AES(void);
|
||||
BoolInt CPU_IsSupported_AVX(void);
|
||||
BoolInt CPU_IsSupported_AVX2(void);
|
||||
BoolInt CPU_IsSupported_VAES_AVX2(void);
|
||||
BoolInt CPU_IsSupported_CMOV(void);
|
||||
BoolInt CPU_IsSupported_SSE(void);
|
||||
BoolInt CPU_IsSupported_SSE2(void);
|
||||
BoolInt CPU_IsSupported_SSSE3(void);
|
||||
BoolInt CPU_IsSupported_SSE41(void);
|
||||
BoolInt CPU_IsSupported_SHA(void);
|
||||
BoolInt CPU_IsSupported_PageGB(void);
|
||||
|
||||
#elif defined(MY_CPU_ARM_OR_ARM64)
|
||||
|
||||
BoolInt CPU_IsSupported_CRC32(void);
|
||||
BoolInt CPU_IsSupported_NEON(void);
|
||||
|
||||
#if defined(_WIN32)
|
||||
BoolInt CPU_IsSupported_CRYPTO(void);
|
||||
#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO
|
||||
#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO
|
||||
#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO
|
||||
#else
|
||||
BoolInt CPU_IsSupported_SHA1(void);
|
||||
BoolInt CPU_IsSupported_SHA2(void);
|
||||
BoolInt CPU_IsSupported_AES(void);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__)
|
||||
int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
|
||||
int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
|
||||
#endif
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
#endif
|
||||
1764
extern/lzma/LzFind.c
vendored
1764
extern/lzma/LzFind.c
vendored
@@ -1,70 +1,166 @@
|
||||
/* LzFind.c -- Match finder for LZ algorithms
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
2023-03-14 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include <string.h>
|
||||
// #include <stdio.h>
|
||||
|
||||
#include "CpuArch.h"
|
||||
#include "LzFind.h"
|
||||
#include "LzHash.h"
|
||||
|
||||
#define kBlockMoveAlign (1 << 7) // alignment for memmove()
|
||||
#define kBlockSizeAlign (1 << 16) // alignment for block allocation
|
||||
#define kBlockSizeReserveMin (1 << 24) // it's 1/256 from 4 GB dictinary
|
||||
|
||||
#define kEmptyHashValue 0
|
||||
#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
|
||||
#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
|
||||
#define kNormalizeMask (~(kNormalizeStepMin - 1))
|
||||
#define kMaxHistorySize ((UInt32)3 << 30)
|
||||
|
||||
#define kStartMaxLen 3
|
||||
#define kMaxValForNormalize ((UInt32)0)
|
||||
// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug
|
||||
|
||||
static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc)
|
||||
// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
|
||||
|
||||
#define GET_AVAIL_BYTES(p) \
|
||||
Inline_MatchFinder_GetNumAvailableBytes(p)
|
||||
|
||||
|
||||
// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
|
||||
#define kFix5HashSize kFix4HashSize
|
||||
|
||||
/*
|
||||
HASH2_CALC:
|
||||
if (hv) match, then cur[0] and cur[1] also match
|
||||
*/
|
||||
#define HASH2_CALC hv = GetUi16(cur);
|
||||
|
||||
// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255]
|
||||
|
||||
/*
|
||||
HASH3_CALC:
|
||||
if (cur[0]) and (h2) match, then cur[1] also match
|
||||
if (cur[0]) and (hv) match, then cur[1] and cur[2] also match
|
||||
*/
|
||||
#define HASH3_CALC { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
h2 = temp & (kHash2Size - 1); \
|
||||
hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
|
||||
|
||||
#define HASH4_CALC { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
h2 = temp & (kHash2Size - 1); \
|
||||
temp ^= ((UInt32)cur[2] << 8); \
|
||||
h3 = temp & (kHash3Size - 1); \
|
||||
hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; }
|
||||
|
||||
#define HASH5_CALC { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
h2 = temp & (kHash2Size - 1); \
|
||||
temp ^= ((UInt32)cur[2] << 8); \
|
||||
h3 = temp & (kHash3Size - 1); \
|
||||
temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \
|
||||
/* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \
|
||||
hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; }
|
||||
|
||||
#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
|
||||
|
||||
|
||||
static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
|
||||
{
|
||||
if (!p->directInput)
|
||||
// if (!p->directInput)
|
||||
{
|
||||
alloc->Free(alloc, p->bufferBase);
|
||||
p->bufferBase = 0;
|
||||
ISzAlloc_Free(alloc, p->bufBase);
|
||||
p->bufBase = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
|
||||
|
||||
static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc)
|
||||
static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc)
|
||||
{
|
||||
UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
|
||||
if (p->directInput)
|
||||
{
|
||||
p->blockSize = blockSize;
|
||||
return 1;
|
||||
}
|
||||
if (p->bufferBase == 0 || p->blockSize != blockSize)
|
||||
if (blockSize == 0)
|
||||
return 0;
|
||||
if (!p->bufBase || p->blockSize != blockSize)
|
||||
{
|
||||
// size_t blockSizeT;
|
||||
LzInWindow_Free(p, alloc);
|
||||
p->blockSize = blockSize;
|
||||
p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)blockSize);
|
||||
// blockSizeT = blockSize;
|
||||
|
||||
// printf("\nblockSize = 0x%x\n", blockSize);
|
||||
/*
|
||||
#if defined _WIN64
|
||||
// we can allocate 4GiB, but still use UInt32 for (p->blockSize)
|
||||
// we use UInt32 type for (p->blockSize), because
|
||||
// we don't want to wrap over 4 GiB,
|
||||
// when we use (p->streamPos - p->pos) that is UInt32.
|
||||
if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign)
|
||||
{
|
||||
blockSizeT = ((size_t)1 << 32);
|
||||
printf("\nchanged to blockSizeT = 4GiB\n");
|
||||
}
|
||||
#endif
|
||||
*/
|
||||
|
||||
p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
|
||||
// printf("\nbufferBase = %p\n", p->bufBase);
|
||||
// return 0; // for debug
|
||||
}
|
||||
return (p->bufferBase != 0);
|
||||
return (p->bufBase != NULL);
|
||||
}
|
||||
|
||||
Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
|
||||
Byte MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index) { return p->buffer[index]; }
|
||||
static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
|
||||
|
||||
UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
|
||||
static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); }
|
||||
|
||||
void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
|
||||
{
|
||||
p->posLimit -= subValue;
|
||||
p->pos -= subValue;
|
||||
p->streamPos -= subValue;
|
||||
}
|
||||
|
||||
Z7_NO_INLINE
|
||||
static void MatchFinder_ReadBlock(CMatchFinder *p)
|
||||
{
|
||||
if (p->streamEndWasReached || p->result != SZ_OK)
|
||||
return;
|
||||
|
||||
/* We use (p->streamPos - p->pos) value.
|
||||
(p->streamPos < p->pos) is allowed. */
|
||||
|
||||
if (p->directInput)
|
||||
{
|
||||
UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
|
||||
if (curSize > p->directInputRem)
|
||||
curSize = (UInt32)p->directInputRem;
|
||||
p->streamPos += curSize;
|
||||
p->directInputRem -= curSize;
|
||||
if (p->directInputRem == 0)
|
||||
p->streamEndWasReached = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
Byte *dest = p->buffer + (p->streamPos - p->pos);
|
||||
size_t size = (p->bufferBase + p->blockSize - dest);
|
||||
const Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
|
||||
size_t size = (size_t)(p->bufBase + p->blockSize - dest);
|
||||
if (size == 0)
|
||||
{
|
||||
/* we call ReadBlock() after NeedMove() and MoveBlock().
|
||||
NeedMove() and MoveBlock() povide more than (keepSizeAfter)
|
||||
to the end of (blockSize).
|
||||
So we don't execute this branch in normal code flow.
|
||||
We can go here, if we will call ReadBlock() before NeedMove(), MoveBlock().
|
||||
*/
|
||||
// p->result = SZ_ERROR_FAIL; // we can show error here
|
||||
return;
|
||||
p->result = p->stream->Read(p->stream, dest, &size);
|
||||
}
|
||||
|
||||
// #define kRead 3
|
||||
// if (size > kRead) size = kRead; // for debug
|
||||
|
||||
/*
|
||||
// we need cast (Byte *)dest.
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic ignored "-Wcast-qual"
|
||||
#endif
|
||||
*/
|
||||
p->result = ISeqInStream_Read(p->stream,
|
||||
p->bufBase + (dest - p->bufBase), &size);
|
||||
if (p->result != SZ_OK)
|
||||
return;
|
||||
if (size == 0)
|
||||
@@ -73,47 +169,60 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
|
||||
return;
|
||||
}
|
||||
p->streamPos += (UInt32)size;
|
||||
if (p->streamPos - p->pos > p->keepSizeAfter)
|
||||
if (GET_AVAIL_BYTES(p) > p->keepSizeAfter)
|
||||
return;
|
||||
/* here and in another (p->keepSizeAfter) checks we keep on 1 byte more than was requested by Create() function
|
||||
(GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */
|
||||
}
|
||||
|
||||
// on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter)
|
||||
}
|
||||
|
||||
|
||||
|
||||
Z7_NO_INLINE
|
||||
void MatchFinder_MoveBlock(CMatchFinder *p)
|
||||
{
|
||||
memmove(p->bufferBase,
|
||||
p->buffer - p->keepSizeBefore,
|
||||
(size_t)(p->streamPos - p->pos + p->keepSizeBefore));
|
||||
p->buffer = p->bufferBase + p->keepSizeBefore;
|
||||
const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore;
|
||||
const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
|
||||
p->buffer = p->bufBase + keepBefore;
|
||||
memmove(p->bufBase,
|
||||
p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
|
||||
keepBefore + (size_t)GET_AVAIL_BYTES(p));
|
||||
}
|
||||
|
||||
/* We call MoveBlock() before ReadBlock().
|
||||
So MoveBlock() can be wasteful operation, if the whole input data
|
||||
can fit in current block even without calling MoveBlock().
|
||||
in important case where (dataSize <= historySize)
|
||||
condition (p->blockSize > dataSize + p->keepSizeAfter) is met
|
||||
So there is no MoveBlock() in that case case.
|
||||
*/
|
||||
|
||||
int MatchFinder_NeedMove(CMatchFinder *p)
|
||||
{
|
||||
/* if (p->streamEndWasReached) return 0; */
|
||||
return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
|
||||
if (p->directInput)
|
||||
return 0;
|
||||
if (p->streamEndWasReached || p->result != SZ_OK)
|
||||
return 0;
|
||||
return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
|
||||
}
|
||||
|
||||
void MatchFinder_ReadIfRequired(CMatchFinder *p)
|
||||
{
|
||||
if (p->streamEndWasReached)
|
||||
return;
|
||||
if (p->keepSizeAfter >= p->streamPos - p->pos)
|
||||
if (p->keepSizeAfter >= GET_AVAIL_BYTES(p))
|
||||
MatchFinder_ReadBlock(p);
|
||||
}
|
||||
|
||||
static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
|
||||
{
|
||||
if (MatchFinder_NeedMove(p))
|
||||
MatchFinder_MoveBlock(p);
|
||||
MatchFinder_ReadBlock(p);
|
||||
}
|
||||
|
||||
|
||||
static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
|
||||
{
|
||||
p->cutValue = 32;
|
||||
p->btMode = 1;
|
||||
p->numHashBytes = 4;
|
||||
/* p->skipModeBits = 0; */
|
||||
p->directInput = 0;
|
||||
p->numHashBytes_Min = 2;
|
||||
p->numHashOutBits = 0;
|
||||
p->bigHash = 0;
|
||||
}
|
||||
|
||||
@@ -121,204 +230,657 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
|
||||
|
||||
void MatchFinder_Construct(CMatchFinder *p)
|
||||
{
|
||||
UInt32 i;
|
||||
p->bufferBase = 0;
|
||||
unsigned i;
|
||||
p->buffer = NULL;
|
||||
p->bufBase = NULL;
|
||||
p->directInput = 0;
|
||||
p->hash = 0;
|
||||
p->stream = NULL;
|
||||
p->hash = NULL;
|
||||
p->expectedDataSize = (UInt64)(Int64)-1;
|
||||
MatchFinder_SetDefaultSettings(p);
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
UInt32 r = i;
|
||||
int j;
|
||||
UInt32 r = (UInt32)i;
|
||||
unsigned j;
|
||||
for (j = 0; j < 8; j++)
|
||||
r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1));
|
||||
r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
|
||||
p->crc[i] = r;
|
||||
}
|
||||
}
|
||||
|
||||
static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc)
|
||||
#undef kCrcPoly
|
||||
|
||||
static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
|
||||
{
|
||||
alloc->Free(alloc, p->hash);
|
||||
p->hash = 0;
|
||||
ISzAlloc_Free(alloc, p->hash);
|
||||
p->hash = NULL;
|
||||
}
|
||||
|
||||
void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc)
|
||||
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
|
||||
{
|
||||
MatchFinder_FreeThisClassMemory(p, alloc);
|
||||
LzInWindow_Free(p, alloc);
|
||||
}
|
||||
|
||||
static CLzRef* AllocRefs(UInt32 num, ISzAlloc *alloc)
|
||||
static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
|
||||
{
|
||||
size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
|
||||
const size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
|
||||
if (sizeInBytes / sizeof(CLzRef) != num)
|
||||
return 0;
|
||||
return (CLzRef *)alloc->Alloc(alloc, sizeInBytes);
|
||||
return NULL;
|
||||
return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
|
||||
}
|
||||
|
||||
#if (kBlockSizeReserveMin < kBlockSizeAlign * 2)
|
||||
#error Stop_Compiling_Bad_Reserve
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
|
||||
{
|
||||
UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter);
|
||||
/*
|
||||
if (historySize > kMaxHistorySize)
|
||||
return 0;
|
||||
*/
|
||||
// printf("\nhistorySize == 0x%x\n", historySize);
|
||||
|
||||
if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow
|
||||
return 0;
|
||||
|
||||
{
|
||||
const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign;
|
||||
const UInt32 rem = kBlockSizeMax - blockSize;
|
||||
const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2))
|
||||
+ (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here
|
||||
if (blockSize >= kBlockSizeMax
|
||||
|| rem < kBlockSizeReserveMin) // we reject settings that will be slow
|
||||
return 0;
|
||||
if (reserve >= rem)
|
||||
blockSize = kBlockSizeMax;
|
||||
else
|
||||
{
|
||||
blockSize += reserve;
|
||||
blockSize &= ~(UInt32)(kBlockSizeAlign - 1);
|
||||
}
|
||||
}
|
||||
// printf("\n LzFind_blockSize = %x\n", blockSize);
|
||||
// printf("\n LzFind_blockSize = %d\n", blockSize >> 20);
|
||||
return blockSize;
|
||||
}
|
||||
|
||||
|
||||
// input is historySize
|
||||
static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs)
|
||||
{
|
||||
if (p->numHashBytes == 2)
|
||||
return (1 << 16) - 1;
|
||||
if (hs != 0)
|
||||
hs--;
|
||||
hs |= (hs >> 1);
|
||||
hs |= (hs >> 2);
|
||||
hs |= (hs >> 4);
|
||||
hs |= (hs >> 8);
|
||||
// we propagated 16 bits in (hs). Low 16 bits must be set later
|
||||
if (hs >= (1 << 24))
|
||||
{
|
||||
if (p->numHashBytes == 3)
|
||||
hs = (1 << 24) - 1;
|
||||
/* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
|
||||
}
|
||||
// (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
|
||||
hs |= (1 << 16) - 1; /* don't change it! */
|
||||
// bt5: we adjust the size with recommended minimum size
|
||||
if (p->numHashBytes >= 5)
|
||||
hs |= (256 << kLzHash_CrcShift_2) - 1;
|
||||
return hs;
|
||||
}
|
||||
|
||||
// input is historySize
|
||||
static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs)
|
||||
{
|
||||
if (p->numHashBytes == 2)
|
||||
return (1 << 16) - 1;
|
||||
if (hs != 0)
|
||||
hs--;
|
||||
hs |= (hs >> 1);
|
||||
hs |= (hs >> 2);
|
||||
hs |= (hs >> 4);
|
||||
hs |= (hs >> 8);
|
||||
// we propagated 16 bits in (hs). Low 16 bits must be set later
|
||||
hs >>= 1;
|
||||
if (hs >= (1 << 24))
|
||||
{
|
||||
if (p->numHashBytes == 3)
|
||||
hs = (1 << 24) - 1;
|
||||
else
|
||||
hs >>= 1;
|
||||
/* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
|
||||
}
|
||||
// (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
|
||||
hs |= (1 << 16) - 1; /* don't change it! */
|
||||
// bt5: we adjust the size with recommended minimum size
|
||||
if (p->numHashBytes >= 5)
|
||||
hs |= (256 << kLzHash_CrcShift_2) - 1;
|
||||
return hs;
|
||||
}
|
||||
|
||||
|
||||
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
|
||||
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
|
||||
ISzAlloc *alloc)
|
||||
ISzAllocPtr alloc)
|
||||
{
|
||||
UInt32 sizeReserv;
|
||||
if (historySize > kMaxHistorySize)
|
||||
{
|
||||
MatchFinder_Free(p, alloc);
|
||||
return 0;
|
||||
}
|
||||
sizeReserv = historySize >> 1;
|
||||
if (historySize > ((UInt32)2 << 30))
|
||||
sizeReserv = historySize >> 2;
|
||||
sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
|
||||
|
||||
/* we need one additional byte in (p->keepSizeBefore),
|
||||
since we use MoveBlock() after (p->pos++) and before dictionary using */
|
||||
// keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug
|
||||
p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
|
||||
p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
|
||||
/* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
|
||||
if (LzInWindow_Create(p, sizeReserv, alloc))
|
||||
|
||||
keepAddBufferAfter += matchMaxLen;
|
||||
/* we need (p->keepSizeAfter >= p->numHashBytes) */
|
||||
if (keepAddBufferAfter < p->numHashBytes)
|
||||
keepAddBufferAfter = p->numHashBytes;
|
||||
// keepAddBufferAfter -= 2; // for debug
|
||||
p->keepSizeAfter = keepAddBufferAfter;
|
||||
|
||||
if (p->directInput)
|
||||
p->blockSize = 0;
|
||||
if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
|
||||
{
|
||||
UInt32 newCyclicBufferSize = (historySize /* >> p->skipModeBits */) + 1;
|
||||
UInt32 hs;
|
||||
p->matchMaxLen = matchMaxLen;
|
||||
size_t hashSizeSum;
|
||||
{
|
||||
p->fixedHashSize = 0;
|
||||
if (p->numHashBytes == 2)
|
||||
hs = (1 << 16) - 1;
|
||||
else
|
||||
UInt32 hs;
|
||||
UInt32 hsCur;
|
||||
|
||||
if (p->numHashOutBits != 0)
|
||||
{
|
||||
hs = historySize - 1;
|
||||
hs |= (hs >> 1);
|
||||
hs |= (hs >> 2);
|
||||
hs |= (hs >> 4);
|
||||
hs |= (hs >> 8);
|
||||
hs >>= 1;
|
||||
/* hs >>= p->skipModeBits; */
|
||||
hs |= 0xFFFF; /* don't change it! It's required for Deflate */
|
||||
if (hs > (1 << 24))
|
||||
unsigned numBits = p->numHashOutBits;
|
||||
const unsigned nbMax =
|
||||
(p->numHashBytes == 2 ? 16 :
|
||||
(p->numHashBytes == 3 ? 24 : 32));
|
||||
if (numBits > nbMax)
|
||||
numBits = nbMax;
|
||||
if (numBits >= 32)
|
||||
hs = (UInt32)0 - 1;
|
||||
else
|
||||
hs = ((UInt32)1 << numBits) - 1;
|
||||
// (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
|
||||
hs |= (1 << 16) - 1; /* don't change it! */
|
||||
if (p->numHashBytes >= 5)
|
||||
hs |= (256 << kLzHash_CrcShift_2) - 1;
|
||||
{
|
||||
if (p->numHashBytes == 3)
|
||||
hs = (1 << 24) - 1;
|
||||
else
|
||||
hs >>= 1;
|
||||
const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize);
|
||||
if (hs > hs2)
|
||||
hs = hs2;
|
||||
}
|
||||
hsCur = hs;
|
||||
if (p->expectedDataSize < historySize)
|
||||
{
|
||||
const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize);
|
||||
if (hsCur > hs2)
|
||||
hsCur = hs2;
|
||||
}
|
||||
}
|
||||
p->hashMask = hs;
|
||||
hs++;
|
||||
if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
|
||||
if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
|
||||
if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
|
||||
hs += p->fixedHashSize;
|
||||
else
|
||||
{
|
||||
hs = MatchFinder_GetHashMask(p, historySize);
|
||||
hsCur = hs;
|
||||
if (p->expectedDataSize < historySize)
|
||||
{
|
||||
hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize);
|
||||
if (hsCur > hs) // is it possible?
|
||||
hsCur = hs;
|
||||
}
|
||||
}
|
||||
|
||||
p->hashMask = hsCur;
|
||||
|
||||
hashSizeSum = hs;
|
||||
hashSizeSum++;
|
||||
if (hashSizeSum < hs)
|
||||
return 0;
|
||||
{
|
||||
UInt32 fixedHashSize = 0;
|
||||
if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size;
|
||||
if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size;
|
||||
// if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
|
||||
hashSizeSum += fixedHashSize;
|
||||
p->fixedHashSize = fixedHashSize;
|
||||
}
|
||||
}
|
||||
|
||||
p->matchMaxLen = matchMaxLen;
|
||||
|
||||
{
|
||||
UInt32 prevSize = p->hashSizeSum + p->numSons;
|
||||
UInt32 newSize;
|
||||
size_t newSize;
|
||||
size_t numSons;
|
||||
const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
|
||||
p->historySize = historySize;
|
||||
p->hashSizeSum = hs;
|
||||
p->cyclicBufferSize = newCyclicBufferSize;
|
||||
p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize);
|
||||
newSize = p->hashSizeSum + p->numSons;
|
||||
if (p->hash != 0 && prevSize == newSize)
|
||||
p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)
|
||||
|
||||
numSons = newCyclicBufferSize;
|
||||
if (p->btMode)
|
||||
numSons <<= 1;
|
||||
newSize = hashSizeSum + numSons;
|
||||
|
||||
if (numSons < newCyclicBufferSize || newSize < numSons)
|
||||
return 0;
|
||||
|
||||
// aligned size is not required here, but it can be better for some loops
|
||||
#define NUM_REFS_ALIGN_MASK 0xF
|
||||
newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
|
||||
|
||||
// 22.02: we don't reallocate buffer, if old size is enough
|
||||
if (p->hash && p->numRefs >= newSize)
|
||||
return 1;
|
||||
|
||||
MatchFinder_FreeThisClassMemory(p, alloc);
|
||||
p->numRefs = newSize;
|
||||
p->hash = AllocRefs(newSize, alloc);
|
||||
if (p->hash != 0)
|
||||
|
||||
if (p->hash)
|
||||
{
|
||||
p->son = p->hash + p->hashSizeSum;
|
||||
p->son = p->hash + hashSizeSum;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MatchFinder_Free(p, alloc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void MatchFinder_SetLimits(CMatchFinder *p)
|
||||
{
|
||||
UInt32 limit = kMaxValForNormalize - p->pos;
|
||||
UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
|
||||
if (limit2 < limit)
|
||||
limit = limit2;
|
||||
limit2 = p->streamPos - p->pos;
|
||||
if (limit2 <= p->keepSizeAfter)
|
||||
UInt32 k;
|
||||
UInt32 n = kMaxValForNormalize - p->pos;
|
||||
if (n == 0)
|
||||
n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0)
|
||||
|
||||
k = p->cyclicBufferSize - p->cyclicBufferPos;
|
||||
if (k < n)
|
||||
n = k;
|
||||
|
||||
k = GET_AVAIL_BYTES(p);
|
||||
{
|
||||
if (limit2 > 0)
|
||||
limit2 = 1;
|
||||
const UInt32 ksa = p->keepSizeAfter;
|
||||
UInt32 mm = p->matchMaxLen;
|
||||
if (k > ksa)
|
||||
k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock
|
||||
else if (k >= mm)
|
||||
{
|
||||
// the limitation for (p->lenLimit) update
|
||||
k -= mm; // optimization : to reduce the number of checks
|
||||
k++;
|
||||
// k = 1; // non-optimized version : for debug
|
||||
}
|
||||
else
|
||||
{
|
||||
mm = k;
|
||||
if (k != 0)
|
||||
k = 1;
|
||||
}
|
||||
p->lenLimit = mm;
|
||||
}
|
||||
else
|
||||
limit2 -= p->keepSizeAfter;
|
||||
if (limit2 < limit)
|
||||
limit = limit2;
|
||||
{
|
||||
UInt32 lenLimit = p->streamPos - p->pos;
|
||||
if (lenLimit > p->matchMaxLen)
|
||||
lenLimit = p->matchMaxLen;
|
||||
p->lenLimit = lenLimit;
|
||||
}
|
||||
p->posLimit = p->pos + limit;
|
||||
if (k < n)
|
||||
n = k;
|
||||
|
||||
p->posLimit = p->pos + n;
|
||||
}
|
||||
|
||||
|
||||
void MatchFinder_Init_LowHash(CMatchFinder *p)
|
||||
{
|
||||
size_t i;
|
||||
CLzRef *items = p->hash;
|
||||
const size_t numItems = p->fixedHashSize;
|
||||
for (i = 0; i < numItems; i++)
|
||||
items[i] = kEmptyHashValue;
|
||||
}
|
||||
|
||||
|
||||
void MatchFinder_Init_HighHash(CMatchFinder *p)
|
||||
{
|
||||
size_t i;
|
||||
CLzRef *items = p->hash + p->fixedHashSize;
|
||||
const size_t numItems = (size_t)p->hashMask + 1;
|
||||
for (i = 0; i < numItems; i++)
|
||||
items[i] = kEmptyHashValue;
|
||||
}
|
||||
|
||||
|
||||
void MatchFinder_Init_4(CMatchFinder *p)
|
||||
{
|
||||
if (!p->directInput)
|
||||
p->buffer = p->bufBase;
|
||||
{
|
||||
/* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.
|
||||
the code in CMatchFinderMt expects (pos = 1) */
|
||||
p->pos =
|
||||
p->streamPos =
|
||||
1; // it's smallest optimal value. do not change it
|
||||
// 0; // for debug
|
||||
}
|
||||
p->result = SZ_OK;
|
||||
p->streamEndWasReached = 0;
|
||||
}
|
||||
|
||||
|
||||
// (CYC_TO_POS_OFFSET == 0) is expected by some optimized code
|
||||
#define CYC_TO_POS_OFFSET 0
|
||||
// #define CYC_TO_POS_OFFSET 1 // for debug
|
||||
|
||||
void MatchFinder_Init(CMatchFinder *p)
|
||||
{
|
||||
UInt32 i;
|
||||
for (i = 0; i < p->hashSizeSum; i++)
|
||||
p->hash[i] = kEmptyHashValue;
|
||||
p->cyclicBufferPos = 0;
|
||||
p->buffer = p->bufferBase;
|
||||
p->pos = p->streamPos = p->cyclicBufferSize;
|
||||
p->result = SZ_OK;
|
||||
p->streamEndWasReached = 0;
|
||||
MatchFinder_Init_HighHash(p);
|
||||
MatchFinder_Init_LowHash(p);
|
||||
MatchFinder_Init_4(p);
|
||||
// if (readData)
|
||||
MatchFinder_ReadBlock(p);
|
||||
|
||||
/* if we init (cyclicBufferPos = pos), then we can use one variable
|
||||
instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */
|
||||
p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos)
|
||||
// p->cyclicBufferPos = 0; // smallest value
|
||||
// p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses.
|
||||
MatchFinder_SetLimits(p);
|
||||
}
|
||||
|
||||
static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
|
||||
|
||||
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
#if defined(__clang__) && (__clang_major__ >= 4) \
|
||||
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701)
|
||||
// || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
|
||||
|
||||
#define USE_LZFIND_SATUR_SUB_128
|
||||
#define USE_LZFIND_SATUR_SUB_256
|
||||
#define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
|
||||
#define LZFIND_ATTRIB_AVX2 __attribute__((__target__("avx2")))
|
||||
#elif defined(_MSC_VER)
|
||||
#if (_MSC_VER >= 1600)
|
||||
#define USE_LZFIND_SATUR_SUB_128
|
||||
#endif
|
||||
#if (_MSC_VER >= 1900)
|
||||
#define USE_LZFIND_SATUR_SUB_256
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// #elif defined(MY_CPU_ARM_OR_ARM64)
|
||||
#elif defined(MY_CPU_ARM64)
|
||||
|
||||
#if defined(__clang__) && (__clang_major__ >= 8) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 8)
|
||||
#define USE_LZFIND_SATUR_SUB_128
|
||||
#ifdef MY_CPU_ARM64
|
||||
// #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("")))
|
||||
#else
|
||||
// #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
|
||||
#endif
|
||||
|
||||
#elif defined(_MSC_VER)
|
||||
#if (_MSC_VER >= 1910)
|
||||
#define USE_LZFIND_SATUR_SUB_128
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
|
||||
#include <arm64_neon.h>
|
||||
#else
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef USE_LZFIND_SATUR_SUB_128
|
||||
|
||||
// #define Z7_SHOW_HW_STATUS
|
||||
|
||||
#ifdef Z7_SHOW_HW_STATUS
|
||||
#include <stdio.h>
|
||||
#define PRF(x) x
|
||||
PRF(;)
|
||||
#else
|
||||
#define PRF(x)
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef MY_CPU_ARM_OR_ARM64
|
||||
|
||||
#ifdef MY_CPU_ARM64
|
||||
// #define FORCE_LZFIND_SATUR_SUB_128
|
||||
#endif
|
||||
typedef uint32x4_t LzFind_v128;
|
||||
#define SASUB_128_V(v, s) \
|
||||
vsubq_u32(vmaxq_u32(v, s), s)
|
||||
|
||||
#else // MY_CPU_ARM_OR_ARM64
|
||||
|
||||
#include <smmintrin.h> // sse4.1
|
||||
|
||||
typedef __m128i LzFind_v128;
|
||||
// SSE 4.1
|
||||
#define SASUB_128_V(v, s) \
|
||||
_mm_sub_epi32(_mm_max_epu32(v, s), s)
|
||||
|
||||
#endif // MY_CPU_ARM_OR_ARM64
|
||||
|
||||
|
||||
#define SASUB_128(i) \
|
||||
*( LzFind_v128 *)( void *)(items + (i) * 4) = SASUB_128_V( \
|
||||
*(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2);
|
||||
|
||||
|
||||
Z7_NO_INLINE
|
||||
static
|
||||
#ifdef LZFIND_ATTRIB_SSE41
|
||||
LZFIND_ATTRIB_SSE41
|
||||
#endif
|
||||
void
|
||||
Z7_FASTCALL
|
||||
LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
|
||||
{
|
||||
return (p->pos - p->historySize - 1) & kNormalizeMask;
|
||||
const LzFind_v128 sub2 =
|
||||
#ifdef MY_CPU_ARM_OR_ARM64
|
||||
vdupq_n_u32(subValue);
|
||||
#else
|
||||
_mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
|
||||
#endif
|
||||
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
|
||||
do
|
||||
{
|
||||
SASUB_128(0) SASUB_128(1) items += 2 * 4;
|
||||
SASUB_128(0) SASUB_128(1) items += 2 * 4;
|
||||
}
|
||||
while (items != lim);
|
||||
}
|
||||
|
||||
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems)
|
||||
|
||||
|
||||
#ifdef USE_LZFIND_SATUR_SUB_256
|
||||
|
||||
#include <immintrin.h> // avx
|
||||
/*
|
||||
clang :immintrin.h uses
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX2__)
|
||||
#include <avx2intrin.h>
|
||||
#endif
|
||||
so we need <avxintrin.h> for clang-cl */
|
||||
|
||||
#if defined(__clang__)
|
||||
#include <avxintrin.h>
|
||||
#include <avx2intrin.h>
|
||||
#endif
|
||||
|
||||
// AVX2:
|
||||
#define SASUB_256(i) \
|
||||
*( __m256i *)( void *)(items + (i) * 8) = \
|
||||
_mm256_sub_epi32(_mm256_max_epu32( \
|
||||
*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2);
|
||||
|
||||
Z7_NO_INLINE
|
||||
static
|
||||
#ifdef LZFIND_ATTRIB_AVX2
|
||||
LZFIND_ATTRIB_AVX2
|
||||
#endif
|
||||
void
|
||||
Z7_FASTCALL
|
||||
LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
|
||||
{
|
||||
UInt32 i;
|
||||
for (i = 0; i < numItems; i++)
|
||||
const __m256i sub2 = _mm256_set_epi32(
|
||||
(Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
|
||||
(Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
|
||||
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
|
||||
do
|
||||
{
|
||||
UInt32 value = items[i];
|
||||
if (value <= subValue)
|
||||
value = kEmptyHashValue;
|
||||
else
|
||||
value -= subValue;
|
||||
items[i] = value;
|
||||
SASUB_256(0) SASUB_256(1) items += 2 * 8;
|
||||
SASUB_256(0) SASUB_256(1) items += 2 * 8;
|
||||
}
|
||||
while (items != lim);
|
||||
}
|
||||
#endif // USE_LZFIND_SATUR_SUB_256
|
||||
|
||||
#ifndef FORCE_LZFIND_SATUR_SUB_128
|
||||
typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)(
|
||||
UInt32 subValue, CLzRef *items, const CLzRef *lim);
|
||||
static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
|
||||
#endif // FORCE_LZFIND_SATUR_SUB_128
|
||||
|
||||
#endif // USE_LZFIND_SATUR_SUB_128
|
||||
|
||||
|
||||
// kEmptyHashValue must be zero
|
||||
// #define SASUB_32(i) { UInt32 v = items[i]; UInt32 m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; }
|
||||
#define SASUB_32(i) { UInt32 v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; }
|
||||
|
||||
#ifdef FORCE_LZFIND_SATUR_SUB_128
|
||||
|
||||
#define DEFAULT_SaturSub LzFind_SaturSub_128
|
||||
|
||||
#else
|
||||
|
||||
#define DEFAULT_SaturSub LzFind_SaturSub_32
|
||||
|
||||
Z7_NO_INLINE
|
||||
static
|
||||
void
|
||||
Z7_FASTCALL
|
||||
LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
|
||||
{
|
||||
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
|
||||
do
|
||||
{
|
||||
SASUB_32(0) SASUB_32(1) items += 2;
|
||||
SASUB_32(0) SASUB_32(1) items += 2;
|
||||
SASUB_32(0) SASUB_32(1) items += 2;
|
||||
SASUB_32(0) SASUB_32(1) items += 2;
|
||||
}
|
||||
while (items != lim);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
Z7_NO_INLINE
|
||||
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
|
||||
{
|
||||
#define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7)
|
||||
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
|
||||
for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
|
||||
{
|
||||
SASUB_32(0)
|
||||
items++;
|
||||
}
|
||||
{
|
||||
const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1);
|
||||
CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask);
|
||||
numItems &= k_Align_Mask;
|
||||
if (items != lim)
|
||||
{
|
||||
#if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128)
|
||||
if (g_LzFind_SaturSub)
|
||||
g_LzFind_SaturSub(subValue, items, lim);
|
||||
else
|
||||
#endif
|
||||
DEFAULT_SaturSub(subValue, items, lim);
|
||||
}
|
||||
items = lim;
|
||||
}
|
||||
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
|
||||
for (; numItems != 0; numItems--)
|
||||
{
|
||||
SASUB_32(0)
|
||||
items++;
|
||||
}
|
||||
}
|
||||
|
||||
static void MatchFinder_Normalize(CMatchFinder *p)
|
||||
{
|
||||
UInt32 subValue = MatchFinder_GetSubValue(p);
|
||||
MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons);
|
||||
MatchFinder_ReduceOffsets(p, subValue);
|
||||
}
|
||||
|
||||
|
||||
// call MatchFinder_CheckLimits() only after (p->pos++) update
|
||||
|
||||
Z7_NO_INLINE
|
||||
static void MatchFinder_CheckLimits(CMatchFinder *p)
|
||||
{
|
||||
if (// !p->streamEndWasReached && p->result == SZ_OK &&
|
||||
p->keepSizeAfter == GET_AVAIL_BYTES(p))
|
||||
{
|
||||
// we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p))
|
||||
if (MatchFinder_NeedMove(p))
|
||||
MatchFinder_MoveBlock(p);
|
||||
MatchFinder_ReadBlock(p);
|
||||
}
|
||||
|
||||
if (p->pos == kMaxValForNormalize)
|
||||
MatchFinder_Normalize(p);
|
||||
if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
|
||||
MatchFinder_CheckAndMoveAndRead(p);
|
||||
if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data.
|
||||
/*
|
||||
if we disable normalization for last bytes of data, and
|
||||
if (data_size == 4 GiB), we don't call wastfull normalization,
|
||||
but (pos) will be wrapped over Zero (0) in that case.
|
||||
And we cannot resume later to normal operation
|
||||
*/
|
||||
{
|
||||
// MatchFinder_Normalize(p);
|
||||
/* after normalization we need (p->pos >= p->historySize + 1); */
|
||||
/* we can reduce subValue to aligned value, if want to keep alignment
|
||||
of (p->pos) and (p->buffer) for speculated accesses. */
|
||||
const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
|
||||
// const UInt32 subValue = (1 << 15); // for debug
|
||||
// printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
|
||||
MatchFinder_REDUCE_OFFSETS(p, subValue)
|
||||
MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize);
|
||||
{
|
||||
size_t numSonRefs = p->cyclicBufferSize;
|
||||
if (p->btMode)
|
||||
numSonRefs <<= 1;
|
||||
MatchFinder_Normalize3(subValue, p->son, numSonRefs);
|
||||
}
|
||||
}
|
||||
|
||||
if (p->cyclicBufferPos == p->cyclicBufferSize)
|
||||
p->cyclicBufferPos = 0;
|
||||
|
||||
MatchFinder_SetLimits(p);
|
||||
}
|
||||
|
||||
static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
|
||||
UInt32 *distances, UInt32 maxLen)
|
||||
|
||||
/*
|
||||
(lenLimit > maxLen)
|
||||
*/
|
||||
Z7_FORCE_INLINE
|
||||
static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
|
||||
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
|
||||
UInt32 *d, unsigned maxLen)
|
||||
{
|
||||
/*
|
||||
son[_cyclicBufferPos] = curMatch;
|
||||
for (;;)
|
||||
{
|
||||
UInt32 delta = pos - curMatch;
|
||||
if (cutValue-- == 0 || delta >= _cyclicBufferSize)
|
||||
return distances;
|
||||
return d;
|
||||
{
|
||||
const Byte *pb = cur - delta;
|
||||
curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
|
||||
@@ -330,35 +892,91 @@ static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos,
|
||||
break;
|
||||
if (maxLen < len)
|
||||
{
|
||||
*distances++ = maxLen = len;
|
||||
*distances++ = delta - 1;
|
||||
maxLen = len;
|
||||
*d++ = len;
|
||||
*d++ = delta - 1;
|
||||
if (len == lenLimit)
|
||||
return distances;
|
||||
return d;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
const Byte *lim = cur + lenLimit;
|
||||
son[_cyclicBufferPos] = curMatch;
|
||||
|
||||
do
|
||||
{
|
||||
UInt32 delta;
|
||||
|
||||
if (curMatch == 0)
|
||||
break;
|
||||
// if (curMatch2 >= curMatch) return NULL;
|
||||
delta = pos - curMatch;
|
||||
if (delta >= _cyclicBufferSize)
|
||||
break;
|
||||
{
|
||||
ptrdiff_t diff;
|
||||
curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
|
||||
diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
|
||||
if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff])
|
||||
{
|
||||
const Byte *c = cur;
|
||||
while (*c == c[diff])
|
||||
{
|
||||
if (++c == lim)
|
||||
{
|
||||
d[0] = (UInt32)(lim - cur);
|
||||
d[1] = delta - 1;
|
||||
return d + 2;
|
||||
}
|
||||
}
|
||||
{
|
||||
const unsigned len = (unsigned)(c - cur);
|
||||
if (maxLen < len)
|
||||
{
|
||||
maxLen = len;
|
||||
d[0] = (UInt32)len;
|
||||
d[1] = delta - 1;
|
||||
d += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
while (--cutValue);
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
|
||||
Z7_FORCE_INLINE
|
||||
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
|
||||
UInt32 *distances, UInt32 maxLen)
|
||||
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
|
||||
UInt32 *d, UInt32 maxLen)
|
||||
{
|
||||
CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
|
||||
CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
|
||||
UInt32 len0 = 0, len1 = 0;
|
||||
for (;;)
|
||||
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
|
||||
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
|
||||
unsigned len0 = 0, len1 = 0;
|
||||
|
||||
UInt32 cmCheck;
|
||||
|
||||
// if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
|
||||
|
||||
cmCheck = (UInt32)(pos - _cyclicBufferSize);
|
||||
if ((UInt32)pos <= _cyclicBufferSize)
|
||||
cmCheck = 0;
|
||||
|
||||
if (cmCheck < curMatch)
|
||||
do
|
||||
{
|
||||
UInt32 delta = pos - curMatch;
|
||||
if (cutValue-- == 0 || delta >= _cyclicBufferSize)
|
||||
const UInt32 delta = pos - curMatch;
|
||||
{
|
||||
*ptr0 = *ptr1 = kEmptyHashValue;
|
||||
return distances;
|
||||
}
|
||||
{
|
||||
CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
|
||||
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
|
||||
const Byte *pb = cur - delta;
|
||||
UInt32 len = (len0 < len1 ? len0 : len1);
|
||||
unsigned len = (len0 < len1 ? len0 : len1);
|
||||
const UInt32 pair0 = pair[0];
|
||||
if (pb[len] == cur[len])
|
||||
{
|
||||
if (++len != lenLimit && pb[len] == cur[len])
|
||||
@@ -367,52 +985,65 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt
|
||||
break;
|
||||
if (maxLen < len)
|
||||
{
|
||||
*distances++ = maxLen = len;
|
||||
*distances++ = delta - 1;
|
||||
maxLen = (UInt32)len;
|
||||
*d++ = (UInt32)len;
|
||||
*d++ = delta - 1;
|
||||
if (len == lenLimit)
|
||||
{
|
||||
*ptr1 = pair[0];
|
||||
*ptr1 = pair0;
|
||||
*ptr0 = pair[1];
|
||||
return distances;
|
||||
return d;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (pb[len] < cur[len])
|
||||
{
|
||||
*ptr1 = curMatch;
|
||||
// const UInt32 curMatch2 = pair[1];
|
||||
// if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
|
||||
// curMatch = curMatch2;
|
||||
curMatch = pair[1];
|
||||
ptr1 = pair + 1;
|
||||
curMatch = *ptr1;
|
||||
len1 = len;
|
||||
}
|
||||
else
|
||||
{
|
||||
*ptr0 = curMatch;
|
||||
curMatch = pair[0];
|
||||
ptr0 = pair;
|
||||
curMatch = *ptr0;
|
||||
len0 = len;
|
||||
}
|
||||
}
|
||||
}
|
||||
while(--cutValue && cmCheck < curMatch);
|
||||
|
||||
*ptr0 = *ptr1 = kEmptyHashValue;
|
||||
return d;
|
||||
}
|
||||
|
||||
|
||||
static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
|
||||
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
|
||||
{
|
||||
CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
|
||||
CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
|
||||
UInt32 len0 = 0, len1 = 0;
|
||||
for (;;)
|
||||
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
|
||||
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
|
||||
unsigned len0 = 0, len1 = 0;
|
||||
|
||||
UInt32 cmCheck;
|
||||
|
||||
cmCheck = (UInt32)(pos - _cyclicBufferSize);
|
||||
if ((UInt32)pos <= _cyclicBufferSize)
|
||||
cmCheck = 0;
|
||||
|
||||
if (// curMatch >= pos || // failure
|
||||
cmCheck < curMatch)
|
||||
do
|
||||
{
|
||||
UInt32 delta = pos - curMatch;
|
||||
if (cutValue-- == 0 || delta >= _cyclicBufferSize)
|
||||
const UInt32 delta = pos - curMatch;
|
||||
{
|
||||
*ptr0 = *ptr1 = kEmptyHashValue;
|
||||
return;
|
||||
}
|
||||
{
|
||||
CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
|
||||
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
|
||||
const Byte *pb = cur - delta;
|
||||
UInt32 len = (len0 < len1 ? len0 : len1);
|
||||
unsigned len = (len0 < len1 ? len0 : len1);
|
||||
if (pb[len] == cur[len])
|
||||
{
|
||||
while (++len != lenLimit)
|
||||
@@ -430,308 +1061,594 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
|
||||
if (pb[len] < cur[len])
|
||||
{
|
||||
*ptr1 = curMatch;
|
||||
curMatch = pair[1];
|
||||
ptr1 = pair + 1;
|
||||
curMatch = *ptr1;
|
||||
len1 = len;
|
||||
}
|
||||
else
|
||||
{
|
||||
*ptr0 = curMatch;
|
||||
curMatch = pair[0];
|
||||
ptr0 = pair;
|
||||
curMatch = *ptr0;
|
||||
len0 = len;
|
||||
}
|
||||
}
|
||||
}
|
||||
while(--cutValue && cmCheck < curMatch);
|
||||
|
||||
*ptr0 = *ptr1 = kEmptyHashValue;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
#define MOVE_POS \
|
||||
++p->cyclicBufferPos; \
|
||||
p->buffer++; \
|
||||
if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
|
||||
{ const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
|
||||
|
||||
#define MOVE_POS_RET MOVE_POS return offset;
|
||||
#define MOVE_POS_RET MOVE_POS return distances;
|
||||
|
||||
static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
|
||||
Z7_NO_INLINE
|
||||
static void MatchFinder_MovePos(CMatchFinder *p)
|
||||
{
|
||||
/* we go here at the end of stream data, when (avail < num_hash_bytes)
|
||||
We don't update sons[cyclicBufferPos << btMode].
|
||||
So (sons) record will contain junk. And we cannot resume match searching
|
||||
to normal operation, even if we will provide more input data in buffer.
|
||||
p->sons[p->cyclicBufferPos << p->btMode] = 0; // kEmptyHashValue
|
||||
if (p->btMode)
|
||||
p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue
|
||||
*/
|
||||
MOVE_POS
|
||||
}
|
||||
|
||||
#define GET_MATCHES_HEADER2(minLen, ret_op) \
|
||||
UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \
|
||||
lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
|
||||
unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
|
||||
lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
|
||||
cur = p->buffer;
|
||||
|
||||
#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
|
||||
#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
|
||||
#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances)
|
||||
#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue)
|
||||
|
||||
#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
|
||||
#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
|
||||
|
||||
#define GET_MATCHES_FOOTER(offset, maxLen) \
|
||||
offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
|
||||
distances + offset, maxLen) - distances); MOVE_POS_RET;
|
||||
#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS } while (--num);
|
||||
|
||||
#define SKIP_FOOTER \
|
||||
SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
|
||||
#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
|
||||
distances = func(MF_PARAMS(p), \
|
||||
distances, (UInt32)_maxLen_); MOVE_POS_RET
|
||||
|
||||
static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
#define GET_MATCHES_FOOTER_BT(_maxLen_) \
|
||||
GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
|
||||
|
||||
#define GET_MATCHES_FOOTER_HC(_maxLen_) \
|
||||
GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec)
|
||||
|
||||
|
||||
|
||||
#define UPDATE_maxLen { \
|
||||
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \
|
||||
const Byte *c = cur + maxLen; \
|
||||
const Byte *lim = cur + lenLimit; \
|
||||
for (; c != lim; c++) if (*(c + diff) != *c) break; \
|
||||
maxLen = (unsigned)(c - cur); }
|
||||
|
||||
static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
{
|
||||
UInt32 offset;
|
||||
GET_MATCHES_HEADER(2)
|
||||
HASH2_CALC;
|
||||
curMatch = p->hash[hashValue];
|
||||
p->hash[hashValue] = p->pos;
|
||||
offset = 0;
|
||||
GET_MATCHES_FOOTER(offset, 1)
|
||||
HASH2_CALC
|
||||
curMatch = p->hash[hv];
|
||||
p->hash[hv] = p->pos;
|
||||
GET_MATCHES_FOOTER_BT(1)
|
||||
}
|
||||
|
||||
UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
{
|
||||
UInt32 offset;
|
||||
GET_MATCHES_HEADER(3)
|
||||
HASH_ZIP_CALC;
|
||||
curMatch = p->hash[hashValue];
|
||||
p->hash[hashValue] = p->pos;
|
||||
offset = 0;
|
||||
GET_MATCHES_FOOTER(offset, 2)
|
||||
HASH_ZIP_CALC
|
||||
curMatch = p->hash[hv];
|
||||
p->hash[hv] = p->pos;
|
||||
GET_MATCHES_FOOTER_BT(2)
|
||||
}
|
||||
|
||||
static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
|
||||
#define SET_mmm \
|
||||
mmm = p->cyclicBufferSize; \
|
||||
if (pos < mmm) \
|
||||
mmm = pos;
|
||||
|
||||
|
||||
static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
{
|
||||
UInt32 hash2Value, delta2, maxLen, offset;
|
||||
UInt32 mmm;
|
||||
UInt32 h2, d2, pos;
|
||||
unsigned maxLen;
|
||||
UInt32 *hash;
|
||||
GET_MATCHES_HEADER(3)
|
||||
|
||||
HASH3_CALC;
|
||||
HASH3_CALC
|
||||
|
||||
delta2 = p->pos - p->hash[hash2Value];
|
||||
curMatch = p->hash[kFix3HashSize + hashValue];
|
||||
hash = p->hash;
|
||||
pos = p->pos;
|
||||
|
||||
d2 = pos - hash[h2];
|
||||
|
||||
curMatch = (hash + kFix3HashSize)[hv];
|
||||
|
||||
p->hash[hash2Value] =
|
||||
p->hash[kFix3HashSize + hashValue] = p->pos;
|
||||
hash[h2] = pos;
|
||||
(hash + kFix3HashSize)[hv] = pos;
|
||||
|
||||
SET_mmm
|
||||
|
||||
maxLen = 2;
|
||||
offset = 0;
|
||||
if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
|
||||
|
||||
if (d2 < mmm && *(cur - d2) == *cur)
|
||||
{
|
||||
for (; maxLen != lenLimit; maxLen++)
|
||||
if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
|
||||
break;
|
||||
distances[0] = maxLen;
|
||||
distances[1] = delta2 - 1;
|
||||
offset = 2;
|
||||
UPDATE_maxLen
|
||||
distances[0] = (UInt32)maxLen;
|
||||
distances[1] = d2 - 1;
|
||||
distances += 2;
|
||||
if (maxLen == lenLimit)
|
||||
{
|
||||
SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
|
||||
MOVE_POS_RET;
|
||||
SkipMatchesSpec(MF_PARAMS(p));
|
||||
MOVE_POS_RET
|
||||
}
|
||||
}
|
||||
GET_MATCHES_FOOTER(offset, maxLen)
|
||||
}
|
||||
|
||||
static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
{
|
||||
UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
|
||||
GET_MATCHES_HEADER(4)
|
||||
|
||||
HASH4_CALC;
|
||||
|
||||
delta2 = p->pos - p->hash[ hash2Value];
|
||||
delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
|
||||
curMatch = p->hash[kFix4HashSize + hashValue];
|
||||
|
||||
p->hash[ hash2Value] =
|
||||
p->hash[kFix3HashSize + hash3Value] =
|
||||
p->hash[kFix4HashSize + hashValue] = p->pos;
|
||||
|
||||
maxLen = 1;
|
||||
offset = 0;
|
||||
if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
|
||||
{
|
||||
distances[0] = maxLen = 2;
|
||||
distances[1] = delta2 - 1;
|
||||
offset = 2;
|
||||
}
|
||||
if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
|
||||
{
|
||||
maxLen = 3;
|
||||
distances[offset + 1] = delta3 - 1;
|
||||
offset += 2;
|
||||
delta2 = delta3;
|
||||
}
|
||||
if (offset != 0)
|
||||
{
|
||||
for (; maxLen != lenLimit; maxLen++)
|
||||
if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
|
||||
break;
|
||||
distances[offset - 2] = maxLen;
|
||||
if (maxLen == lenLimit)
|
||||
{
|
||||
SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
|
||||
MOVE_POS_RET;
|
||||
}
|
||||
}
|
||||
if (maxLen < 3)
|
||||
maxLen = 3;
|
||||
GET_MATCHES_FOOTER(offset, maxLen)
|
||||
GET_MATCHES_FOOTER_BT(maxLen)
|
||||
}
|
||||
|
||||
static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
|
||||
static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
{
|
||||
UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
|
||||
UInt32 mmm;
|
||||
UInt32 h2, h3, d2, d3, pos;
|
||||
unsigned maxLen;
|
||||
UInt32 *hash;
|
||||
GET_MATCHES_HEADER(4)
|
||||
|
||||
HASH4_CALC;
|
||||
HASH4_CALC
|
||||
|
||||
delta2 = p->pos - p->hash[ hash2Value];
|
||||
delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
|
||||
curMatch = p->hash[kFix4HashSize + hashValue];
|
||||
hash = p->hash;
|
||||
pos = p->pos;
|
||||
|
||||
p->hash[ hash2Value] =
|
||||
p->hash[kFix3HashSize + hash3Value] =
|
||||
p->hash[kFix4HashSize + hashValue] = p->pos;
|
||||
d2 = pos - hash [h2];
|
||||
d3 = pos - (hash + kFix3HashSize)[h3];
|
||||
curMatch = (hash + kFix4HashSize)[hv];
|
||||
|
||||
maxLen = 1;
|
||||
offset = 0;
|
||||
if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
|
||||
hash [h2] = pos;
|
||||
(hash + kFix3HashSize)[h3] = pos;
|
||||
(hash + kFix4HashSize)[hv] = pos;
|
||||
|
||||
SET_mmm
|
||||
|
||||
maxLen = 3;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
distances[0] = maxLen = 2;
|
||||
distances[1] = delta2 - 1;
|
||||
offset = 2;
|
||||
}
|
||||
if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
|
||||
{
|
||||
maxLen = 3;
|
||||
distances[offset + 1] = delta3 - 1;
|
||||
offset += 2;
|
||||
delta2 = delta3;
|
||||
}
|
||||
if (offset != 0)
|
||||
{
|
||||
for (; maxLen != lenLimit; maxLen++)
|
||||
if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
|
||||
if (d2 < mmm && *(cur - d2) == *cur)
|
||||
{
|
||||
distances[0] = 2;
|
||||
distances[1] = d2 - 1;
|
||||
distances += 2;
|
||||
if (*(cur - d2 + 2) == cur[2])
|
||||
{
|
||||
// distances[-2] = 3;
|
||||
}
|
||||
else if (d3 < mmm && *(cur - d3) == *cur)
|
||||
{
|
||||
d2 = d3;
|
||||
distances[1] = d3 - 1;
|
||||
distances += 2;
|
||||
}
|
||||
else
|
||||
break;
|
||||
distances[offset - 2] = maxLen;
|
||||
}
|
||||
else if (d3 < mmm && *(cur - d3) == *cur)
|
||||
{
|
||||
d2 = d3;
|
||||
distances[1] = d3 - 1;
|
||||
distances += 2;
|
||||
}
|
||||
else
|
||||
break;
|
||||
|
||||
UPDATE_maxLen
|
||||
distances[-2] = (UInt32)maxLen;
|
||||
if (maxLen == lenLimit)
|
||||
{
|
||||
SkipMatchesSpec(MF_PARAMS(p));
|
||||
MOVE_POS_RET
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
GET_MATCHES_FOOTER_BT(maxLen)
|
||||
}
|
||||
|
||||
|
||||
static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
{
|
||||
UInt32 mmm;
|
||||
UInt32 h2, h3, d2, d3, maxLen, pos;
|
||||
UInt32 *hash;
|
||||
GET_MATCHES_HEADER(5)
|
||||
|
||||
HASH5_CALC
|
||||
|
||||
hash = p->hash;
|
||||
pos = p->pos;
|
||||
|
||||
d2 = pos - hash [h2];
|
||||
d3 = pos - (hash + kFix3HashSize)[h3];
|
||||
// d4 = pos - (hash + kFix4HashSize)[h4];
|
||||
|
||||
curMatch = (hash + kFix5HashSize)[hv];
|
||||
|
||||
hash [h2] = pos;
|
||||
(hash + kFix3HashSize)[h3] = pos;
|
||||
// (hash + kFix4HashSize)[h4] = pos;
|
||||
(hash + kFix5HashSize)[hv] = pos;
|
||||
|
||||
SET_mmm
|
||||
|
||||
maxLen = 4;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (d2 < mmm && *(cur - d2) == *cur)
|
||||
{
|
||||
distances[0] = 2;
|
||||
distances[1] = d2 - 1;
|
||||
distances += 2;
|
||||
if (*(cur - d2 + 2) == cur[2])
|
||||
{
|
||||
}
|
||||
else if (d3 < mmm && *(cur - d3) == *cur)
|
||||
{
|
||||
distances[1] = d3 - 1;
|
||||
distances += 2;
|
||||
d2 = d3;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
else if (d3 < mmm && *(cur - d3) == *cur)
|
||||
{
|
||||
distances[1] = d3 - 1;
|
||||
distances += 2;
|
||||
d2 = d3;
|
||||
}
|
||||
else
|
||||
break;
|
||||
|
||||
distances[-2] = 3;
|
||||
if (*(cur - d2 + 3) != cur[3])
|
||||
break;
|
||||
UPDATE_maxLen
|
||||
distances[-2] = (UInt32)maxLen;
|
||||
if (maxLen == lenLimit)
|
||||
{
|
||||
SkipMatchesSpec(MF_PARAMS(p));
|
||||
MOVE_POS_RET
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
GET_MATCHES_FOOTER_BT(maxLen)
|
||||
}
|
||||
|
||||
|
||||
static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
{
|
||||
UInt32 mmm;
|
||||
UInt32 h2, h3, d2, d3, pos;
|
||||
unsigned maxLen;
|
||||
UInt32 *hash;
|
||||
GET_MATCHES_HEADER(4)
|
||||
|
||||
HASH4_CALC
|
||||
|
||||
hash = p->hash;
|
||||
pos = p->pos;
|
||||
|
||||
d2 = pos - hash [h2];
|
||||
d3 = pos - (hash + kFix3HashSize)[h3];
|
||||
curMatch = (hash + kFix4HashSize)[hv];
|
||||
|
||||
hash [h2] = pos;
|
||||
(hash + kFix3HashSize)[h3] = pos;
|
||||
(hash + kFix4HashSize)[hv] = pos;
|
||||
|
||||
SET_mmm
|
||||
|
||||
maxLen = 3;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (d2 < mmm && *(cur - d2) == *cur)
|
||||
{
|
||||
distances[0] = 2;
|
||||
distances[1] = d2 - 1;
|
||||
distances += 2;
|
||||
if (*(cur - d2 + 2) == cur[2])
|
||||
{
|
||||
// distances[-2] = 3;
|
||||
}
|
||||
else if (d3 < mmm && *(cur - d3) == *cur)
|
||||
{
|
||||
d2 = d3;
|
||||
distances[1] = d3 - 1;
|
||||
distances += 2;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
else if (d3 < mmm && *(cur - d3) == *cur)
|
||||
{
|
||||
d2 = d3;
|
||||
distances[1] = d3 - 1;
|
||||
distances += 2;
|
||||
}
|
||||
else
|
||||
break;
|
||||
|
||||
UPDATE_maxLen
|
||||
distances[-2] = (UInt32)maxLen;
|
||||
if (maxLen == lenLimit)
|
||||
{
|
||||
p->son[p->cyclicBufferPos] = curMatch;
|
||||
MOVE_POS_RET;
|
||||
MOVE_POS_RET
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (maxLen < 3)
|
||||
maxLen = 3;
|
||||
offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
|
||||
distances + offset, maxLen) - (distances));
|
||||
MOVE_POS_RET
|
||||
|
||||
GET_MATCHES_FOOTER_HC(maxLen)
|
||||
}
|
||||
|
||||
UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
|
||||
static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
{
|
||||
UInt32 offset;
|
||||
GET_MATCHES_HEADER(3)
|
||||
HASH_ZIP_CALC;
|
||||
curMatch = p->hash[hashValue];
|
||||
p->hash[hashValue] = p->pos;
|
||||
offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
|
||||
distances, 2) - (distances));
|
||||
MOVE_POS_RET
|
||||
UInt32 mmm;
|
||||
UInt32 h2, h3, d2, d3, maxLen, pos;
|
||||
UInt32 *hash;
|
||||
GET_MATCHES_HEADER(5)
|
||||
|
||||
HASH5_CALC
|
||||
|
||||
hash = p->hash;
|
||||
pos = p->pos;
|
||||
|
||||
d2 = pos - hash [h2];
|
||||
d3 = pos - (hash + kFix3HashSize)[h3];
|
||||
// d4 = pos - (hash + kFix4HashSize)[h4];
|
||||
|
||||
curMatch = (hash + kFix5HashSize)[hv];
|
||||
|
||||
hash [h2] = pos;
|
||||
(hash + kFix3HashSize)[h3] = pos;
|
||||
// (hash + kFix4HashSize)[h4] = pos;
|
||||
(hash + kFix5HashSize)[hv] = pos;
|
||||
|
||||
SET_mmm
|
||||
|
||||
maxLen = 4;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (d2 < mmm && *(cur - d2) == *cur)
|
||||
{
|
||||
distances[0] = 2;
|
||||
distances[1] = d2 - 1;
|
||||
distances += 2;
|
||||
if (*(cur - d2 + 2) == cur[2])
|
||||
{
|
||||
}
|
||||
else if (d3 < mmm && *(cur - d3) == *cur)
|
||||
{
|
||||
distances[1] = d3 - 1;
|
||||
distances += 2;
|
||||
d2 = d3;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
else if (d3 < mmm && *(cur - d3) == *cur)
|
||||
{
|
||||
distances[1] = d3 - 1;
|
||||
distances += 2;
|
||||
d2 = d3;
|
||||
}
|
||||
else
|
||||
break;
|
||||
|
||||
distances[-2] = 3;
|
||||
if (*(cur - d2 + 3) != cur[3])
|
||||
break;
|
||||
UPDATE_maxLen
|
||||
distances[-2] = maxLen;
|
||||
if (maxLen == lenLimit)
|
||||
{
|
||||
p->son[p->cyclicBufferPos] = curMatch;
|
||||
MOVE_POS_RET
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
GET_MATCHES_FOOTER_HC(maxLen)
|
||||
}
|
||||
|
||||
|
||||
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
{
|
||||
GET_MATCHES_HEADER(3)
|
||||
HASH_ZIP_CALC
|
||||
curMatch = p->hash[hv];
|
||||
p->hash[hv] = p->pos;
|
||||
GET_MATCHES_FOOTER_HC(2)
|
||||
}
|
||||
|
||||
|
||||
static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
||||
{
|
||||
do
|
||||
SKIP_HEADER(2)
|
||||
{
|
||||
SKIP_HEADER(2)
|
||||
HASH2_CALC;
|
||||
curMatch = p->hash[hashValue];
|
||||
p->hash[hashValue] = p->pos;
|
||||
SKIP_FOOTER
|
||||
HASH2_CALC
|
||||
curMatch = p->hash[hv];
|
||||
p->hash[hv] = p->pos;
|
||||
}
|
||||
while (--num != 0);
|
||||
SKIP_FOOTER
|
||||
}
|
||||
|
||||
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
||||
{
|
||||
do
|
||||
SKIP_HEADER(3)
|
||||
{
|
||||
SKIP_HEADER(3)
|
||||
HASH_ZIP_CALC;
|
||||
curMatch = p->hash[hashValue];
|
||||
p->hash[hashValue] = p->pos;
|
||||
SKIP_FOOTER
|
||||
HASH_ZIP_CALC
|
||||
curMatch = p->hash[hv];
|
||||
p->hash[hv] = p->pos;
|
||||
}
|
||||
while (--num != 0);
|
||||
SKIP_FOOTER
|
||||
}
|
||||
|
||||
static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
||||
{
|
||||
do
|
||||
SKIP_HEADER(3)
|
||||
{
|
||||
UInt32 hash2Value;
|
||||
SKIP_HEADER(3)
|
||||
HASH3_CALC;
|
||||
curMatch = p->hash[kFix3HashSize + hashValue];
|
||||
p->hash[hash2Value] =
|
||||
p->hash[kFix3HashSize + hashValue] = p->pos;
|
||||
SKIP_FOOTER
|
||||
UInt32 h2;
|
||||
UInt32 *hash;
|
||||
HASH3_CALC
|
||||
hash = p->hash;
|
||||
curMatch = (hash + kFix3HashSize)[hv];
|
||||
hash[h2] =
|
||||
(hash + kFix3HashSize)[hv] = p->pos;
|
||||
}
|
||||
while (--num != 0);
|
||||
SKIP_FOOTER
|
||||
}
|
||||
|
||||
static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
||||
{
|
||||
do
|
||||
SKIP_HEADER(4)
|
||||
{
|
||||
UInt32 hash2Value, hash3Value;
|
||||
SKIP_HEADER(4)
|
||||
HASH4_CALC;
|
||||
curMatch = p->hash[kFix4HashSize + hashValue];
|
||||
p->hash[ hash2Value] =
|
||||
p->hash[kFix3HashSize + hash3Value] = p->pos;
|
||||
p->hash[kFix4HashSize + hashValue] = p->pos;
|
||||
SKIP_FOOTER
|
||||
UInt32 h2, h3;
|
||||
UInt32 *hash;
|
||||
HASH4_CALC
|
||||
hash = p->hash;
|
||||
curMatch = (hash + kFix4HashSize)[hv];
|
||||
hash [h2] =
|
||||
(hash + kFix3HashSize)[h3] =
|
||||
(hash + kFix4HashSize)[hv] = p->pos;
|
||||
}
|
||||
while (--num != 0);
|
||||
SKIP_FOOTER
|
||||
}
|
||||
|
||||
static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
||||
{
|
||||
SKIP_HEADER(5)
|
||||
{
|
||||
UInt32 h2, h3;
|
||||
UInt32 *hash;
|
||||
HASH5_CALC
|
||||
hash = p->hash;
|
||||
curMatch = (hash + kFix5HashSize)[hv];
|
||||
hash [h2] =
|
||||
(hash + kFix3HashSize)[h3] =
|
||||
// (hash + kFix4HashSize)[h4] =
|
||||
(hash + kFix5HashSize)[hv] = p->pos;
|
||||
}
|
||||
SKIP_FOOTER
|
||||
}
|
||||
|
||||
|
||||
#define HC_SKIP_HEADER(minLen) \
|
||||
do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
|
||||
const Byte *cur; \
|
||||
UInt32 *hash; \
|
||||
UInt32 *son; \
|
||||
UInt32 pos = p->pos; \
|
||||
UInt32 num2 = num; \
|
||||
/* (p->pos == p->posLimit) is not allowed here !!! */ \
|
||||
{ const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \
|
||||
num -= num2; \
|
||||
{ const UInt32 cycPos = p->cyclicBufferPos; \
|
||||
son = p->son + cycPos; \
|
||||
p->cyclicBufferPos = cycPos + num2; } \
|
||||
cur = p->buffer; \
|
||||
hash = p->hash; \
|
||||
do { \
|
||||
UInt32 curMatch; \
|
||||
UInt32 hv;
|
||||
|
||||
|
||||
#define HC_SKIP_FOOTER \
|
||||
cur++; pos++; *son++ = curMatch; \
|
||||
} while (--num2); \
|
||||
p->buffer = cur; \
|
||||
p->pos = pos; \
|
||||
if (pos == p->posLimit) MatchFinder_CheckLimits(p); \
|
||||
}} while(num); \
|
||||
|
||||
|
||||
static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
||||
{
|
||||
do
|
||||
{
|
||||
UInt32 hash2Value, hash3Value;
|
||||
SKIP_HEADER(4)
|
||||
HASH4_CALC;
|
||||
curMatch = p->hash[kFix4HashSize + hashValue];
|
||||
p->hash[ hash2Value] =
|
||||
p->hash[kFix3HashSize + hash3Value] =
|
||||
p->hash[kFix4HashSize + hashValue] = p->pos;
|
||||
p->son[p->cyclicBufferPos] = curMatch;
|
||||
MOVE_POS
|
||||
}
|
||||
while (--num != 0);
|
||||
HC_SKIP_HEADER(4)
|
||||
|
||||
UInt32 h2, h3;
|
||||
HASH4_CALC
|
||||
curMatch = (hash + kFix4HashSize)[hv];
|
||||
hash [h2] =
|
||||
(hash + kFix3HashSize)[h3] =
|
||||
(hash + kFix4HashSize)[hv] = pos;
|
||||
|
||||
HC_SKIP_FOOTER
|
||||
}
|
||||
|
||||
|
||||
static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
||||
{
|
||||
HC_SKIP_HEADER(5)
|
||||
|
||||
UInt32 h2, h3;
|
||||
HASH5_CALC
|
||||
curMatch = (hash + kFix5HashSize)[hv];
|
||||
hash [h2] =
|
||||
(hash + kFix3HashSize)[h3] =
|
||||
// (hash + kFix4HashSize)[h4] =
|
||||
(hash + kFix5HashSize)[hv] = pos;
|
||||
|
||||
HC_SKIP_FOOTER
|
||||
}
|
||||
|
||||
|
||||
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
||||
{
|
||||
do
|
||||
{
|
||||
SKIP_HEADER(3)
|
||||
HASH_ZIP_CALC;
|
||||
curMatch = p->hash[hashValue];
|
||||
p->hash[hashValue] = p->pos;
|
||||
p->son[p->cyclicBufferPos] = curMatch;
|
||||
MOVE_POS
|
||||
}
|
||||
while (--num != 0);
|
||||
HC_SKIP_HEADER(3)
|
||||
|
||||
HASH_ZIP_CALC
|
||||
curMatch = hash[hv];
|
||||
hash[hv] = pos;
|
||||
|
||||
HC_SKIP_FOOTER
|
||||
}
|
||||
|
||||
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
|
||||
|
||||
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
|
||||
{
|
||||
vTable->Init = (Mf_Init_Func)MatchFinder_Init;
|
||||
vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte;
|
||||
vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
|
||||
vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
|
||||
if (!p->btMode)
|
||||
{
|
||||
vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
|
||||
vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
|
||||
if (p->numHashBytes <= 4)
|
||||
{
|
||||
vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
|
||||
vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
|
||||
}
|
||||
else
|
||||
{
|
||||
vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
|
||||
vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
|
||||
}
|
||||
}
|
||||
else if (p->numHashBytes == 2)
|
||||
{
|
||||
@@ -743,9 +1660,58 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
|
||||
vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
|
||||
vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
|
||||
}
|
||||
else
|
||||
else if (p->numHashBytes == 4)
|
||||
{
|
||||
vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
|
||||
vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
|
||||
}
|
||||
else
|
||||
{
|
||||
vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
|
||||
vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void LzFindPrepare(void)
|
||||
{
|
||||
#ifndef FORCE_LZFIND_SATUR_SUB_128
|
||||
#ifdef USE_LZFIND_SATUR_SUB_128
|
||||
LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
|
||||
#ifdef MY_CPU_ARM_OR_ARM64
|
||||
{
|
||||
if (CPU_IsSupported_NEON())
|
||||
{
|
||||
// #pragma message ("=== LzFind NEON")
|
||||
PRF(printf("\n=== LzFind NEON\n"));
|
||||
f = LzFind_SaturSub_128;
|
||||
}
|
||||
// f = 0; // for debug
|
||||
}
|
||||
#else // MY_CPU_ARM_OR_ARM64
|
||||
if (CPU_IsSupported_SSE41())
|
||||
{
|
||||
// #pragma message ("=== LzFind SSE41")
|
||||
PRF(printf("\n=== LzFind SSE41\n"));
|
||||
f = LzFind_SaturSub_128;
|
||||
|
||||
#ifdef USE_LZFIND_SATUR_SUB_256
|
||||
if (CPU_IsSupported_AVX2())
|
||||
{
|
||||
// #pragma message ("=== LzFind AVX2")
|
||||
PRF(printf("\n=== LzFind AVX2\n"));
|
||||
f = LzFind_SaturSub_256;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif // MY_CPU_ARM_OR_ARM64
|
||||
g_LzFind_SaturSub = f;
|
||||
#endif // USE_LZFIND_SATUR_SUB_128
|
||||
#endif // FORCE_LZFIND_SATUR_SUB_128
|
||||
}
|
||||
|
||||
|
||||
#undef MOVE_POS
|
||||
#undef MOVE_POS_RET
|
||||
#undef PRF
|
||||
|
||||
126
extern/lzma/LzFind.h
vendored
126
extern/lzma/LzFind.h
vendored
@@ -1,76 +1,121 @@
|
||||
/* LzFind.h -- Match finder for LZ algorithms
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
2023-03-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZFIND_H
|
||||
#define __LZFIND_H
|
||||
#ifndef ZIP7_INC_LZ_FIND_H
|
||||
#define ZIP7_INC_LZ_FIND_H
|
||||
|
||||
#include "Types.h"
|
||||
#include "7zTypes.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
typedef UInt32 CLzRef;
|
||||
|
||||
typedef struct _CMatchFinder
|
||||
typedef struct
|
||||
{
|
||||
Byte *buffer;
|
||||
const Byte *buffer;
|
||||
UInt32 pos;
|
||||
UInt32 posLimit;
|
||||
UInt32 streamPos;
|
||||
UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
|
||||
UInt32 lenLimit;
|
||||
|
||||
UInt32 cyclicBufferPos;
|
||||
UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
|
||||
|
||||
Byte streamEndWasReached;
|
||||
Byte btMode;
|
||||
Byte bigHash;
|
||||
Byte directInput;
|
||||
|
||||
UInt32 matchMaxLen;
|
||||
CLzRef *hash;
|
||||
CLzRef *son;
|
||||
UInt32 hashMask;
|
||||
UInt32 cutValue;
|
||||
|
||||
Byte *bufferBase;
|
||||
ISeqInStream *stream;
|
||||
int streamEndWasReached;
|
||||
|
||||
Byte *bufBase;
|
||||
ISeqInStreamPtr stream;
|
||||
|
||||
UInt32 blockSize;
|
||||
UInt32 keepSizeBefore;
|
||||
UInt32 keepSizeAfter;
|
||||
|
||||
UInt32 numHashBytes;
|
||||
int directInput;
|
||||
int btMode;
|
||||
/* int skipModeBits; */
|
||||
int bigHash;
|
||||
size_t directInputRem;
|
||||
UInt32 historySize;
|
||||
UInt32 fixedHashSize;
|
||||
UInt32 hashSizeSum;
|
||||
UInt32 numSons;
|
||||
Byte numHashBytes_Min;
|
||||
Byte numHashOutBits;
|
||||
Byte _pad2_[2];
|
||||
SRes result;
|
||||
UInt32 crc[256];
|
||||
size_t numRefs;
|
||||
|
||||
UInt64 expectedDataSize;
|
||||
} CMatchFinder;
|
||||
|
||||
#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
|
||||
#define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)])
|
||||
#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer)
|
||||
|
||||
#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
|
||||
#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos))
|
||||
|
||||
/*
|
||||
#define Inline_MatchFinder_IsFinishedOK(p) \
|
||||
((p)->streamEndWasReached \
|
||||
&& (p)->streamPos == (p)->pos \
|
||||
&& (!(p)->directInput || (p)->directInputRem == 0))
|
||||
*/
|
||||
|
||||
int MatchFinder_NeedMove(CMatchFinder *p);
|
||||
Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
|
||||
/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */
|
||||
void MatchFinder_MoveBlock(CMatchFinder *p);
|
||||
void MatchFinder_ReadIfRequired(CMatchFinder *p);
|
||||
|
||||
void MatchFinder_Construct(CMatchFinder *p);
|
||||
|
||||
/* Conditions:
|
||||
historySize <= 3 GB
|
||||
keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
|
||||
/* (directInput = 0) is default value.
|
||||
It's required to provide correct (directInput) value
|
||||
before calling MatchFinder_Create().
|
||||
You can set (directInput) by any of the following calls:
|
||||
- MatchFinder_SET_DIRECT_INPUT_BUF()
|
||||
- MatchFinder_SET_STREAM()
|
||||
- MatchFinder_SET_STREAM_MODE()
|
||||
*/
|
||||
|
||||
#define MatchFinder_SET_DIRECT_INPUT_BUF(p, _src_, _srcLen_) { \
|
||||
(p)->stream = NULL; \
|
||||
(p)->directInput = 1; \
|
||||
(p)->buffer = (_src_); \
|
||||
(p)->directInputRem = (_srcLen_); }
|
||||
|
||||
/*
|
||||
#define MatchFinder_SET_STREAM_MODE(p) { \
|
||||
(p)->directInput = 0; }
|
||||
*/
|
||||
|
||||
#define MatchFinder_SET_STREAM(p, _stream_) { \
|
||||
(p)->stream = _stream_; \
|
||||
(p)->directInput = 0; }
|
||||
|
||||
|
||||
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
|
||||
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
|
||||
ISzAlloc *alloc);
|
||||
void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc);
|
||||
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems);
|
||||
void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
|
||||
ISzAllocPtr alloc);
|
||||
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
|
||||
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
|
||||
|
||||
/*
|
||||
#define MatchFinder_INIT_POS(p, val) \
|
||||
(p)->pos = (val); \
|
||||
(p)->streamPos = (val);
|
||||
*/
|
||||
|
||||
// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
|
||||
#define MatchFinder_REDUCE_OFFSETS(p, subValue) \
|
||||
(p)->pos -= (subValue); \
|
||||
(p)->streamPos -= (subValue);
|
||||
|
||||
|
||||
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
|
||||
UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
|
||||
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
|
||||
UInt32 *distances, UInt32 maxLen);
|
||||
|
||||
/*
|
||||
@@ -80,28 +125,35 @@ Conditions:
|
||||
*/
|
||||
|
||||
typedef void (*Mf_Init_Func)(void *object);
|
||||
typedef Byte (*Mf_GetIndexByte_Func)(void *object, Int32 index);
|
||||
typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
|
||||
typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
|
||||
typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
|
||||
typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
|
||||
typedef void (*Mf_Skip_Func)(void *object, UInt32);
|
||||
|
||||
typedef struct _IMatchFinder
|
||||
typedef struct
|
||||
{
|
||||
Mf_Init_Func Init;
|
||||
Mf_GetIndexByte_Func GetIndexByte;
|
||||
Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
|
||||
Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
|
||||
Mf_GetMatches_Func GetMatches;
|
||||
Mf_Skip_Func Skip;
|
||||
} IMatchFinder;
|
||||
} IMatchFinder2;
|
||||
|
||||
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
|
||||
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
|
||||
|
||||
void MatchFinder_Init_LowHash(CMatchFinder *p);
|
||||
void MatchFinder_Init_HighHash(CMatchFinder *p);
|
||||
void MatchFinder_Init_4(CMatchFinder *p);
|
||||
void MatchFinder_Init(CMatchFinder *p);
|
||||
UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
|
||||
UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
|
||||
|
||||
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
|
||||
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
|
||||
|
||||
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
|
||||
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
|
||||
|
||||
void LzFindPrepare(void);
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
#endif
|
||||
|
||||
1406
extern/lzma/LzFindMt.c
vendored
Normal file
1406
extern/lzma/LzFindMt.c
vendored
Normal file
@@ -0,0 +1,1406 @@
|
||||
/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
// #include <stdio.h>
|
||||
|
||||
#include "CpuArch.h"
|
||||
|
||||
#include "LzHash.h"
|
||||
#include "LzFindMt.h"
|
||||
|
||||
// #define LOG_ITERS
|
||||
|
||||
// #define LOG_THREAD
|
||||
|
||||
#ifdef LOG_THREAD
|
||||
#include <stdio.h>
|
||||
#define PRF(x) x
|
||||
#else
|
||||
#define PRF(x)
|
||||
#endif
|
||||
|
||||
#ifdef LOG_ITERS
|
||||
#include <stdio.h>
|
||||
extern UInt64 g_NumIters_Tree;
|
||||
extern UInt64 g_NumIters_Loop;
|
||||
extern UInt64 g_NumIters_Bytes;
|
||||
#define LOG_ITER(x) x
|
||||
#else
|
||||
#define LOG_ITER(x)
|
||||
#endif
|
||||
|
||||
#define kMtHashBlockSize ((UInt32)1 << 17)
|
||||
#define kMtHashNumBlocks (1 << 1)
|
||||
|
||||
#define GET_HASH_BLOCK_OFFSET(i) (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize)
|
||||
|
||||
#define kMtBtBlockSize ((UInt32)1 << 16)
|
||||
#define kMtBtNumBlocks (1 << 4)
|
||||
|
||||
#define GET_BT_BLOCK_OFFSET(i) (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize)
|
||||
|
||||
/*
|
||||
HASH functions:
|
||||
We use raw 8/16 bits from a[1] and a[2],
|
||||
xored with crc(a[0]) and crc(a[3]).
|
||||
We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches.
|
||||
our crc() function provides one-to-one correspondence for low 8-bit values:
|
||||
(crc[0...0xFF] & 0xFF) <-> [0...0xFF]
|
||||
*/
|
||||
|
||||
#define MF(mt) ((mt)->MatchFinder)
|
||||
#define MF_CRC (p->crc)
|
||||
|
||||
// #define MF(mt) (&(mt)->MatchFinder)
|
||||
// #define MF_CRC (p->MatchFinder.crc)
|
||||
|
||||
#define MT_HASH2_CALC \
|
||||
h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1);
|
||||
|
||||
#define MT_HASH3_CALC { \
|
||||
UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \
|
||||
h2 = temp & (kHash2Size - 1); \
|
||||
h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
|
||||
|
||||
/*
|
||||
#define MT_HASH3_CALC__NO_2 { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
|
||||
|
||||
#define MT_HASH4_CALC { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
h2 = temp & (kHash2Size - 1); \
|
||||
temp ^= ((UInt32)cur[2] << 8); \
|
||||
h3 = temp & (kHash3Size - 1); \
|
||||
h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }
|
||||
// (kHash4Size - 1);
|
||||
*/
|
||||
|
||||
|
||||
Z7_NO_INLINE
|
||||
static void MtSync_Construct(CMtSync *p)
|
||||
{
|
||||
p->affinity = 0;
|
||||
p->wasCreated = False;
|
||||
p->csWasInitialized = False;
|
||||
p->csWasEntered = False;
|
||||
Thread_CONSTRUCT(&p->thread)
|
||||
Event_Construct(&p->canStart);
|
||||
Event_Construct(&p->wasStopped);
|
||||
Semaphore_Construct(&p->freeSemaphore);
|
||||
Semaphore_Construct(&p->filledSemaphore);
|
||||
}
|
||||
|
||||
|
||||
#define DEBUG_BUFFER_LOCK // define it to debug lock state
|
||||
|
||||
#ifdef DEBUG_BUFFER_LOCK
|
||||
#include <stdlib.h>
|
||||
#define BUFFER_MUST_BE_LOCKED(p) if (!(p)->csWasEntered) exit(1);
|
||||
#define BUFFER_MUST_BE_UNLOCKED(p) if ( (p)->csWasEntered) exit(1);
|
||||
#else
|
||||
#define BUFFER_MUST_BE_LOCKED(p)
|
||||
#define BUFFER_MUST_BE_UNLOCKED(p)
|
||||
#endif
|
||||
|
||||
#define LOCK_BUFFER(p) { \
|
||||
BUFFER_MUST_BE_UNLOCKED(p); \
|
||||
CriticalSection_Enter(&(p)->cs); \
|
||||
(p)->csWasEntered = True; }
|
||||
|
||||
#define UNLOCK_BUFFER(p) { \
|
||||
BUFFER_MUST_BE_LOCKED(p); \
|
||||
CriticalSection_Leave(&(p)->cs); \
|
||||
(p)->csWasEntered = False; }
|
||||
|
||||
|
||||
Z7_NO_INLINE
|
||||
static UInt32 MtSync_GetNextBlock(CMtSync *p)
|
||||
{
|
||||
UInt32 numBlocks = 0;
|
||||
if (p->needStart)
|
||||
{
|
||||
BUFFER_MUST_BE_UNLOCKED(p)
|
||||
p->numProcessedBlocks = 1;
|
||||
p->needStart = False;
|
||||
p->stopWriting = False;
|
||||
p->exit = False;
|
||||
Event_Reset(&p->wasStopped);
|
||||
Event_Set(&p->canStart);
|
||||
}
|
||||
else
|
||||
{
|
||||
UNLOCK_BUFFER(p)
|
||||
// we free current block
|
||||
numBlocks = p->numProcessedBlocks++;
|
||||
Semaphore_Release1(&p->freeSemaphore);
|
||||
}
|
||||
|
||||
// buffer is UNLOCKED here
|
||||
Semaphore_Wait(&p->filledSemaphore);
|
||||
LOCK_BUFFER(p)
|
||||
return numBlocks;
|
||||
}
|
||||
|
||||
|
||||
/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */
|
||||
|
||||
Z7_NO_INLINE
|
||||
static void MtSync_StopWriting(CMtSync *p)
|
||||
{
|
||||
if (!Thread_WasCreated(&p->thread) || p->needStart)
|
||||
return;
|
||||
|
||||
PRF(printf("\nMtSync_StopWriting %p\n", p));
|
||||
|
||||
if (p->csWasEntered)
|
||||
{
|
||||
/* we don't use buffer in this thread after StopWriting().
|
||||
So we UNLOCK buffer.
|
||||
And we restore default UNLOCKED state for stopped thread */
|
||||
UNLOCK_BUFFER(p)
|
||||
}
|
||||
|
||||
/* We send (p->stopWriting) message and release freeSemaphore
|
||||
to free current block.
|
||||
So the thread will see (p->stopWriting) at some
|
||||
iteration after Wait(freeSemaphore).
|
||||
The thread doesn't need to fill all avail free blocks,
|
||||
so we can get fast thread stop.
|
||||
*/
|
||||
|
||||
p->stopWriting = True;
|
||||
Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!!
|
||||
|
||||
PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p));
|
||||
Event_Wait(&p->wasStopped);
|
||||
PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finsihed\n", p));
|
||||
|
||||
/* 21.03 : we don't restore samaphore counters here.
|
||||
We will recreate and reinit samaphores in next start */
|
||||
|
||||
p->needStart = True;
|
||||
}
|
||||
|
||||
|
||||
Z7_NO_INLINE
|
||||
static void MtSync_Destruct(CMtSync *p)
|
||||
{
|
||||
PRF(printf("\nMtSync_Destruct %p\n", p));
|
||||
|
||||
if (Thread_WasCreated(&p->thread))
|
||||
{
|
||||
/* we want thread to be in Stopped state before sending EXIT command.
|
||||
note: stop(btSync) will stop (htSync) also */
|
||||
MtSync_StopWriting(p);
|
||||
/* thread in Stopped state here : (p->needStart == true) */
|
||||
p->exit = True;
|
||||
// if (p->needStart) // it's (true)
|
||||
Event_Set(&p->canStart); // we send EXIT command to thread
|
||||
Thread_Wait_Close(&p->thread); // we wait thread finishing
|
||||
}
|
||||
|
||||
if (p->csWasInitialized)
|
||||
{
|
||||
CriticalSection_Delete(&p->cs);
|
||||
p->csWasInitialized = False;
|
||||
}
|
||||
p->csWasEntered = False;
|
||||
|
||||
Event_Close(&p->canStart);
|
||||
Event_Close(&p->wasStopped);
|
||||
Semaphore_Close(&p->freeSemaphore);
|
||||
Semaphore_Close(&p->filledSemaphore);
|
||||
|
||||
p->wasCreated = False;
|
||||
}
|
||||
|
||||
|
||||
// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
|
||||
// we want to get real system error codes here instead of SZ_ERROR_THREAD
|
||||
#define RINOK_THREAD(x) RINOK_WRes(x)
|
||||
|
||||
|
||||
// call it before each new file (when new starting is required):
|
||||
Z7_NO_INLINE
|
||||
static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
|
||||
{
|
||||
WRes wres;
|
||||
// BUFFER_MUST_BE_UNLOCKED(p)
|
||||
if (!p->needStart || p->csWasEntered)
|
||||
return SZ_ERROR_FAIL;
|
||||
wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks);
|
||||
if (wres == 0)
|
||||
wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks);
|
||||
return MY_SRes_HRESULT_FROM_WRes(wres);
|
||||
}
|
||||
|
||||
|
||||
static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
|
||||
{
|
||||
WRes wres;
|
||||
|
||||
if (p->wasCreated)
|
||||
return SZ_OK;
|
||||
|
||||
RINOK_THREAD(CriticalSection_Init(&p->cs))
|
||||
p->csWasInitialized = True;
|
||||
p->csWasEntered = False;
|
||||
|
||||
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart))
|
||||
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped))
|
||||
|
||||
p->needStart = True;
|
||||
p->exit = True; /* p->exit is unused before (canStart) Event.
|
||||
But in case of some unexpected code failure we will get fast exit from thread */
|
||||
|
||||
// return ERROR_TOO_MANY_POSTS; // for debug
|
||||
// return EINVAL; // for debug
|
||||
|
||||
if (p->affinity != 0)
|
||||
wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
|
||||
else
|
||||
wres = Thread_Create(&p->thread, startAddress, obj);
|
||||
|
||||
RINOK_THREAD(wres)
|
||||
p->wasCreated = True;
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
|
||||
Z7_NO_INLINE
|
||||
static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
|
||||
{
|
||||
const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
|
||||
if (wres == 0)
|
||||
return 0;
|
||||
MtSync_Destruct(p);
|
||||
return MY_SRes_HRESULT_FROM_WRes(wres);
|
||||
}
|
||||
|
||||
|
||||
// ---------- HASH THREAD ----------
|
||||
|
||||
#define kMtMaxValForNormalize 0xFFFFFFFF
|
||||
// #define kMtMaxValForNormalize ((1 << 21)) // for debug
|
||||
// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
|
||||
|
||||
#ifdef MY_CPU_LE_UNALIGN
|
||||
#define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)
|
||||
#else
|
||||
#define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))
|
||||
#endif
|
||||
|
||||
#define GetHeads_DECL(name) \
|
||||
static void GetHeads ## name(const Byte *p, UInt32 pos, \
|
||||
UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)
|
||||
|
||||
#define GetHeads_LOOP(v) \
|
||||
for (; numHeads != 0; numHeads--) { \
|
||||
const UInt32 value = (v); \
|
||||
p++; \
|
||||
*heads++ = pos - hash[value]; \
|
||||
hash[value] = pos++; }
|
||||
|
||||
#define DEF_GetHeads2(name, v, action) \
|
||||
GetHeads_DECL(name) { action \
|
||||
GetHeads_LOOP(v) }
|
||||
|
||||
#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)
|
||||
|
||||
DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
|
||||
DEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)
|
||||
DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
|
||||
// BT3 is not good for crc collisions for big hashMask values.
|
||||
|
||||
/*
|
||||
GetHeads_DECL(3b)
|
||||
{
|
||||
UNUSED_VAR(hashMask);
|
||||
UNUSED_VAR(crc);
|
||||
{
|
||||
const Byte *pLim = p + numHeads;
|
||||
if (numHeads == 0)
|
||||
return;
|
||||
pLim--;
|
||||
while (p < pLim)
|
||||
{
|
||||
UInt32 v1 = GetUi32(p);
|
||||
UInt32 v0 = v1 & 0xFFFFFF;
|
||||
UInt32 h0, h1;
|
||||
p += 2;
|
||||
v1 >>= 8;
|
||||
h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++;
|
||||
h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++;
|
||||
heads += 2;
|
||||
}
|
||||
if (p == pLim)
|
||||
{
|
||||
UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16);
|
||||
*heads = pos - hash[v0];
|
||||
hash[v0] = pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
GetHeads_DECL(4)
|
||||
{
|
||||
unsigned sh = 0;
|
||||
UNUSED_VAR(crc)
|
||||
while ((hashMask & 0x80000000) == 0)
|
||||
{
|
||||
hashMask <<= 1;
|
||||
sh++;
|
||||
}
|
||||
GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh)
|
||||
}
|
||||
#define GetHeads4b GetHeads4
|
||||
*/
|
||||
|
||||
#define USE_GetHeads_LOCAL_CRC
|
||||
|
||||
#ifdef USE_GetHeads_LOCAL_CRC
|
||||
|
||||
GetHeads_DECL(4)
|
||||
{
|
||||
UInt32 crc0[256];
|
||||
UInt32 crc1[256];
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
UInt32 v = crc[i];
|
||||
crc0[i] = v & hashMask;
|
||||
crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
|
||||
// crc1[i] = rotlFixed(v, 8) & hashMask;
|
||||
}
|
||||
}
|
||||
GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1))
|
||||
}
|
||||
|
||||
GetHeads_DECL(4b)
|
||||
{
|
||||
UInt32 crc0[256];
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < 256; i++)
|
||||
crc0[i] = crc[i] & hashMask;
|
||||
}
|
||||
GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p))
|
||||
}
|
||||
|
||||
GetHeads_DECL(5)
|
||||
{
|
||||
UInt32 crc0[256];
|
||||
UInt32 crc1[256];
|
||||
UInt32 crc2[256];
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
UInt32 v = crc[i];
|
||||
crc0[i] = v & hashMask;
|
||||
crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
|
||||
crc2[i] = (v << kLzHash_CrcShift_2) & hashMask;
|
||||
}
|
||||
}
|
||||
GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1))
|
||||
}
|
||||
|
||||
GetHeads_DECL(5b)
|
||||
{
|
||||
UInt32 crc0[256];
|
||||
UInt32 crc1[256];
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
UInt32 v = crc[i];
|
||||
crc0[i] = v & hashMask;
|
||||
crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
|
||||
}
|
||||
}
|
||||
GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p))
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
DEF_GetHeads(4, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask)
|
||||
DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask)
|
||||
DEF_GetHeads(5, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask)
|
||||
DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
static void HashThreadFunc(CMatchFinderMt *mt)
|
||||
{
|
||||
CMtSync *p = &mt->hashSync;
|
||||
PRF(printf("\nHashThreadFunc\n"));
|
||||
|
||||
for (;;)
|
||||
{
|
||||
UInt32 blockIndex = 0;
|
||||
PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n"));
|
||||
Event_Wait(&p->canStart);
|
||||
PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n"));
|
||||
if (p->exit)
|
||||
{
|
||||
PRF(printf("\nHashThreadFunc : exit \n"));
|
||||
return;
|
||||
}
|
||||
|
||||
MatchFinder_Init_HighHash(MF(mt));
|
||||
|
||||
for (;;)
|
||||
{
|
||||
PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos));
|
||||
|
||||
{
|
||||
CMatchFinder *mf = MF(mt);
|
||||
if (MatchFinder_NeedMove(mf))
|
||||
{
|
||||
CriticalSection_Enter(&mt->btSync.cs);
|
||||
CriticalSection_Enter(&mt->hashSync.cs);
|
||||
{
|
||||
const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf);
|
||||
ptrdiff_t offset;
|
||||
MatchFinder_MoveBlock(mf);
|
||||
offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf);
|
||||
mt->pointerToCurPos -= offset;
|
||||
mt->buffer -= offset;
|
||||
}
|
||||
CriticalSection_Leave(&mt->hashSync.cs);
|
||||
CriticalSection_Leave(&mt->btSync.cs);
|
||||
continue;
|
||||
}
|
||||
|
||||
Semaphore_Wait(&p->freeSemaphore);
|
||||
|
||||
if (p->exit) // exit is unexpected here. But we check it here for some failure case
|
||||
return;
|
||||
|
||||
// for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
|
||||
if (p->stopWriting)
|
||||
break;
|
||||
|
||||
MatchFinder_ReadIfRequired(mf);
|
||||
{
|
||||
UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++);
|
||||
UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf);
|
||||
heads[0] = 2;
|
||||
heads[1] = num;
|
||||
|
||||
/* heads[1] contains the number of avail bytes:
|
||||
if (avail < mf->numHashBytes) :
|
||||
{
|
||||
it means that stream was finished
|
||||
HASH_THREAD and BT_TREAD must move position for heads[1] (avail) bytes.
|
||||
HASH_THREAD doesn't stop,
|
||||
HASH_THREAD fills only the header (2 numbers) for all next blocks:
|
||||
{2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0}
|
||||
}
|
||||
else
|
||||
{
|
||||
HASH_THREAD and BT_TREAD must move position for (heads[0] - 2) bytes;
|
||||
}
|
||||
*/
|
||||
|
||||
if (num >= mf->numHashBytes)
|
||||
{
|
||||
num = num - mf->numHashBytes + 1;
|
||||
if (num > kMtHashBlockSize - 2)
|
||||
num = kMtHashBlockSize - 2;
|
||||
|
||||
if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
|
||||
{
|
||||
const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
|
||||
MatchFinder_REDUCE_OFFSETS(mf, subValue)
|
||||
MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
|
||||
}
|
||||
|
||||
heads[0] = 2 + num;
|
||||
mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
|
||||
}
|
||||
|
||||
mf->pos += num; // wrap over zero is allowed at the end of stream
|
||||
mf->buffer += num;
|
||||
}
|
||||
}
|
||||
|
||||
Semaphore_Release1(&p->filledSemaphore);
|
||||
} // for() processing end
|
||||
|
||||
// p->numBlocks_Sent = blockIndex;
|
||||
Event_Set(&p->wasStopped);
|
||||
} // for() thread end
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// ---------- BT THREAD ----------
|
||||
|
||||
/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap.
|
||||
here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */
|
||||
#define CYC_TO_POS_OFFSET 0
|
||||
// #define CYC_TO_POS_OFFSET 1 // for debug
|
||||
|
||||
#define MFMT_GM_INLINE
|
||||
|
||||
#ifdef MFMT_GM_INLINE
|
||||
|
||||
/*
|
||||
we use size_t for (pos) instead of UInt32
|
||||
to eliminate "movsx" BUG in old MSVC x64 compiler.
|
||||
*/
|
||||
|
||||
|
||||
UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
|
||||
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
|
||||
UInt32 *posRes);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
|
||||
{
|
||||
UInt32 numProcessed = 0;
|
||||
UInt32 curPos = 2;
|
||||
|
||||
/* GetMatchesSpec() functions don't create (len = 1)
|
||||
in [len, dist] match pairs, if (p->numHashBytes >= 2)
|
||||
Also we suppose here that (matchMaxLen >= 2).
|
||||
So the following code for (reserve) is not required
|
||||
UInt32 reserve = (p->matchMaxLen * 2);
|
||||
const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX
|
||||
if (reserve < kNumHashBytes_Max - 1)
|
||||
reserve = kNumHashBytes_Max - 1;
|
||||
const UInt32 limit = kMtBtBlockSize - (reserve);
|
||||
*/
|
||||
|
||||
const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);
|
||||
|
||||
d[1] = p->hashNumAvail;
|
||||
|
||||
if (p->failure_BT)
|
||||
{
|
||||
// printf("\n == 1 BtGetMatches() p->failure_BT\n");
|
||||
d[0] = 0;
|
||||
// d[1] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
while (curPos < limit)
|
||||
{
|
||||
if (p->hashBufPos == p->hashBufPosLimit)
|
||||
{
|
||||
// MatchFinderMt_GetNextBlock_Hash(p);
|
||||
UInt32 avail;
|
||||
{
|
||||
const UInt32 bi = MtSync_GetNextBlock(&p->hashSync);
|
||||
const UInt32 k = GET_HASH_BLOCK_OFFSET(bi);
|
||||
const UInt32 *h = p->hashBuf + k;
|
||||
avail = h[1];
|
||||
p->hashBufPosLimit = k + h[0];
|
||||
p->hashNumAvail = avail;
|
||||
p->hashBufPos = k + 2;
|
||||
}
|
||||
|
||||
{
|
||||
/* we must prevent UInt32 overflow for avail total value,
|
||||
if avail was increased with new hash block */
|
||||
UInt32 availSum = numProcessed + avail;
|
||||
if (availSum < numProcessed)
|
||||
availSum = (UInt32)(Int32)-1;
|
||||
d[1] = availSum;
|
||||
}
|
||||
|
||||
if (avail >= p->numHashBytes)
|
||||
continue;
|
||||
|
||||
// if (p->hashBufPos != p->hashBufPosLimit) exit(1);
|
||||
|
||||
/* (avail < p->numHashBytes)
|
||||
It means that stream was finished.
|
||||
And (avail) - is a number of remaining bytes,
|
||||
we fill (d) for (avail) bytes for LZ_THREAD (receiver).
|
||||
but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */
|
||||
|
||||
/* here we suppose that we have space enough:
|
||||
(kMtBtBlockSize - curPos >= p->hashNumAvail) */
|
||||
p->hashNumAvail = 0;
|
||||
d[0] = curPos + avail;
|
||||
d += curPos;
|
||||
for (; avail != 0; avail--)
|
||||
*d++ = 0;
|
||||
return;
|
||||
}
|
||||
{
|
||||
UInt32 size = p->hashBufPosLimit - p->hashBufPos;
|
||||
UInt32 pos = p->pos;
|
||||
UInt32 cyclicBufferPos = p->cyclicBufferPos;
|
||||
UInt32 lenLimit = p->matchMaxLen;
|
||||
if (lenLimit >= p->hashNumAvail)
|
||||
lenLimit = p->hashNumAvail;
|
||||
{
|
||||
UInt32 size2 = p->hashNumAvail - lenLimit + 1;
|
||||
if (size2 < size)
|
||||
size = size2;
|
||||
size2 = p->cyclicBufferSize - cyclicBufferPos;
|
||||
if (size2 < size)
|
||||
size = size2;
|
||||
}
|
||||
|
||||
if (pos > (UInt32)kMtMaxValForNormalize - size)
|
||||
{
|
||||
const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1);
|
||||
pos -= subValue;
|
||||
p->pos = pos;
|
||||
MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
|
||||
}
|
||||
|
||||
#ifndef MFMT_GM_INLINE
|
||||
while (curPos < limit && size-- != 0)
|
||||
{
|
||||
UInt32 *startDistances = d + curPos;
|
||||
UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
|
||||
pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
|
||||
startDistances + 1, p->numHashBytes - 1) - startDistances);
|
||||
*startDistances = num - 1;
|
||||
curPos += num;
|
||||
cyclicBufferPos++;
|
||||
pos++;
|
||||
p->buffer++;
|
||||
}
|
||||
#else
|
||||
{
|
||||
UInt32 posRes = pos;
|
||||
const UInt32 *d_end;
|
||||
{
|
||||
d_end = GetMatchesSpecN_2(
|
||||
p->buffer + lenLimit - 1,
|
||||
pos, p->buffer, p->son, p->cutValue, d + curPos,
|
||||
p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
|
||||
d + limit, p->hashBuf + p->hashBufPos + size,
|
||||
cyclicBufferPos, p->cyclicBufferSize,
|
||||
&posRes);
|
||||
}
|
||||
{
|
||||
if (!d_end)
|
||||
{
|
||||
// printf("\n == 2 BtGetMatches() p->failure_BT\n");
|
||||
// internal data failure
|
||||
p->failure_BT = True;
|
||||
d[0] = 0;
|
||||
// d[1] = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
curPos = (UInt32)(d_end - d);
|
||||
{
|
||||
const UInt32 processed = posRes - pos;
|
||||
pos = posRes;
|
||||
p->hashBufPos += processed;
|
||||
cyclicBufferPos += processed;
|
||||
p->buffer += processed;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
{
|
||||
const UInt32 processed = pos - p->pos;
|
||||
numProcessed += processed;
|
||||
p->hashNumAvail -= processed;
|
||||
p->pos = pos;
|
||||
}
|
||||
if (cyclicBufferPos == p->cyclicBufferSize)
|
||||
cyclicBufferPos = 0;
|
||||
p->cyclicBufferPos = cyclicBufferPos;
|
||||
}
|
||||
}
|
||||
|
||||
d[0] = curPos;
|
||||
}
|
||||
|
||||
|
||||
static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
|
||||
{
|
||||
CMtSync *sync = &p->hashSync;
|
||||
|
||||
BUFFER_MUST_BE_UNLOCKED(sync)
|
||||
|
||||
if (!sync->needStart)
|
||||
{
|
||||
LOCK_BUFFER(sync)
|
||||
}
|
||||
|
||||
BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex));
|
||||
|
||||
/* We suppose that we have called GetNextBlock() from start.
|
||||
So buffer is LOCKED */
|
||||
|
||||
UNLOCK_BUFFER(sync)
|
||||
}
|
||||
|
||||
|
||||
Z7_NO_INLINE
|
||||
static void BtThreadFunc(CMatchFinderMt *mt)
|
||||
{
|
||||
CMtSync *p = &mt->btSync;
|
||||
for (;;)
|
||||
{
|
||||
UInt32 blockIndex = 0;
|
||||
Event_Wait(&p->canStart);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
PRF(printf(" BT thread block = %d pos = %d\n", (unsigned)blockIndex, mt->pos));
|
||||
/* (p->exit == true) is possible after (p->canStart) at first loop iteration
|
||||
and is unexpected after more Wait(freeSemaphore) iterations */
|
||||
if (p->exit)
|
||||
return;
|
||||
|
||||
Semaphore_Wait(&p->freeSemaphore);
|
||||
|
||||
// for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
|
||||
if (p->stopWriting)
|
||||
break;
|
||||
|
||||
BtFillBlock(mt, blockIndex++);
|
||||
|
||||
Semaphore_Release1(&p->filledSemaphore);
|
||||
}
|
||||
|
||||
// we stop HASH_THREAD here
|
||||
MtSync_StopWriting(&mt->hashSync);
|
||||
|
||||
// p->numBlocks_Sent = blockIndex;
|
||||
Event_Set(&p->wasStopped);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MatchFinderMt_Construct(CMatchFinderMt *p)
|
||||
{
|
||||
p->hashBuf = NULL;
|
||||
MtSync_Construct(&p->hashSync);
|
||||
MtSync_Construct(&p->btSync);
|
||||
}
|
||||
|
||||
static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)
|
||||
{
|
||||
ISzAlloc_Free(alloc, p->hashBuf);
|
||||
p->hashBuf = NULL;
|
||||
}
|
||||
|
||||
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
|
||||
{
|
||||
/*
|
||||
HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs.
|
||||
So we must be sure that HASH_THREAD will not use CriticalSection(s)
|
||||
after deleting CriticalSection here.
|
||||
|
||||
we call ReleaseStream(p)
|
||||
that calls StopWriting(btSync)
|
||||
that calls StopWriting(hashSync), if it's required to stop HASH_THREAD.
|
||||
after StopWriting() it's safe to destruct MtSync(s) in any order */
|
||||
|
||||
MatchFinderMt_ReleaseStream(p);
|
||||
|
||||
MtSync_Destruct(&p->btSync);
|
||||
MtSync_Destruct(&p->hashSync);
|
||||
|
||||
LOG_ITER(
|
||||
printf("\nTree %9d * %7d iter = %9d = sum : bytes = %9d\n",
|
||||
(UInt32)(g_NumIters_Tree / 1000),
|
||||
(UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),
|
||||
(UInt32)(g_NumIters_Loop / 1000),
|
||||
(UInt32)(g_NumIters_Bytes / 1000)
|
||||
));
|
||||
|
||||
MatchFinderMt_FreeMem(p, alloc);
|
||||
}
|
||||
|
||||
|
||||
#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)
|
||||
#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)
|
||||
|
||||
|
||||
static THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; }
|
||||
static THREAD_FUNC_DECL BtThreadFunc2(void *p)
|
||||
{
|
||||
Byte allocaDummy[0x180];
|
||||
unsigned i = 0;
|
||||
for (i = 0; i < 16; i++)
|
||||
allocaDummy[i] = (Byte)0;
|
||||
if (allocaDummy[0] == 0)
|
||||
BtThreadFunc((CMatchFinderMt *)p);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
|
||||
UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
|
||||
{
|
||||
CMatchFinder *mf = MF(p);
|
||||
p->historySize = historySize;
|
||||
if (kMtBtBlockSize <= matchMaxLen * 4)
|
||||
return SZ_ERROR_PARAM;
|
||||
if (!p->hashBuf)
|
||||
{
|
||||
p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32));
|
||||
if (!p->hashBuf)
|
||||
return SZ_ERROR_MEM;
|
||||
p->btBuf = p->hashBuf + kHashBufferSize;
|
||||
}
|
||||
keepAddBufferBefore += (kHashBufferSize + kBtBufferSize);
|
||||
keepAddBufferAfter += kMtHashBlockSize;
|
||||
if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
|
||||
return SZ_ERROR_MEM;
|
||||
|
||||
RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p))
|
||||
RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p))
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
|
||||
SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
|
||||
{
|
||||
RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks))
|
||||
return MtSync_Init(&p->btSync, kMtBtNumBlocks);
|
||||
}
|
||||
|
||||
|
||||
static void MatchFinderMt_Init(CMatchFinderMt *p)
|
||||
{
|
||||
CMatchFinder *mf = MF(p);
|
||||
|
||||
p->btBufPos =
|
||||
p->btBufPosLimit = NULL;
|
||||
p->hashBufPos =
|
||||
p->hashBufPosLimit = 0;
|
||||
p->hashNumAvail = 0; // 21.03
|
||||
|
||||
p->failure_BT = False;
|
||||
|
||||
/* Init without data reading. We don't want to read data in this thread */
|
||||
MatchFinder_Init_4(mf);
|
||||
|
||||
MatchFinder_Init_LowHash(mf);
|
||||
|
||||
p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);
|
||||
p->btNumAvailBytes = 0;
|
||||
p->failure_LZ_BT = False;
|
||||
// p->failure_LZ_LZ = False;
|
||||
|
||||
p->lzPos =
|
||||
1; // optimal smallest value
|
||||
// 0; // for debug: ignores match to start
|
||||
// kNormalizeAlign; // for debug
|
||||
|
||||
p->hash = mf->hash;
|
||||
p->fixedHashSize = mf->fixedHashSize;
|
||||
// p->hash4Mask = mf->hash4Mask;
|
||||
p->crc = mf->crc;
|
||||
// memcpy(p->crc, mf->crc, sizeof(mf->crc));
|
||||
|
||||
p->son = mf->son;
|
||||
p->matchMaxLen = mf->matchMaxLen;
|
||||
p->numHashBytes = mf->numHashBytes;
|
||||
|
||||
/* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */
|
||||
// mf->streamPos = mf->pos = 1; // optimal smallest value
|
||||
// 0; // for debug: ignores match to start
|
||||
// kNormalizeAlign; // for debug
|
||||
|
||||
/* we must init (p->pos = mf->pos) for BT, because
|
||||
BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */
|
||||
p->pos = mf->pos; // do not change it
|
||||
|
||||
p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET);
|
||||
p->cyclicBufferSize = mf->cyclicBufferSize;
|
||||
p->buffer = mf->buffer;
|
||||
p->cutValue = mf->cutValue;
|
||||
// p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses.
|
||||
}
|
||||
|
||||
|
||||
/* ReleaseStream is required to finish multithreading */
|
||||
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
|
||||
{
|
||||
// Sleep(1); // for debug
|
||||
MtSync_StopWriting(&p->btSync);
|
||||
// Sleep(200); // for debug
|
||||
/* p->MatchFinder->ReleaseStream(); */
|
||||
}
|
||||
|
||||
|
||||
Z7_NO_INLINE
|
||||
static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
|
||||
{
|
||||
if (p->failure_LZ_BT)
|
||||
p->btBufPos = p->failureBuf;
|
||||
else
|
||||
{
|
||||
const UInt32 bi = MtSync_GetNextBlock(&p->btSync);
|
||||
const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi);
|
||||
{
|
||||
const UInt32 numItems = bt[0];
|
||||
p->btBufPosLimit = bt + numItems;
|
||||
p->btNumAvailBytes = bt[1];
|
||||
p->btBufPos = bt + 2;
|
||||
if (numItems < 2 || numItems > kMtBtBlockSize)
|
||||
{
|
||||
p->failureBuf[0] = 0;
|
||||
p->btBufPos = p->failureBuf;
|
||||
p->btBufPosLimit = p->failureBuf + 1;
|
||||
p->failure_LZ_BT = True;
|
||||
// p->btNumAvailBytes = 0;
|
||||
/* we don't want to decrease AvailBytes, that was load before.
|
||||
that can be unxepected for the code that have loaded anopther value before */
|
||||
}
|
||||
}
|
||||
|
||||
if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize)
|
||||
{
|
||||
/* we don't check (lzPos) over exact avail bytes in (btBuf).
|
||||
(fixedHashSize) is small, so normalization is fast */
|
||||
const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
|
||||
p->lzPos -= subValue;
|
||||
MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize);
|
||||
}
|
||||
}
|
||||
return p->btNumAvailBytes;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
|
||||
{
|
||||
return p->pointerToCurPos;
|
||||
}
|
||||
|
||||
|
||||
#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);
|
||||
|
||||
|
||||
static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
|
||||
{
|
||||
if (p->btBufPos != p->btBufPosLimit)
|
||||
return p->btNumAvailBytes;
|
||||
return MatchFinderMt_GetNextBlock_Bt(p);
|
||||
}
|
||||
|
||||
|
||||
// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True; return d; }
|
||||
#define CHECK_FAILURE_LZ(_match_, _pos_)
|
||||
|
||||
static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
|
||||
{
|
||||
UInt32 h2, c2;
|
||||
UInt32 *hash = p->hash;
|
||||
const Byte *cur = p->pointerToCurPos;
|
||||
const UInt32 m = p->lzPos;
|
||||
MT_HASH2_CALC
|
||||
|
||||
c2 = hash[h2];
|
||||
hash[h2] = m;
|
||||
|
||||
if (c2 >= matchMinPos)
|
||||
{
|
||||
CHECK_FAILURE_LZ(c2, m)
|
||||
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
|
||||
{
|
||||
*d++ = 2;
|
||||
*d++ = m - c2 - 1;
|
||||
}
|
||||
}
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
|
||||
{
|
||||
UInt32 h2, h3, c2, c3;
|
||||
UInt32 *hash = p->hash;
|
||||
const Byte *cur = p->pointerToCurPos;
|
||||
const UInt32 m = p->lzPos;
|
||||
MT_HASH3_CALC
|
||||
|
||||
c2 = hash[h2];
|
||||
c3 = (hash + kFix3HashSize)[h3];
|
||||
|
||||
hash[h2] = m;
|
||||
(hash + kFix3HashSize)[h3] = m;
|
||||
|
||||
if (c2 >= matchMinPos)
|
||||
{
|
||||
CHECK_FAILURE_LZ(c2, m)
|
||||
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
|
||||
{
|
||||
d[1] = m - c2 - 1;
|
||||
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
|
||||
{
|
||||
d[0] = 3;
|
||||
return d + 2;
|
||||
}
|
||||
d[0] = 2;
|
||||
d += 2;
|
||||
}
|
||||
}
|
||||
|
||||
if (c3 >= matchMinPos)
|
||||
{
|
||||
CHECK_FAILURE_LZ(c3, m)
|
||||
if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
|
||||
{
|
||||
*d++ = 3;
|
||||
*d++ = m - c3 - 1;
|
||||
}
|
||||
}
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
|
||||
#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;
|
||||
|
||||
/*
|
||||
static
|
||||
UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
|
||||
{
|
||||
const UInt32 *bt = p->btBufPos;
|
||||
const UInt32 len = *bt++;
|
||||
const UInt32 *btLim = bt + len;
|
||||
UInt32 matchMinPos;
|
||||
UInt32 avail = p->btNumAvailBytes - 1;
|
||||
p->btBufPos = btLim;
|
||||
|
||||
{
|
||||
p->btNumAvailBytes = avail;
|
||||
|
||||
#define BT_HASH_BYTES_MAX 5
|
||||
|
||||
matchMinPos = p->lzPos;
|
||||
|
||||
if (len != 0)
|
||||
matchMinPos -= bt[1];
|
||||
else if (avail < (BT_HASH_BYTES_MAX - 1) - 1)
|
||||
{
|
||||
INCREASE_LZ_POS
|
||||
return d;
|
||||
}
|
||||
else
|
||||
{
|
||||
const UInt32 hs = p->historySize;
|
||||
if (matchMinPos > hs)
|
||||
matchMinPos -= hs;
|
||||
else
|
||||
matchMinPos = 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
|
||||
UInt32 h2, h3, c2, c3;
|
||||
UInt32 *hash = p->hash;
|
||||
const Byte *cur = p->pointerToCurPos;
|
||||
UInt32 m = p->lzPos;
|
||||
MT_HASH3_CALC
|
||||
|
||||
c2 = hash[h2];
|
||||
c3 = (hash + kFix3HashSize)[h3];
|
||||
|
||||
hash[h2] = m;
|
||||
(hash + kFix3HashSize)[h3] = m;
|
||||
|
||||
if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
|
||||
{
|
||||
d[1] = m - c2 - 1;
|
||||
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
|
||||
{
|
||||
d[0] = 3;
|
||||
d += 2;
|
||||
break;
|
||||
}
|
||||
// else
|
||||
{
|
||||
d[0] = 2;
|
||||
d += 2;
|
||||
}
|
||||
}
|
||||
if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
|
||||
{
|
||||
*d++ = 3;
|
||||
*d++ = m - c3 - 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (len != 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
const UInt32 v0 = bt[0];
|
||||
const UInt32 v1 = bt[1];
|
||||
bt += 2;
|
||||
d[0] = v0;
|
||||
d[1] = v1;
|
||||
d += 2;
|
||||
}
|
||||
while (bt != btLim);
|
||||
}
|
||||
INCREASE_LZ_POS
|
||||
return d;
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
|
||||
{
|
||||
UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
|
||||
UInt32 *hash = p->hash;
|
||||
const Byte *cur = p->pointerToCurPos;
|
||||
const UInt32 m = p->lzPos;
|
||||
MT_HASH3_CALC
|
||||
// MT_HASH4_CALC
|
||||
c2 = hash[h2];
|
||||
c3 = (hash + kFix3HashSize)[h3];
|
||||
// c4 = (hash + kFix4HashSize)[h4];
|
||||
|
||||
hash[h2] = m;
|
||||
(hash + kFix3HashSize)[h3] = m;
|
||||
// (hash + kFix4HashSize)[h4] = m;
|
||||
|
||||
// #define BT5_USE_H2
|
||||
// #ifdef BT5_USE_H2
|
||||
if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
|
||||
{
|
||||
d[1] = m - c2 - 1;
|
||||
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
|
||||
{
|
||||
// d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3;
|
||||
// return d + 2;
|
||||
|
||||
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3])
|
||||
{
|
||||
d[0] = 4;
|
||||
return d + 2;
|
||||
}
|
||||
d[0] = 3;
|
||||
d += 2;
|
||||
|
||||
#ifdef BT5_USE_H4
|
||||
if (c4 >= matchMinPos)
|
||||
if (
|
||||
cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
|
||||
cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
|
||||
)
|
||||
{
|
||||
*d++ = 4;
|
||||
*d++ = m - c4 - 1;
|
||||
}
|
||||
#endif
|
||||
return d;
|
||||
}
|
||||
d[0] = 2;
|
||||
d += 2;
|
||||
}
|
||||
// #endif
|
||||
|
||||
if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
|
||||
{
|
||||
d[1] = m - c3 - 1;
|
||||
if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3])
|
||||
{
|
||||
d[0] = 4;
|
||||
return d + 2;
|
||||
}
|
||||
d[0] = 3;
|
||||
d += 2;
|
||||
}
|
||||
|
||||
#ifdef BT5_USE_H4
|
||||
if (c4 >= matchMinPos)
|
||||
if (
|
||||
cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
|
||||
cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
|
||||
)
|
||||
{
|
||||
*d++ = 4;
|
||||
*d++ = m - c4 - 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
|
||||
static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
|
||||
{
|
||||
const UInt32 *bt = p->btBufPos;
|
||||
const UInt32 len = *bt++;
|
||||
const UInt32 *btLim = bt + len;
|
||||
p->btBufPos = btLim;
|
||||
p->btNumAvailBytes--;
|
||||
INCREASE_LZ_POS
|
||||
{
|
||||
while (bt != btLim)
|
||||
{
|
||||
const UInt32 v0 = bt[0];
|
||||
const UInt32 v1 = bt[1];
|
||||
bt += 2;
|
||||
d[0] = v0;
|
||||
d[1] = v1;
|
||||
d += 2;
|
||||
}
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
|
||||
{
|
||||
const UInt32 *bt = p->btBufPos;
|
||||
UInt32 len = *bt++;
|
||||
const UInt32 avail = p->btNumAvailBytes - 1;
|
||||
p->btNumAvailBytes = avail;
|
||||
p->btBufPos = bt + len;
|
||||
if (len == 0)
|
||||
{
|
||||
#define BT_HASH_BYTES_MAX 5
|
||||
if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)
|
||||
{
|
||||
UInt32 m = p->lzPos;
|
||||
if (m > p->historySize)
|
||||
m -= p->historySize;
|
||||
else
|
||||
m = 1;
|
||||
d = p->MixMatchesFunc(p, m, d);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
first match pair from BinTree: (match_len, match_dist),
|
||||
(match_len >= numHashBytes).
|
||||
MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)
|
||||
*/
|
||||
d = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
|
||||
// if (d) // check for failure
|
||||
do
|
||||
{
|
||||
const UInt32 v0 = bt[0];
|
||||
const UInt32 v1 = bt[1];
|
||||
bt += 2;
|
||||
d[0] = v0;
|
||||
d[1] = v1;
|
||||
d += 2;
|
||||
}
|
||||
while (len -= 2);
|
||||
}
|
||||
INCREASE_LZ_POS
|
||||
return d;
|
||||
}
|
||||
|
||||
#define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED
|
||||
#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
|
||||
#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);
|
||||
|
||||
static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
|
||||
{
|
||||
SKIP_HEADER2_MT { p->btNumAvailBytes--;
|
||||
SKIP_FOOTER_MT
|
||||
}
|
||||
|
||||
static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
|
||||
{
|
||||
SKIP_HEADER_MT(2)
|
||||
UInt32 h2;
|
||||
MT_HASH2_CALC
|
||||
hash[h2] = p->lzPos;
|
||||
SKIP_FOOTER_MT
|
||||
}
|
||||
|
||||
static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
|
||||
{
|
||||
SKIP_HEADER_MT(3)
|
||||
UInt32 h2, h3;
|
||||
MT_HASH3_CALC
|
||||
(hash + kFix3HashSize)[h3] =
|
||||
hash[ h2] =
|
||||
p->lzPos;
|
||||
SKIP_FOOTER_MT
|
||||
}
|
||||
|
||||
/*
|
||||
// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip().
|
||||
// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream.
|
||||
|
||||
static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
|
||||
{
|
||||
SKIP_HEADER_MT(4)
|
||||
UInt32 h2, h3; // h4
|
||||
MT_HASH3_CALC
|
||||
// MT_HASH4_CALC
|
||||
// (hash + kFix4HashSize)[h4] =
|
||||
(hash + kFix3HashSize)[h3] =
|
||||
hash[ h2] =
|
||||
p->lzPos;
|
||||
SKIP_FOOTER_MT
|
||||
}
|
||||
*/
|
||||
|
||||
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
|
||||
{
|
||||
vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
|
||||
vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
|
||||
vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
|
||||
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;
|
||||
|
||||
switch (MF(p)->numHashBytes)
|
||||
{
|
||||
case 2:
|
||||
p->GetHeadsFunc = GetHeads2;
|
||||
p->MixMatchesFunc = (Mf_Mix_Matches)NULL;
|
||||
vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
|
||||
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
|
||||
break;
|
||||
case 3:
|
||||
p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
|
||||
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
|
||||
vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
|
||||
break;
|
||||
case 4:
|
||||
p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;
|
||||
|
||||
// it's fast inline version of GetMatches()
|
||||
// vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;
|
||||
|
||||
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
|
||||
vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
|
||||
break;
|
||||
default:
|
||||
p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
|
||||
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
|
||||
vTable->Skip =
|
||||
(Mf_Skip_Func)MatchFinderMt3_Skip;
|
||||
// (Mf_Skip_Func)MatchFinderMt4_Skip;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#undef RINOK_THREAD
|
||||
#undef PRF
|
||||
#undef MF
|
||||
#undef GetUi24hi_from32
|
||||
#undef LOCK_BUFFER
|
||||
#undef UNLOCK_BUFFER
|
||||
109
extern/lzma/LzFindMt.h
vendored
Normal file
109
extern/lzma/LzFindMt.h
vendored
Normal file
@@ -0,0 +1,109 @@
|
||||
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
|
||||
2023-03-05 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef ZIP7_INC_LZ_FIND_MT_H
|
||||
#define ZIP7_INC_LZ_FIND_MT_H
|
||||
|
||||
#include "LzFind.h"
|
||||
#include "Threads.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
typedef struct
|
||||
{
|
||||
UInt32 numProcessedBlocks;
|
||||
CThread thread;
|
||||
UInt64 affinity;
|
||||
|
||||
BoolInt wasCreated;
|
||||
BoolInt needStart;
|
||||
BoolInt csWasInitialized;
|
||||
BoolInt csWasEntered;
|
||||
|
||||
BoolInt exit;
|
||||
BoolInt stopWriting;
|
||||
|
||||
CAutoResetEvent canStart;
|
||||
CAutoResetEvent wasStopped;
|
||||
CSemaphore freeSemaphore;
|
||||
CSemaphore filledSemaphore;
|
||||
CCriticalSection cs;
|
||||
// UInt32 numBlocks_Sent;
|
||||
} CMtSync;
|
||||
|
||||
typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
|
||||
|
||||
/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */
|
||||
#define kMtCacheLineDummy 128
|
||||
|
||||
typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos,
|
||||
UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* LZ */
|
||||
const Byte *pointerToCurPos;
|
||||
UInt32 *btBuf;
|
||||
const UInt32 *btBufPos;
|
||||
const UInt32 *btBufPosLimit;
|
||||
UInt32 lzPos;
|
||||
UInt32 btNumAvailBytes;
|
||||
|
||||
UInt32 *hash;
|
||||
UInt32 fixedHashSize;
|
||||
// UInt32 hash4Mask;
|
||||
UInt32 historySize;
|
||||
const UInt32 *crc;
|
||||
|
||||
Mf_Mix_Matches MixMatchesFunc;
|
||||
UInt32 failure_LZ_BT; // failure in BT transfered to LZ
|
||||
// UInt32 failure_LZ_LZ; // failure in LZ tables
|
||||
UInt32 failureBuf[1];
|
||||
// UInt32 crc[256];
|
||||
|
||||
/* LZ + BT */
|
||||
CMtSync btSync;
|
||||
Byte btDummy[kMtCacheLineDummy];
|
||||
|
||||
/* BT */
|
||||
UInt32 *hashBuf;
|
||||
UInt32 hashBufPos;
|
||||
UInt32 hashBufPosLimit;
|
||||
UInt32 hashNumAvail;
|
||||
UInt32 failure_BT;
|
||||
|
||||
|
||||
CLzRef *son;
|
||||
UInt32 matchMaxLen;
|
||||
UInt32 numHashBytes;
|
||||
UInt32 pos;
|
||||
const Byte *buffer;
|
||||
UInt32 cyclicBufferPos;
|
||||
UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
|
||||
UInt32 cutValue;
|
||||
|
||||
/* BT + Hash */
|
||||
CMtSync hashSync;
|
||||
/* Byte hashDummy[kMtCacheLineDummy]; */
|
||||
|
||||
/* Hash */
|
||||
Mf_GetHeads GetHeadsFunc;
|
||||
CMatchFinder *MatchFinder;
|
||||
// CMatchFinder MatchFinder;
|
||||
} CMatchFinderMt;
|
||||
|
||||
// only for Mt part
|
||||
void MatchFinderMt_Construct(CMatchFinderMt *p);
|
||||
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc);
|
||||
|
||||
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
|
||||
UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc);
|
||||
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable);
|
||||
|
||||
/* call MatchFinderMt_InitMt() before IMatchFinder::Init() */
|
||||
SRes MatchFinderMt_InitMt(CMatchFinderMt *p);
|
||||
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p);
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
#endif
|
||||
578
extern/lzma/LzFindOpt.c
vendored
Normal file
578
extern/lzma/LzFindOpt.c
vendored
Normal file
@@ -0,0 +1,578 @@
|
||||
/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include "CpuArch.h"
|
||||
#include "LzFind.h"
|
||||
|
||||
// #include "LzFindMt.h"
|
||||
|
||||
// #define LOG_ITERS
|
||||
|
||||
// #define LOG_THREAD
|
||||
|
||||
#ifdef LOG_THREAD
|
||||
#include <stdio.h>
|
||||
#define PRF(x) x
|
||||
#else
|
||||
// #define PRF(x)
|
||||
#endif
|
||||
|
||||
#ifdef LOG_ITERS
|
||||
#include <stdio.h>
|
||||
UInt64 g_NumIters_Tree;
|
||||
UInt64 g_NumIters_Loop;
|
||||
UInt64 g_NumIters_Bytes;
|
||||
#define LOG_ITER(x) x
|
||||
#else
|
||||
#define LOG_ITER(x)
|
||||
#endif
|
||||
|
||||
// ---------- BT THREAD ----------
|
||||
|
||||
#define USE_SON_PREFETCH
|
||||
#define USE_LONG_MATCH_OPT
|
||||
|
||||
#define kEmptyHashValue 0
|
||||
|
||||
// #define CYC_TO_POS_OFFSET 0
|
||||
|
||||
// #define CYC_TO_POS_OFFSET 1 // for debug
|
||||
|
||||
/*
|
||||
Z7_NO_INLINE
|
||||
UInt32 * Z7_FASTCALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
|
||||
{
|
||||
do
|
||||
{
|
||||
UInt32 delta;
|
||||
if (hash == size)
|
||||
break;
|
||||
delta = *hash++;
|
||||
|
||||
if (delta == 0 || delta > (UInt32)pos)
|
||||
return NULL;
|
||||
|
||||
lenLimit++;
|
||||
|
||||
if (delta == (UInt32)pos)
|
||||
{
|
||||
CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
|
||||
*d++ = 0;
|
||||
ptr1[0] = kEmptyHashValue;
|
||||
ptr1[1] = kEmptyHashValue;
|
||||
}
|
||||
else
|
||||
{
|
||||
UInt32 *_distances = ++d;
|
||||
|
||||
CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
|
||||
CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
|
||||
|
||||
const Byte *len0 = cur, *len1 = cur;
|
||||
UInt32 cutValue = _cutValue;
|
||||
const Byte *maxLen = cur + _maxLen;
|
||||
|
||||
for (LOG_ITER(g_NumIters_Tree++);;)
|
||||
{
|
||||
LOG_ITER(g_NumIters_Loop++);
|
||||
{
|
||||
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
|
||||
CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
|
||||
const Byte *len = (len0 < len1 ? len0 : len1);
|
||||
|
||||
#ifdef USE_SON_PREFETCH
|
||||
const UInt32 pair0 = *pair;
|
||||
#endif
|
||||
|
||||
if (len[diff] == len[0])
|
||||
{
|
||||
if (++len != lenLimit && len[diff] == len[0])
|
||||
while (++len != lenLimit)
|
||||
{
|
||||
LOG_ITER(g_NumIters_Bytes++);
|
||||
if (len[diff] != len[0])
|
||||
break;
|
||||
}
|
||||
if (maxLen < len)
|
||||
{
|
||||
maxLen = len;
|
||||
*d++ = (UInt32)(len - cur);
|
||||
*d++ = delta - 1;
|
||||
|
||||
if (len == lenLimit)
|
||||
{
|
||||
const UInt32 pair1 = pair[1];
|
||||
*ptr1 =
|
||||
#ifdef USE_SON_PREFETCH
|
||||
pair0;
|
||||
#else
|
||||
pair[0];
|
||||
#endif
|
||||
*ptr0 = pair1;
|
||||
|
||||
_distances[-1] = (UInt32)(d - _distances);
|
||||
|
||||
#ifdef USE_LONG_MATCH_OPT
|
||||
|
||||
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
|
||||
break;
|
||||
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
hash++;
|
||||
pos++;
|
||||
cur++;
|
||||
lenLimit++;
|
||||
{
|
||||
CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
|
||||
#if 0
|
||||
*(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
|
||||
#else
|
||||
const UInt32 p0 = ptr[0 + (diff * 2)];
|
||||
const UInt32 p1 = ptr[1 + (diff * 2)];
|
||||
ptr[0] = p0;
|
||||
ptr[1] = p1;
|
||||
// ptr[0] = ptr[0 + (diff * 2)];
|
||||
// ptr[1] = ptr[1 + (diff * 2)];
|
||||
#endif
|
||||
}
|
||||
// PrintSon(son + 2, pos - 1);
|
||||
// printf("\npos = %x delta = %x\n", pos, delta);
|
||||
len++;
|
||||
*d++ = 2;
|
||||
*d++ = (UInt32)(len - cur);
|
||||
*d++ = delta - 1;
|
||||
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
|
||||
if (len[diff] < len[0])
|
||||
{
|
||||
delta = pair[1];
|
||||
if (delta >= curMatch)
|
||||
return NULL;
|
||||
*ptr1 = curMatch;
|
||||
ptr1 = pair + 1;
|
||||
len1 = len;
|
||||
}
|
||||
else
|
||||
{
|
||||
delta = *pair;
|
||||
if (delta >= curMatch)
|
||||
return NULL;
|
||||
*ptr0 = curMatch;
|
||||
ptr0 = pair;
|
||||
len0 = len;
|
||||
}
|
||||
|
||||
delta = (UInt32)pos - delta;
|
||||
|
||||
if (--cutValue == 0 || delta >= pos)
|
||||
{
|
||||
*ptr0 = *ptr1 = kEmptyHashValue;
|
||||
_distances[-1] = (UInt32)(d - _distances);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // for (tree iterations)
|
||||
}
|
||||
pos++;
|
||||
cur++;
|
||||
}
|
||||
while (d < limit);
|
||||
*posRes = (UInt32)pos;
|
||||
return d;
|
||||
}
|
||||
*/
|
||||
|
||||
/* define cbs if you use 2 functions.
|
||||
GetMatchesSpecN_1() : (pos < _cyclicBufferSize)
|
||||
GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)
|
||||
|
||||
do not define cbs if you use 1 function:
|
||||
GetMatchesSpecN_2()
|
||||
*/
|
||||
|
||||
// #define cbs _cyclicBufferSize
|
||||
|
||||
/*
|
||||
we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32
|
||||
to eliminate "movsx" BUG in old MSVC x64 compiler.
|
||||
*/
|
||||
|
||||
UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
|
||||
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
|
||||
UInt32 *posRes);
|
||||
|
||||
Z7_NO_INLINE
|
||||
UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
|
||||
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
|
||||
UInt32 *posRes)
|
||||
{
|
||||
do // while (hash != size)
|
||||
{
|
||||
UInt32 delta;
|
||||
|
||||
#ifndef cbs
|
||||
UInt32 cbs;
|
||||
#endif
|
||||
|
||||
if (hash == size)
|
||||
break;
|
||||
|
||||
delta = *hash++;
|
||||
|
||||
if (delta == 0)
|
||||
return NULL;
|
||||
|
||||
lenLimit++;
|
||||
|
||||
#ifndef cbs
|
||||
cbs = _cyclicBufferSize;
|
||||
if ((UInt32)pos < cbs)
|
||||
{
|
||||
if (delta > (UInt32)pos)
|
||||
return NULL;
|
||||
cbs = (UInt32)pos;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (delta >= cbs)
|
||||
{
|
||||
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
|
||||
*d++ = 0;
|
||||
ptr1[0] = kEmptyHashValue;
|
||||
ptr1[1] = kEmptyHashValue;
|
||||
}
|
||||
else
|
||||
{
|
||||
UInt32 *_distances = ++d;
|
||||
|
||||
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
|
||||
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
|
||||
|
||||
UInt32 cutValue = _cutValue;
|
||||
const Byte *len0 = cur, *len1 = cur;
|
||||
const Byte *maxLen = cur + _maxLen;
|
||||
|
||||
// if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else
|
||||
for (LOG_ITER(g_NumIters_Tree++);;)
|
||||
{
|
||||
LOG_ITER(g_NumIters_Loop++);
|
||||
{
|
||||
// SPEC code
|
||||
CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta
|
||||
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
|
||||
) << 1);
|
||||
|
||||
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
|
||||
const Byte *len = (len0 < len1 ? len0 : len1);
|
||||
|
||||
#ifdef USE_SON_PREFETCH
|
||||
const UInt32 pair0 = *pair;
|
||||
#endif
|
||||
|
||||
if (len[diff] == len[0])
|
||||
{
|
||||
if (++len != lenLimit && len[diff] == len[0])
|
||||
while (++len != lenLimit)
|
||||
{
|
||||
LOG_ITER(g_NumIters_Bytes++);
|
||||
if (len[diff] != len[0])
|
||||
break;
|
||||
}
|
||||
if (maxLen < len)
|
||||
{
|
||||
maxLen = len;
|
||||
*d++ = (UInt32)(len - cur);
|
||||
*d++ = delta - 1;
|
||||
|
||||
if (len == lenLimit)
|
||||
{
|
||||
const UInt32 pair1 = pair[1];
|
||||
*ptr1 =
|
||||
#ifdef USE_SON_PREFETCH
|
||||
pair0;
|
||||
#else
|
||||
pair[0];
|
||||
#endif
|
||||
*ptr0 = pair1;
|
||||
|
||||
_distances[-1] = (UInt32)(d - _distances);
|
||||
|
||||
#ifdef USE_LONG_MATCH_OPT
|
||||
|
||||
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
|
||||
break;
|
||||
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
*d++ = 2;
|
||||
*d++ = (UInt32)(lenLimit - cur);
|
||||
*d++ = delta - 1;
|
||||
cur++;
|
||||
lenLimit++;
|
||||
// SPEC
|
||||
_cyclicBufferPos++;
|
||||
{
|
||||
// SPEC code
|
||||
CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);
|
||||
const CLzRef *src = dest + ((diff
|
||||
+ (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);
|
||||
// CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
|
||||
#if 0
|
||||
*(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
|
||||
#else
|
||||
const UInt32 p0 = src[0];
|
||||
const UInt32 p1 = src[1];
|
||||
dest[0] = p0;
|
||||
dest[1] = p1;
|
||||
#endif
|
||||
}
|
||||
pos++;
|
||||
hash++;
|
||||
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
|
||||
break;
|
||||
} // for() end for long matches
|
||||
}
|
||||
#endif
|
||||
|
||||
break; // break from TREE iterations
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
|
||||
if (len[diff] < len[0])
|
||||
{
|
||||
delta = pair[1];
|
||||
*ptr1 = curMatch;
|
||||
ptr1 = pair + 1;
|
||||
len1 = len;
|
||||
if (delta >= curMatch)
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
delta = *pair;
|
||||
*ptr0 = curMatch;
|
||||
ptr0 = pair;
|
||||
len0 = len;
|
||||
if (delta >= curMatch)
|
||||
return NULL;
|
||||
}
|
||||
delta = (UInt32)pos - delta;
|
||||
|
||||
if (--cutValue == 0 || delta >= cbs)
|
||||
{
|
||||
*ptr0 = *ptr1 = kEmptyHashValue;
|
||||
_distances[-1] = (UInt32)(d - _distances);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // for (tree iterations)
|
||||
}
|
||||
pos++;
|
||||
_cyclicBufferPos++;
|
||||
cur++;
|
||||
}
|
||||
while (d < limit);
|
||||
*posRes = (UInt32)pos;
|
||||
return d;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
typedef UInt32 uint32plus; // size_t
|
||||
|
||||
UInt32 * Z7_FASTCALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
|
||||
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
|
||||
UInt32 *posRes)
|
||||
{
|
||||
do // while (hash != size)
|
||||
{
|
||||
UInt32 delta;
|
||||
|
||||
#ifndef cbs
|
||||
UInt32 cbs;
|
||||
#endif
|
||||
|
||||
if (hash == size)
|
||||
break;
|
||||
|
||||
delta = *hash++;
|
||||
|
||||
if (delta == 0)
|
||||
return NULL;
|
||||
|
||||
#ifndef cbs
|
||||
cbs = _cyclicBufferSize;
|
||||
if ((UInt32)pos < cbs)
|
||||
{
|
||||
if (delta > (UInt32)pos)
|
||||
return NULL;
|
||||
cbs = (UInt32)pos;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (delta >= cbs)
|
||||
{
|
||||
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
|
||||
*d++ = 0;
|
||||
ptr1[0] = kEmptyHashValue;
|
||||
ptr1[1] = kEmptyHashValue;
|
||||
}
|
||||
else
|
||||
{
|
||||
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
|
||||
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
|
||||
UInt32 *_distances = ++d;
|
||||
uint32plus len0 = 0, len1 = 0;
|
||||
UInt32 cutValue = _cutValue;
|
||||
uint32plus maxLen = _maxLen;
|
||||
// lenLimit++; // const Byte *lenLimit = cur + _lenLimit;
|
||||
|
||||
for (LOG_ITER(g_NumIters_Tree++);;)
|
||||
{
|
||||
LOG_ITER(g_NumIters_Loop++);
|
||||
{
|
||||
// const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
|
||||
CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta
|
||||
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
|
||||
) << 1);
|
||||
const Byte *pb = cur - delta;
|
||||
uint32plus len = (len0 < len1 ? len0 : len1);
|
||||
|
||||
#ifdef USE_SON_PREFETCH
|
||||
const UInt32 pair0 = *pair;
|
||||
#endif
|
||||
|
||||
if (pb[len] == cur[len])
|
||||
{
|
||||
if (++len != lenLimit && pb[len] == cur[len])
|
||||
while (++len != lenLimit)
|
||||
if (pb[len] != cur[len])
|
||||
break;
|
||||
if (maxLen < len)
|
||||
{
|
||||
maxLen = len;
|
||||
*d++ = (UInt32)len;
|
||||
*d++ = delta - 1;
|
||||
if (len == lenLimit)
|
||||
{
|
||||
{
|
||||
const UInt32 pair1 = pair[1];
|
||||
*ptr0 = pair1;
|
||||
*ptr1 =
|
||||
#ifdef USE_SON_PREFETCH
|
||||
pair0;
|
||||
#else
|
||||
pair[0];
|
||||
#endif
|
||||
}
|
||||
|
||||
_distances[-1] = (UInt32)(d - _distances);
|
||||
|
||||
#ifdef USE_LONG_MATCH_OPT
|
||||
|
||||
if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
|
||||
break;
|
||||
|
||||
{
|
||||
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
|
||||
for (;;)
|
||||
{
|
||||
*d++ = 2;
|
||||
*d++ = (UInt32)lenLimit;
|
||||
*d++ = delta - 1;
|
||||
_cyclicBufferPos++;
|
||||
{
|
||||
CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);
|
||||
const CLzRef *src = dest + ((diff +
|
||||
(ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);
|
||||
#if 0
|
||||
*(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
|
||||
#else
|
||||
const UInt32 p0 = src[0];
|
||||
const UInt32 p1 = src[1];
|
||||
dest[0] = p0;
|
||||
dest[1] = p1;
|
||||
#endif
|
||||
}
|
||||
hash++;
|
||||
pos++;
|
||||
cur++;
|
||||
pb++;
|
||||
if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
const UInt32 curMatch = (UInt32)pos - delta;
|
||||
if (pb[len] < cur[len])
|
||||
{
|
||||
delta = pair[1];
|
||||
*ptr1 = curMatch;
|
||||
ptr1 = pair + 1;
|
||||
len1 = len;
|
||||
}
|
||||
else
|
||||
{
|
||||
delta = *pair;
|
||||
*ptr0 = curMatch;
|
||||
ptr0 = pair;
|
||||
len0 = len;
|
||||
}
|
||||
|
||||
{
|
||||
if (delta >= curMatch)
|
||||
return NULL;
|
||||
delta = (UInt32)pos - delta;
|
||||
if (delta >= cbs
|
||||
// delta >= _cyclicBufferSize || delta >= pos
|
||||
|| --cutValue == 0)
|
||||
{
|
||||
*ptr0 = *ptr1 = kEmptyHashValue;
|
||||
_distances[-1] = (UInt32)(d - _distances);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // for (tree iterations)
|
||||
}
|
||||
pos++;
|
||||
_cyclicBufferPos++;
|
||||
cur++;
|
||||
}
|
||||
while (d < limit);
|
||||
*posRes = (UInt32)pos;
|
||||
return d;
|
||||
}
|
||||
*/
|
||||
66
extern/lzma/LzHash.h
vendored
66
extern/lzma/LzHash.h
vendored
@@ -1,54 +1,34 @@
|
||||
/* LzHash.h -- HASH functions for LZ algorithms
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
/* LzHash.h -- HASH constants for LZ algorithms
|
||||
2023-03-05 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZHASH_H
|
||||
#define __LZHASH_H
|
||||
#ifndef ZIP7_INC_LZ_HASH_H
|
||||
#define ZIP7_INC_LZ_HASH_H
|
||||
|
||||
/*
|
||||
(kHash2Size >= (1 << 8)) : Required
|
||||
(kHash3Size >= (1 << 16)) : Required
|
||||
*/
|
||||
|
||||
#define kHash2Size (1 << 10)
|
||||
#define kHash3Size (1 << 16)
|
||||
#define kHash4Size (1 << 20)
|
||||
// #define kHash4Size (1 << 20)
|
||||
|
||||
#define kFix3HashSize (kHash2Size)
|
||||
#define kFix4HashSize (kHash2Size + kHash3Size)
|
||||
#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
|
||||
// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
|
||||
|
||||
#define HASH2_CALC hashValue = cur[0] | ((UInt32)cur[1] << 8);
|
||||
/*
|
||||
We use up to 3 crc values for hash:
|
||||
crc0
|
||||
crc1 << Shift_1
|
||||
crc2 << Shift_2
|
||||
(Shift_1 = 5) and (Shift_2 = 10) is good tradeoff.
|
||||
Small values for Shift are not good for collision rate.
|
||||
Big value for Shift_2 increases the minimum size
|
||||
of hash table, that will be slow for small files.
|
||||
*/
|
||||
|
||||
#define HASH3_CALC { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
hash2Value = temp & (kHash2Size - 1); \
|
||||
hashValue = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
|
||||
|
||||
#define HASH4_CALC { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
hash2Value = temp & (kHash2Size - 1); \
|
||||
hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
|
||||
hashValue = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
|
||||
|
||||
#define HASH5_CALC { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
hash2Value = temp & (kHash2Size - 1); \
|
||||
hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
|
||||
hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)); \
|
||||
hashValue = (hash4Value ^ (p->crc[cur[4]] << 3)) & p->hashMask; \
|
||||
hash4Value &= (kHash4Size - 1); }
|
||||
|
||||
/* #define HASH_ZIP_CALC hashValue = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
|
||||
#define HASH_ZIP_CALC hashValue = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
|
||||
|
||||
|
||||
#define MT_HASH2_CALC \
|
||||
hash2Value = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
|
||||
|
||||
#define MT_HASH3_CALC { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
hash2Value = temp & (kHash2Size - 1); \
|
||||
hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
|
||||
|
||||
#define MT_HASH4_CALC { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
hash2Value = temp & (kHash2Size - 1); \
|
||||
hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
|
||||
hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
|
||||
#define kLzHash_CrcShift_1 5
|
||||
#define kLzHash_CrcShift_2 10
|
||||
|
||||
#endif
|
||||
|
||||
1070
extern/lzma/LzmaDec.c
vendored
1070
extern/lzma/LzmaDec.c
vendored
@@ -1,81 +1,107 @@
|
||||
/* LzmaDec.c -- LZMA Decoder
|
||||
2008-11-06 : Igor Pavlov : Public domain */
|
||||
2023-04-07 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "LzmaDec.h"
|
||||
#include "Precomp.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#define kNumTopBits 24
|
||||
#define kTopValue ((UInt32)1 << kNumTopBits)
|
||||
/* #include "CpuArch.h" */
|
||||
#include "LzmaDec.h"
|
||||
|
||||
// #define kNumTopBits 24
|
||||
#define kTopValue ((UInt32)1 << 24)
|
||||
|
||||
#define kNumBitModelTotalBits 11
|
||||
#define kBitModelTotal (1 << kNumBitModelTotalBits)
|
||||
#define kNumMoveBits 5
|
||||
|
||||
#define RC_INIT_SIZE 5
|
||||
|
||||
#ifndef Z7_LZMA_DEC_OPT
|
||||
|
||||
#define kNumMoveBits 5
|
||||
#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
|
||||
|
||||
#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
|
||||
#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
|
||||
#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
|
||||
#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
|
||||
#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
|
||||
{ UPDATE_0(p); i = (i + i); A0; } else \
|
||||
{ UPDATE_1(p); i = (i + i) + 1; A1; }
|
||||
#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;)
|
||||
{ UPDATE_0(p) i = (i + i); A0; } else \
|
||||
{ UPDATE_1(p) i = (i + i) + 1; A1; }
|
||||
|
||||
#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
|
||||
|
||||
#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
|
||||
{ UPDATE_0(p + i) A0; } else \
|
||||
{ UPDATE_1(p + i) A1; }
|
||||
#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
|
||||
#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; )
|
||||
#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; )
|
||||
|
||||
#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); }
|
||||
#define TREE_DECODE(probs, limit, i) \
|
||||
{ i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
|
||||
|
||||
/* #define _LZMA_SIZE_OPT */
|
||||
/* #define Z7_LZMA_SIZE_OPT */
|
||||
|
||||
#ifdef _LZMA_SIZE_OPT
|
||||
#ifdef Z7_LZMA_SIZE_OPT
|
||||
#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
|
||||
#else
|
||||
#define TREE_6_DECODE(probs, i) \
|
||||
{ i = 1; \
|
||||
TREE_GET_BIT(probs, i); \
|
||||
TREE_GET_BIT(probs, i); \
|
||||
TREE_GET_BIT(probs, i); \
|
||||
TREE_GET_BIT(probs, i); \
|
||||
TREE_GET_BIT(probs, i); \
|
||||
TREE_GET_BIT(probs, i); \
|
||||
TREE_GET_BIT(probs, i) \
|
||||
TREE_GET_BIT(probs, i) \
|
||||
TREE_GET_BIT(probs, i) \
|
||||
TREE_GET_BIT(probs, i) \
|
||||
TREE_GET_BIT(probs, i) \
|
||||
TREE_GET_BIT(probs, i) \
|
||||
i -= 0x40; }
|
||||
#endif
|
||||
|
||||
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
|
||||
#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol)
|
||||
#define MATCHED_LITER_DEC \
|
||||
matchByte += matchByte; \
|
||||
bit = offs; \
|
||||
offs &= matchByte; \
|
||||
probLit = prob + (offs + bit + symbol); \
|
||||
GET_BIT2(probLit, symbol, offs ^= bit; , ;)
|
||||
|
||||
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
|
||||
#endif // Z7_LZMA_DEC_OPT
|
||||
|
||||
|
||||
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
|
||||
|
||||
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
|
||||
#define UPDATE_0_CHECK range = bound;
|
||||
#define UPDATE_1_CHECK range -= bound; code -= bound;
|
||||
#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
|
||||
{ UPDATE_0_CHECK; i = (i + i); A0; } else \
|
||||
{ UPDATE_1_CHECK; i = (i + i) + 1; A1; }
|
||||
{ UPDATE_0_CHECK i = (i + i); A0; } else \
|
||||
{ UPDATE_1_CHECK i = (i + i) + 1; A1; }
|
||||
#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
|
||||
#define TREE_DECODE_CHECK(probs, limit, i) \
|
||||
{ i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
|
||||
|
||||
|
||||
#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
|
||||
{ UPDATE_0_CHECK i += m; m += m; } else \
|
||||
{ UPDATE_1_CHECK m += m; i += m; }
|
||||
|
||||
|
||||
#define kNumPosBitsMax 4
|
||||
#define kNumPosStatesMax (1 << kNumPosBitsMax)
|
||||
|
||||
#define kLenNumLowBits 3
|
||||
#define kLenNumLowSymbols (1 << kLenNumLowBits)
|
||||
#define kLenNumMidBits 3
|
||||
#define kLenNumMidSymbols (1 << kLenNumMidBits)
|
||||
#define kLenNumHighBits 8
|
||||
#define kLenNumHighSymbols (1 << kLenNumHighBits)
|
||||
|
||||
#define LenChoice 0
|
||||
#define LenChoice2 (LenChoice + 1)
|
||||
#define LenLow (LenChoice2 + 1)
|
||||
#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
|
||||
#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
|
||||
#define LenLow 0
|
||||
#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits))
|
||||
#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
|
||||
|
||||
#define LenChoice LenLow
|
||||
#define LenChoice2 (LenLow + (1 << kLenNumLowBits))
|
||||
|
||||
#define kNumStates 12
|
||||
#define kNumStates2 16
|
||||
#define kNumLitStates 7
|
||||
|
||||
#define kStartPosModelIndex 4
|
||||
@@ -89,60 +115,130 @@
|
||||
#define kAlignTableSize (1 << kNumAlignBits)
|
||||
|
||||
#define kMatchMinLen 2
|
||||
#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)
|
||||
#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
|
||||
|
||||
#define IsMatch 0
|
||||
#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
|
||||
#define kMatchSpecLen_Error_Data (1 << 9)
|
||||
#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1)
|
||||
|
||||
/* External ASM code needs same CLzmaProb array layout. So don't change it. */
|
||||
|
||||
/* (probs_1664) is faster and better for code size at some platforms */
|
||||
/*
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
*/
|
||||
#define kStartOffset 1664
|
||||
#define GET_PROBS p->probs_1664
|
||||
/*
|
||||
#define GET_PROBS p->probs + kStartOffset
|
||||
#else
|
||||
#define kStartOffset 0
|
||||
#define GET_PROBS p->probs
|
||||
#endif
|
||||
*/
|
||||
|
||||
#define SpecPos (-kStartOffset)
|
||||
#define IsRep0Long (SpecPos + kNumFullDistances)
|
||||
#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax))
|
||||
#define LenCoder (RepLenCoder + kNumLenProbs)
|
||||
#define IsMatch (LenCoder + kNumLenProbs)
|
||||
#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax))
|
||||
#define IsRep (Align + kAlignTableSize)
|
||||
#define IsRepG0 (IsRep + kNumStates)
|
||||
#define IsRepG1 (IsRepG0 + kNumStates)
|
||||
#define IsRepG2 (IsRepG1 + kNumStates)
|
||||
#define IsRep0Long (IsRepG2 + kNumStates)
|
||||
#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
|
||||
#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
|
||||
#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
|
||||
#define LenCoder (Align + kAlignTableSize)
|
||||
#define RepLenCoder (LenCoder + kNumLenProbs)
|
||||
#define Literal (RepLenCoder + kNumLenProbs)
|
||||
#define PosSlot (IsRepG2 + kNumStates)
|
||||
#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
|
||||
#define NUM_BASE_PROBS (Literal + kStartOffset)
|
||||
|
||||
#define LZMA_BASE_SIZE 1846
|
||||
#define LZMA_LIT_SIZE 768
|
||||
|
||||
#define LzmaProps_GetNumProbs(p) ((UInt32)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
|
||||
|
||||
#if Literal != LZMA_BASE_SIZE
|
||||
StopCompilingDueBUG
|
||||
#if Align != 0 && kStartOffset != 0
|
||||
#error Stop_Compiling_Bad_LZMA_kAlign
|
||||
#endif
|
||||
|
||||
static const Byte kLiteralNextStates[kNumStates * 2] =
|
||||
{
|
||||
0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
|
||||
7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10
|
||||
};
|
||||
#if NUM_BASE_PROBS != 1984
|
||||
#error Stop_Compiling_Bad_LZMA_PROBS
|
||||
#endif
|
||||
|
||||
|
||||
#define LZMA_LIT_SIZE 0x300
|
||||
|
||||
#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
|
||||
|
||||
|
||||
#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4)
|
||||
#define COMBINED_PS_STATE (posState + state)
|
||||
#define GET_LEN_STATE (posState)
|
||||
|
||||
#define LZMA_DIC_MIN (1 << 12)
|
||||
|
||||
/* First LZMA-symbol is always decoded.
|
||||
And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization
|
||||
Out:
|
||||
Result:
|
||||
SZ_OK - OK
|
||||
SZ_ERROR_DATA - Error
|
||||
p->remainLen:
|
||||
< kMatchSpecLenStart : normal remain
|
||||
= kMatchSpecLenStart : finished
|
||||
= kMatchSpecLenStart + 1 : Flush marker
|
||||
= kMatchSpecLenStart + 2 : State Init Marker
|
||||
/*
|
||||
p->remainLen : shows status of LZMA decoder:
|
||||
< kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset
|
||||
= kMatchSpecLenStart : the LZMA stream was finished with end mark
|
||||
= kMatchSpecLenStart + 1 : need init range coder
|
||||
= kMatchSpecLenStart + 2 : need init range coder and state
|
||||
= kMatchSpecLen_Error_Fail : Internal Code Failure
|
||||
= kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error
|
||||
*/
|
||||
|
||||
static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
|
||||
{
|
||||
CLzmaProb *probs = p->probs;
|
||||
/* ---------- LZMA_DECODE_REAL ---------- */
|
||||
/*
|
||||
LzmaDec_DecodeReal_3() can be implemented in external ASM file.
|
||||
3 - is the code compatibility version of that function for check at link time.
|
||||
*/
|
||||
|
||||
unsigned state = p->state;
|
||||
#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3
|
||||
|
||||
/*
|
||||
LZMA_DECODE_REAL()
|
||||
In:
|
||||
RangeCoder is normalized
|
||||
if (p->dicPos == limit)
|
||||
{
|
||||
LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases.
|
||||
So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol
|
||||
is not END_OF_PAYALOAD_MARKER, then the function doesn't write any byte to dictionary,
|
||||
the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later.
|
||||
}
|
||||
|
||||
Processing:
|
||||
The first LZMA symbol will be decoded in any case.
|
||||
All main checks for limits are at the end of main loop,
|
||||
It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
|
||||
RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked.
|
||||
But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for
|
||||
next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX),
|
||||
that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit.
|
||||
So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte.
|
||||
|
||||
Out:
|
||||
RangeCoder is normalized
|
||||
Result:
|
||||
SZ_OK - OK
|
||||
p->remainLen:
|
||||
< kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset
|
||||
= kMatchSpecLenStart : the LZMA stream was finished with end mark
|
||||
|
||||
SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary
|
||||
p->remainLen : undefined
|
||||
p->reps[*] : undefined
|
||||
*/
|
||||
|
||||
|
||||
#ifdef Z7_LZMA_DEC_OPT
|
||||
|
||||
int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
|
||||
|
||||
#else
|
||||
|
||||
static
|
||||
int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
|
||||
{
|
||||
CLzmaProb *probs = GET_PROBS;
|
||||
unsigned state = (unsigned)p->state;
|
||||
UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
|
||||
unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
|
||||
unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1;
|
||||
unsigned lc = p->prop.lc;
|
||||
unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);
|
||||
|
||||
Byte *dic = p->dic;
|
||||
SizeT dicBufSize = p->dicBufSize;
|
||||
@@ -161,99 +257,126 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
|
||||
CLzmaProb *prob;
|
||||
UInt32 bound;
|
||||
unsigned ttt;
|
||||
unsigned posState = processedPos & pbMask;
|
||||
unsigned posState = CALC_POS_STATE(processedPos, pbMask);
|
||||
|
||||
prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
|
||||
prob = probs + IsMatch + COMBINED_PS_STATE;
|
||||
IF_BIT_0(prob)
|
||||
{
|
||||
unsigned symbol;
|
||||
UPDATE_0(prob);
|
||||
UPDATE_0(prob)
|
||||
prob = probs + Literal;
|
||||
if (checkDicSize != 0 || processedPos != 0)
|
||||
prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) +
|
||||
(dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc))));
|
||||
if (processedPos != 0 || checkDicSize != 0)
|
||||
prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
|
||||
processedPos++;
|
||||
|
||||
if (state < kNumLitStates)
|
||||
{
|
||||
state -= (state < 4) ? state : 3;
|
||||
symbol = 1;
|
||||
do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100);
|
||||
#ifdef Z7_LZMA_SIZE_OPT
|
||||
do { NORMAL_LITER_DEC } while (symbol < 0x100);
|
||||
#else
|
||||
NORMAL_LITER_DEC
|
||||
NORMAL_LITER_DEC
|
||||
NORMAL_LITER_DEC
|
||||
NORMAL_LITER_DEC
|
||||
NORMAL_LITER_DEC
|
||||
NORMAL_LITER_DEC
|
||||
NORMAL_LITER_DEC
|
||||
NORMAL_LITER_DEC
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
|
||||
unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
|
||||
unsigned offs = 0x100;
|
||||
state -= (state < 10) ? 3 : 6;
|
||||
symbol = 1;
|
||||
#ifdef Z7_LZMA_SIZE_OPT
|
||||
do
|
||||
{
|
||||
unsigned bit;
|
||||
CLzmaProb *probLit;
|
||||
matchByte <<= 1;
|
||||
bit = (matchByte & offs);
|
||||
probLit = prob + offs + bit + symbol;
|
||||
GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit)
|
||||
MATCHED_LITER_DEC
|
||||
}
|
||||
while (symbol < 0x100);
|
||||
#else
|
||||
{
|
||||
unsigned bit;
|
||||
CLzmaProb *probLit;
|
||||
MATCHED_LITER_DEC
|
||||
MATCHED_LITER_DEC
|
||||
MATCHED_LITER_DEC
|
||||
MATCHED_LITER_DEC
|
||||
MATCHED_LITER_DEC
|
||||
MATCHED_LITER_DEC
|
||||
MATCHED_LITER_DEC
|
||||
MATCHED_LITER_DEC
|
||||
}
|
||||
#endif
|
||||
}
|
||||
dic[dicPos++] = (Byte)symbol;
|
||||
processedPos++;
|
||||
|
||||
state = kLiteralNextStates[state];
|
||||
/* if (state < 4) state = 0; else if (state < 10) state -= 3; else state -= 6; */
|
||||
dic[dicPos++] = (Byte)symbol;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
|
||||
{
|
||||
UPDATE_1(prob);
|
||||
UPDATE_1(prob)
|
||||
prob = probs + IsRep + state;
|
||||
IF_BIT_0(prob)
|
||||
{
|
||||
UPDATE_0(prob);
|
||||
UPDATE_0(prob)
|
||||
state += kNumStates;
|
||||
prob = probs + LenCoder;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1(prob);
|
||||
if (checkDicSize == 0 && processedPos == 0)
|
||||
return SZ_ERROR_DATA;
|
||||
UPDATE_1(prob)
|
||||
prob = probs + IsRepG0 + state;
|
||||
IF_BIT_0(prob)
|
||||
{
|
||||
UPDATE_0(prob);
|
||||
prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
|
||||
UPDATE_0(prob)
|
||||
prob = probs + IsRep0Long + COMBINED_PS_STATE;
|
||||
IF_BIT_0(prob)
|
||||
{
|
||||
UPDATE_0(prob);
|
||||
dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
|
||||
UPDATE_0(prob)
|
||||
|
||||
// that case was checked before with kBadRepCode
|
||||
// if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
|
||||
// The caller doesn't allow (dicPos == limit) case here
|
||||
// so we don't need the following check:
|
||||
// if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; }
|
||||
|
||||
dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
|
||||
dicPos++;
|
||||
processedPos++;
|
||||
state = state < kNumLitStates ? 9 : 11;
|
||||
continue;
|
||||
}
|
||||
UPDATE_1(prob);
|
||||
UPDATE_1(prob)
|
||||
}
|
||||
else
|
||||
{
|
||||
UInt32 distance;
|
||||
UPDATE_1(prob);
|
||||
UPDATE_1(prob)
|
||||
prob = probs + IsRepG1 + state;
|
||||
IF_BIT_0(prob)
|
||||
{
|
||||
UPDATE_0(prob);
|
||||
UPDATE_0(prob)
|
||||
distance = rep1;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1(prob);
|
||||
UPDATE_1(prob)
|
||||
prob = probs + IsRepG2 + state;
|
||||
IF_BIT_0(prob)
|
||||
{
|
||||
UPDATE_0(prob);
|
||||
UPDATE_0(prob)
|
||||
distance = rep2;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1(prob);
|
||||
UPDATE_1(prob)
|
||||
distance = rep3;
|
||||
rep3 = rep2;
|
||||
}
|
||||
@@ -265,63 +388,101 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
|
||||
state = state < kNumLitStates ? 8 : 11;
|
||||
prob = probs + RepLenCoder;
|
||||
}
|
||||
|
||||
#ifdef Z7_LZMA_SIZE_OPT
|
||||
{
|
||||
unsigned limit, offset;
|
||||
unsigned lim, offset;
|
||||
CLzmaProb *probLen = prob + LenChoice;
|
||||
IF_BIT_0(probLen)
|
||||
{
|
||||
UPDATE_0(probLen);
|
||||
probLen = prob + LenLow + (posState << kLenNumLowBits);
|
||||
UPDATE_0(probLen)
|
||||
probLen = prob + LenLow + GET_LEN_STATE;
|
||||
offset = 0;
|
||||
limit = (1 << kLenNumLowBits);
|
||||
lim = (1 << kLenNumLowBits);
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1(probLen);
|
||||
UPDATE_1(probLen)
|
||||
probLen = prob + LenChoice2;
|
||||
IF_BIT_0(probLen)
|
||||
{
|
||||
UPDATE_0(probLen);
|
||||
probLen = prob + LenMid + (posState << kLenNumMidBits);
|
||||
UPDATE_0(probLen)
|
||||
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
|
||||
offset = kLenNumLowSymbols;
|
||||
limit = (1 << kLenNumMidBits);
|
||||
lim = (1 << kLenNumLowBits);
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1(probLen);
|
||||
UPDATE_1(probLen)
|
||||
probLen = prob + LenHigh;
|
||||
offset = kLenNumLowSymbols + kLenNumMidSymbols;
|
||||
limit = (1 << kLenNumHighBits);
|
||||
offset = kLenNumLowSymbols * 2;
|
||||
lim = (1 << kLenNumHighBits);
|
||||
}
|
||||
}
|
||||
TREE_DECODE(probLen, limit, len);
|
||||
TREE_DECODE(probLen, lim, len)
|
||||
len += offset;
|
||||
}
|
||||
#else
|
||||
{
|
||||
CLzmaProb *probLen = prob + LenChoice;
|
||||
IF_BIT_0(probLen)
|
||||
{
|
||||
UPDATE_0(probLen)
|
||||
probLen = prob + LenLow + GET_LEN_STATE;
|
||||
len = 1;
|
||||
TREE_GET_BIT(probLen, len)
|
||||
TREE_GET_BIT(probLen, len)
|
||||
TREE_GET_BIT(probLen, len)
|
||||
len -= 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1(probLen)
|
||||
probLen = prob + LenChoice2;
|
||||
IF_BIT_0(probLen)
|
||||
{
|
||||
UPDATE_0(probLen)
|
||||
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
|
||||
len = 1;
|
||||
TREE_GET_BIT(probLen, len)
|
||||
TREE_GET_BIT(probLen, len)
|
||||
TREE_GET_BIT(probLen, len)
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1(probLen)
|
||||
probLen = prob + LenHigh;
|
||||
TREE_DECODE(probLen, (1 << kLenNumHighBits), len)
|
||||
len += kLenNumLowSymbols * 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (state >= kNumStates)
|
||||
{
|
||||
UInt32 distance;
|
||||
prob = probs + PosSlot +
|
||||
((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
|
||||
TREE_6_DECODE(prob, distance);
|
||||
TREE_6_DECODE(prob, distance)
|
||||
if (distance >= kStartPosModelIndex)
|
||||
{
|
||||
unsigned posSlot = (unsigned)distance;
|
||||
int numDirectBits = (int)(((distance >> 1) - 1));
|
||||
unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
|
||||
distance = (2 | (distance & 1));
|
||||
if (posSlot < kEndPosModelIndex)
|
||||
{
|
||||
distance <<= numDirectBits;
|
||||
prob = probs + SpecPos + distance - posSlot - 1;
|
||||
prob = probs + SpecPos;
|
||||
{
|
||||
UInt32 mask = 1;
|
||||
unsigned i = 1;
|
||||
UInt32 m = 1;
|
||||
distance++;
|
||||
do
|
||||
{
|
||||
GET_BIT2(prob + i, i, ; , distance |= mask);
|
||||
mask <<= 1;
|
||||
REV_BIT_VAR(prob, distance, m)
|
||||
}
|
||||
while (--numDirectBits != 0);
|
||||
while (--numDirectBits);
|
||||
distance -= m;
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -348,57 +509,70 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
|
||||
}
|
||||
*/
|
||||
}
|
||||
while (--numDirectBits != 0);
|
||||
while (--numDirectBits);
|
||||
prob = probs + Align;
|
||||
distance <<= kNumAlignBits;
|
||||
{
|
||||
unsigned i = 1;
|
||||
GET_BIT2(prob + i, i, ; , distance |= 1);
|
||||
GET_BIT2(prob + i, i, ; , distance |= 2);
|
||||
GET_BIT2(prob + i, i, ; , distance |= 4);
|
||||
GET_BIT2(prob + i, i, ; , distance |= 8);
|
||||
REV_BIT_CONST(prob, i, 1)
|
||||
REV_BIT_CONST(prob, i, 2)
|
||||
REV_BIT_CONST(prob, i, 4)
|
||||
REV_BIT_LAST (prob, i, 8)
|
||||
distance |= i;
|
||||
}
|
||||
if (distance == (UInt32)0xFFFFFFFF)
|
||||
{
|
||||
len += kMatchSpecLenStart;
|
||||
len = kMatchSpecLenStart;
|
||||
state -= kNumStates;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rep3 = rep2;
|
||||
rep2 = rep1;
|
||||
rep1 = rep0;
|
||||
rep0 = distance + 1;
|
||||
if (checkDicSize == 0)
|
||||
{
|
||||
if (distance >= processedPos)
|
||||
return SZ_ERROR_DATA;
|
||||
}
|
||||
else if (distance >= checkDicSize)
|
||||
return SZ_ERROR_DATA;
|
||||
state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
|
||||
/* state = kLiteralNextStates[state]; */
|
||||
if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
|
||||
{
|
||||
len += kMatchSpecLen_Error_Data + kMatchMinLen;
|
||||
// len = kMatchSpecLen_Error_Data;
|
||||
// len += kMatchMinLen;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
len += kMatchMinLen;
|
||||
|
||||
if (limit == dicPos)
|
||||
return SZ_ERROR_DATA;
|
||||
{
|
||||
SizeT rem = limit - dicPos;
|
||||
unsigned curLen = ((rem < len) ? (unsigned)rem : len);
|
||||
SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0);
|
||||
SizeT rem;
|
||||
unsigned curLen;
|
||||
SizeT pos;
|
||||
|
||||
if ((rem = limit - dicPos) == 0)
|
||||
{
|
||||
/*
|
||||
We stop decoding and return SZ_OK, and we can resume decoding later.
|
||||
Any error conditions can be tested later in caller code.
|
||||
For more strict mode we can stop decoding with error
|
||||
// len += kMatchSpecLen_Error_Data;
|
||||
*/
|
||||
break;
|
||||
}
|
||||
|
||||
curLen = ((rem < len) ? (unsigned)rem : len);
|
||||
pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
|
||||
|
||||
processedPos += curLen;
|
||||
processedPos += (UInt32)curLen;
|
||||
|
||||
len -= curLen;
|
||||
if (pos + curLen <= dicBufSize)
|
||||
if (curLen <= dicBufSize - pos)
|
||||
{
|
||||
Byte *dest = dic + dicPos;
|
||||
ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
|
||||
const Byte *lim = dest + curLen;
|
||||
dicPos += curLen;
|
||||
dicPos += (SizeT)curLen;
|
||||
do
|
||||
*(dest) = (Byte)*(dest + src);
|
||||
while (++dest != lim);
|
||||
@@ -417,108 +591,153 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
|
||||
}
|
||||
}
|
||||
while (dicPos < limit && buf < bufLimit);
|
||||
NORMALIZE;
|
||||
|
||||
NORMALIZE
|
||||
|
||||
p->buf = buf;
|
||||
p->range = range;
|
||||
p->code = code;
|
||||
p->remainLen = len;
|
||||
p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too.
|
||||
p->dicPos = dicPos;
|
||||
p->processedPos = processedPos;
|
||||
p->reps[0] = rep0;
|
||||
p->reps[1] = rep1;
|
||||
p->reps[2] = rep2;
|
||||
p->reps[3] = rep3;
|
||||
p->state = state;
|
||||
|
||||
p->state = (UInt32)state;
|
||||
if (len >= kMatchSpecLen_Error_Data)
|
||||
return SZ_ERROR_DATA;
|
||||
return SZ_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
|
||||
|
||||
|
||||
static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
|
||||
{
|
||||
if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
|
||||
unsigned len = (unsigned)p->remainLen;
|
||||
if (len == 0 /* || len >= kMatchSpecLenStart */)
|
||||
return;
|
||||
{
|
||||
Byte *dic = p->dic;
|
||||
SizeT dicPos = p->dicPos;
|
||||
SizeT dicBufSize = p->dicBufSize;
|
||||
unsigned len = p->remainLen;
|
||||
UInt32 rep0 = p->reps[0];
|
||||
if (limit - dicPos < len)
|
||||
len = (unsigned)(limit - dicPos);
|
||||
Byte *dic;
|
||||
SizeT dicBufSize;
|
||||
SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
|
||||
{
|
||||
SizeT rem = limit - dicPos;
|
||||
if (rem < len)
|
||||
{
|
||||
len = (unsigned)(rem);
|
||||
if (len == 0)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
|
||||
p->checkDicSize = p->prop.dicSize;
|
||||
|
||||
p->processedPos += len;
|
||||
p->remainLen -= len;
|
||||
while (len-- != 0)
|
||||
p->processedPos += (UInt32)len;
|
||||
p->remainLen -= (UInt32)len;
|
||||
dic = p->dic;
|
||||
rep0 = p->reps[0];
|
||||
dicBufSize = p->dicBufSize;
|
||||
do
|
||||
{
|
||||
dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
|
||||
dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
|
||||
dicPos++;
|
||||
}
|
||||
while (--len);
|
||||
p->dicPos = dicPos;
|
||||
}
|
||||
}
|
||||
|
||||
static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
|
||||
{
|
||||
do
|
||||
{
|
||||
SizeT limit2 = limit;
|
||||
if (p->checkDicSize == 0)
|
||||
{
|
||||
UInt32 rem = p->prop.dicSize - p->processedPos;
|
||||
if (limit - p->dicPos > rem)
|
||||
limit2 = p->dicPos + rem;
|
||||
}
|
||||
RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit));
|
||||
if (p->processedPos >= p->prop.dicSize)
|
||||
p->checkDicSize = p->prop.dicSize;
|
||||
LzmaDec_WriteRem(p, limit);
|
||||
}
|
||||
while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
|
||||
|
||||
if (p->remainLen > kMatchSpecLenStart)
|
||||
/*
|
||||
At staring of new stream we have one of the following symbols:
|
||||
- Literal - is allowed
|
||||
- Non-Rep-Match - is allowed only if it's end marker symbol
|
||||
- Rep-Match - is not allowed
|
||||
We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code
|
||||
*/
|
||||
|
||||
#define kRange0 0xFFFFFFFF
|
||||
#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))
|
||||
#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)))
|
||||
#if kBadRepCode != (0xC0000000 - 0x400)
|
||||
#error Stop_Compiling_Bad_LZMA_Check
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
LzmaDec_DecodeReal2():
|
||||
It calls LZMA_DECODE_REAL() and it adjusts limit according (p->checkDicSize).
|
||||
|
||||
We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(),
|
||||
and we support the following state of (p->checkDicSize):
|
||||
if (total_processed < p->prop.dicSize) then
|
||||
{
|
||||
p->remainLen = kMatchSpecLenStart;
|
||||
(total_processed == p->processedPos)
|
||||
(p->checkDicSize == 0)
|
||||
}
|
||||
else
|
||||
(p->checkDicSize == p->prop.dicSize)
|
||||
*/
|
||||
|
||||
static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
|
||||
{
|
||||
if (p->checkDicSize == 0)
|
||||
{
|
||||
UInt32 rem = p->prop.dicSize - p->processedPos;
|
||||
if (limit - p->dicPos > rem)
|
||||
limit = p->dicPos + rem;
|
||||
}
|
||||
{
|
||||
int res = LZMA_DECODE_REAL(p, limit, bufLimit);
|
||||
if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
|
||||
p->checkDicSize = p->prop.dicSize;
|
||||
return res;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
typedef enum
|
||||
{
|
||||
DUMMY_ERROR, /* unexpected end of input stream */
|
||||
DUMMY_INPUT_EOF, /* need more input data */
|
||||
DUMMY_LIT,
|
||||
DUMMY_MATCH,
|
||||
DUMMY_REP
|
||||
} ELzmaDummy;
|
||||
|
||||
static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
|
||||
|
||||
#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH)
|
||||
|
||||
static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut)
|
||||
{
|
||||
UInt32 range = p->range;
|
||||
UInt32 code = p->code;
|
||||
const Byte *bufLimit = buf + inSize;
|
||||
CLzmaProb *probs = p->probs;
|
||||
unsigned state = p->state;
|
||||
const Byte *bufLimit = *bufOut;
|
||||
const CLzmaProb *probs = GET_PROBS;
|
||||
unsigned state = (unsigned)p->state;
|
||||
ELzmaDummy res;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
CLzmaProb *prob;
|
||||
const CLzmaProb *prob;
|
||||
UInt32 bound;
|
||||
unsigned ttt;
|
||||
unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1);
|
||||
unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1);
|
||||
|
||||
prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
|
||||
prob = probs + IsMatch + COMBINED_PS_STATE;
|
||||
IF_BIT_0_CHECK(prob)
|
||||
{
|
||||
UPDATE_0_CHECK
|
||||
|
||||
/* if (bufLimit - buf >= 7) return DUMMY_LIT; */
|
||||
|
||||
prob = probs + Literal;
|
||||
if (p->checkDicSize != 0 || p->processedPos != 0)
|
||||
prob += (LZMA_LIT_SIZE *
|
||||
((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
|
||||
(p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
|
||||
prob += ((UInt32)LZMA_LIT_SIZE *
|
||||
((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) +
|
||||
((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
|
||||
|
||||
if (state < kNumLitStates)
|
||||
{
|
||||
@@ -528,17 +747,18 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
|
||||
else
|
||||
{
|
||||
unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
|
||||
((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)];
|
||||
(p->dicPos < p->reps[0] ? p->dicBufSize : 0)];
|
||||
unsigned offs = 0x100;
|
||||
unsigned symbol = 1;
|
||||
do
|
||||
{
|
||||
unsigned bit;
|
||||
CLzmaProb *probLit;
|
||||
matchByte <<= 1;
|
||||
bit = (matchByte & offs);
|
||||
probLit = prob + offs + bit + symbol;
|
||||
GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit)
|
||||
const CLzmaProb *probLit;
|
||||
matchByte += matchByte;
|
||||
bit = offs;
|
||||
offs &= matchByte;
|
||||
probLit = prob + (offs + bit + symbol);
|
||||
GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; )
|
||||
}
|
||||
while (symbol < 0x100);
|
||||
}
|
||||
@@ -547,55 +767,54 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
|
||||
else
|
||||
{
|
||||
unsigned len;
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
|
||||
prob = probs + IsRep + state;
|
||||
IF_BIT_0_CHECK(prob)
|
||||
{
|
||||
UPDATE_0_CHECK;
|
||||
UPDATE_0_CHECK
|
||||
state = 0;
|
||||
prob = probs + LenCoder;
|
||||
res = DUMMY_MATCH;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
res = DUMMY_REP;
|
||||
prob = probs + IsRepG0 + state;
|
||||
IF_BIT_0_CHECK(prob)
|
||||
{
|
||||
UPDATE_0_CHECK;
|
||||
prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
|
||||
UPDATE_0_CHECK
|
||||
prob = probs + IsRep0Long + COMBINED_PS_STATE;
|
||||
IF_BIT_0_CHECK(prob)
|
||||
{
|
||||
UPDATE_0_CHECK;
|
||||
NORMALIZE_CHECK;
|
||||
return DUMMY_REP;
|
||||
UPDATE_0_CHECK
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
prob = probs + IsRepG1 + state;
|
||||
IF_BIT_0_CHECK(prob)
|
||||
{
|
||||
UPDATE_0_CHECK;
|
||||
UPDATE_0_CHECK
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
prob = probs + IsRepG2 + state;
|
||||
IF_BIT_0_CHECK(prob)
|
||||
{
|
||||
UPDATE_0_CHECK;
|
||||
UPDATE_0_CHECK
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -604,34 +823,34 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
|
||||
}
|
||||
{
|
||||
unsigned limit, offset;
|
||||
CLzmaProb *probLen = prob + LenChoice;
|
||||
const CLzmaProb *probLen = prob + LenChoice;
|
||||
IF_BIT_0_CHECK(probLen)
|
||||
{
|
||||
UPDATE_0_CHECK;
|
||||
probLen = prob + LenLow + (posState << kLenNumLowBits);
|
||||
UPDATE_0_CHECK
|
||||
probLen = prob + LenLow + GET_LEN_STATE;
|
||||
offset = 0;
|
||||
limit = 1 << kLenNumLowBits;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
probLen = prob + LenChoice2;
|
||||
IF_BIT_0_CHECK(probLen)
|
||||
{
|
||||
UPDATE_0_CHECK;
|
||||
probLen = prob + LenMid + (posState << kLenNumMidBits);
|
||||
UPDATE_0_CHECK
|
||||
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
|
||||
offset = kLenNumLowSymbols;
|
||||
limit = 1 << kLenNumMidBits;
|
||||
limit = 1 << kLenNumLowBits;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
probLen = prob + LenHigh;
|
||||
offset = kLenNumLowSymbols + kLenNumMidSymbols;
|
||||
offset = kLenNumLowSymbols * 2;
|
||||
limit = 1 << kLenNumHighBits;
|
||||
}
|
||||
}
|
||||
TREE_DECODE_CHECK(probLen, limit, len);
|
||||
TREE_DECODE_CHECK(probLen, limit, len)
|
||||
len += offset;
|
||||
}
|
||||
|
||||
@@ -639,18 +858,16 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
|
||||
{
|
||||
unsigned posSlot;
|
||||
prob = probs + PosSlot +
|
||||
((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) <<
|
||||
((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
|
||||
kNumPosSlotBits);
|
||||
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
|
||||
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot)
|
||||
if (posSlot >= kStartPosModelIndex)
|
||||
{
|
||||
int numDirectBits = ((posSlot >> 1) - 1);
|
||||
|
||||
/* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
|
||||
unsigned numDirectBits = ((posSlot >> 1) - 1);
|
||||
|
||||
if (posSlot < kEndPosModelIndex)
|
||||
{
|
||||
prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1;
|
||||
prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -662,48 +879,44 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
|
||||
code -= range & (((code - range) >> 31) - 1);
|
||||
/* if (code >= range) code -= range; */
|
||||
}
|
||||
while (--numDirectBits != 0);
|
||||
while (--numDirectBits);
|
||||
prob = probs + Align;
|
||||
numDirectBits = kNumAlignBits;
|
||||
}
|
||||
{
|
||||
unsigned i = 1;
|
||||
unsigned m = 1;
|
||||
do
|
||||
{
|
||||
GET_BIT_CHECK(prob + i, i);
|
||||
REV_BIT_CHECK(prob, i, m)
|
||||
}
|
||||
while (--numDirectBits != 0);
|
||||
while (--numDirectBits);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
NORMALIZE_CHECK;
|
||||
NORMALIZE_CHECK
|
||||
|
||||
*bufOut = buf;
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
static void LzmaDec_InitRc(CLzmaDec *p, const Byte *data)
|
||||
void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);
|
||||
void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
|
||||
{
|
||||
p->code = ((UInt32)data[1] << 24) | ((UInt32)data[2] << 16) | ((UInt32)data[3] << 8) | ((UInt32)data[4]);
|
||||
p->range = 0xFFFFFFFF;
|
||||
p->needFlush = 0;
|
||||
}
|
||||
|
||||
void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState)
|
||||
{
|
||||
p->needFlush = 1;
|
||||
p->remainLen = 0;
|
||||
p->remainLen = kMatchSpecLenStart + 1;
|
||||
p->tempBufSize = 0;
|
||||
|
||||
if (initDic)
|
||||
{
|
||||
p->processedPos = 0;
|
||||
p->checkDicSize = 0;
|
||||
p->needInitState = 1;
|
||||
p->remainLen = kMatchSpecLenStart + 2;
|
||||
}
|
||||
if (initState)
|
||||
p->needInitState = 1;
|
||||
p->remainLen = kMatchSpecLenStart + 2;
|
||||
}
|
||||
|
||||
void LzmaDec_Init(CLzmaDec *p)
|
||||
@@ -712,48 +925,96 @@ void LzmaDec_Init(CLzmaDec *p)
|
||||
LzmaDec_InitDicAndState(p, True, True);
|
||||
}
|
||||
|
||||
static void LzmaDec_InitStateReal(CLzmaDec *p)
|
||||
{
|
||||
UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp));
|
||||
UInt32 i;
|
||||
CLzmaProb *probs = p->probs;
|
||||
for (i = 0; i < numProbs; i++)
|
||||
probs[i] = kBitModelTotal >> 1;
|
||||
p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
|
||||
p->state = 0;
|
||||
p->needInitState = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
LZMA supports optional end_marker.
|
||||
So the decoder can lookahead for one additional LZMA-Symbol to check end_marker.
|
||||
That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream.
|
||||
When the decoder reaches dicLimit, it looks (finishMode) parameter:
|
||||
if (finishMode == LZMA_FINISH_ANY), the decoder doesn't lookahead
|
||||
if (finishMode != LZMA_FINISH_ANY), the decoder lookahead, if end_marker is possible for current position
|
||||
|
||||
When the decoder lookahead, and the lookahead symbol is not end_marker, we have two ways:
|
||||
1) Strict mode (default) : the decoder returns SZ_ERROR_DATA.
|
||||
2) The relaxed mode (alternative mode) : we could return SZ_OK, and the caller
|
||||
must check (status) value. The caller can show the error,
|
||||
if the end of stream is expected, and the (status) is noit
|
||||
LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK.
|
||||
*/
|
||||
|
||||
|
||||
#define RETURN_NOT_FINISHED_FOR_FINISH \
|
||||
*status = LZMA_STATUS_NOT_FINISHED; \
|
||||
return SZ_ERROR_DATA; // for strict mode
|
||||
// return SZ_OK; // for relaxed mode
|
||||
|
||||
|
||||
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
|
||||
ELzmaFinishMode finishMode, ELzmaStatus *status)
|
||||
{
|
||||
SizeT inSize = *srcLen;
|
||||
(*srcLen) = 0;
|
||||
LzmaDec_WriteRem(p, dicLimit);
|
||||
|
||||
*status = LZMA_STATUS_NOT_SPECIFIED;
|
||||
|
||||
while (p->remainLen != kMatchSpecLenStart)
|
||||
if (p->remainLen > kMatchSpecLenStart)
|
||||
{
|
||||
int checkEndMarkNow;
|
||||
if (p->remainLen > kMatchSpecLenStart + 2)
|
||||
return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA;
|
||||
|
||||
if (p->needFlush != 0)
|
||||
{
|
||||
for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
|
||||
p->tempBuf[p->tempBufSize++] = *src++;
|
||||
if (p->tempBufSize < RC_INIT_SIZE)
|
||||
{
|
||||
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
|
||||
return SZ_OK;
|
||||
}
|
||||
if (p->tempBuf[0] != 0)
|
||||
return SZ_ERROR_DATA;
|
||||
for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
|
||||
p->tempBuf[p->tempBufSize++] = *src++;
|
||||
if (p->tempBufSize != 0 && p->tempBuf[0] != 0)
|
||||
return SZ_ERROR_DATA;
|
||||
if (p->tempBufSize < RC_INIT_SIZE)
|
||||
{
|
||||
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
|
||||
return SZ_OK;
|
||||
}
|
||||
p->code =
|
||||
((UInt32)p->tempBuf[1] << 24)
|
||||
| ((UInt32)p->tempBuf[2] << 16)
|
||||
| ((UInt32)p->tempBuf[3] << 8)
|
||||
| ((UInt32)p->tempBuf[4]);
|
||||
|
||||
LzmaDec_InitRc(p, p->tempBuf);
|
||||
p->tempBufSize = 0;
|
||||
}
|
||||
if (p->checkDicSize == 0
|
||||
&& p->processedPos == 0
|
||||
&& p->code >= kBadRepCode)
|
||||
return SZ_ERROR_DATA;
|
||||
|
||||
p->range = 0xFFFFFFFF;
|
||||
p->tempBufSize = 0;
|
||||
|
||||
if (p->remainLen > kMatchSpecLenStart + 1)
|
||||
{
|
||||
SizeT numProbs = LzmaProps_GetNumProbs(&p->prop);
|
||||
SizeT i;
|
||||
CLzmaProb *probs = p->probs;
|
||||
for (i = 0; i < numProbs; i++)
|
||||
probs[i] = kBitModelTotal >> 1;
|
||||
p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
|
||||
p->state = 0;
|
||||
}
|
||||
|
||||
p->remainLen = 0;
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (p->remainLen == kMatchSpecLenStart)
|
||||
{
|
||||
if (p->code != 0)
|
||||
return SZ_ERROR_DATA;
|
||||
*status = LZMA_STATUS_FINISHED_WITH_MARK;
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
LzmaDec_WriteRem(p, dicLimit);
|
||||
|
||||
{
|
||||
// (p->remainLen == 0 || p->dicPos == dicLimit)
|
||||
|
||||
int checkEndMarkNow = 0;
|
||||
|
||||
checkEndMarkNow = 0;
|
||||
if (p->dicPos >= dicLimit)
|
||||
{
|
||||
if (p->remainLen == 0 && p->code == 0)
|
||||
@@ -768,83 +1029,174 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
|
||||
}
|
||||
if (p->remainLen != 0)
|
||||
{
|
||||
*status = LZMA_STATUS_NOT_FINISHED;
|
||||
return SZ_ERROR_DATA;
|
||||
RETURN_NOT_FINISHED_FOR_FINISH
|
||||
}
|
||||
checkEndMarkNow = 1;
|
||||
}
|
||||
|
||||
if (p->needInitState)
|
||||
LzmaDec_InitStateReal(p);
|
||||
|
||||
// (p->remainLen == 0)
|
||||
|
||||
if (p->tempBufSize == 0)
|
||||
{
|
||||
SizeT processed;
|
||||
const Byte *bufLimit;
|
||||
int dummyProcessed = -1;
|
||||
|
||||
if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
|
||||
{
|
||||
int dummyRes = LzmaDec_TryDummy(p, src, inSize);
|
||||
if (dummyRes == DUMMY_ERROR)
|
||||
const Byte *bufOut = src + inSize;
|
||||
|
||||
ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut);
|
||||
|
||||
if (dummyRes == DUMMY_INPUT_EOF)
|
||||
{
|
||||
memcpy(p->tempBuf, src, inSize);
|
||||
p->tempBufSize = (unsigned)inSize;
|
||||
size_t i;
|
||||
if (inSize >= LZMA_REQUIRED_INPUT_MAX)
|
||||
break;
|
||||
(*srcLen) += inSize;
|
||||
p->tempBufSize = (unsigned)inSize;
|
||||
for (i = 0; i < inSize; i++)
|
||||
p->tempBuf[i] = src[i];
|
||||
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
|
||||
return SZ_OK;
|
||||
}
|
||||
if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
|
||||
|
||||
dummyProcessed = (int)(bufOut - src);
|
||||
if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX)
|
||||
break;
|
||||
|
||||
if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
|
||||
{
|
||||
*status = LZMA_STATUS_NOT_FINISHED;
|
||||
return SZ_ERROR_DATA;
|
||||
unsigned i;
|
||||
(*srcLen) += (unsigned)dummyProcessed;
|
||||
p->tempBufSize = (unsigned)dummyProcessed;
|
||||
for (i = 0; i < (unsigned)dummyProcessed; i++)
|
||||
p->tempBuf[i] = src[i];
|
||||
// p->remainLen = kMatchSpecLen_Error_Data;
|
||||
RETURN_NOT_FINISHED_FOR_FINISH
|
||||
}
|
||||
|
||||
bufLimit = src;
|
||||
// we will decode only one iteration
|
||||
}
|
||||
else
|
||||
bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
|
||||
|
||||
p->buf = src;
|
||||
if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
|
||||
return SZ_ERROR_DATA;
|
||||
processed = (SizeT)(p->buf - src);
|
||||
(*srcLen) += processed;
|
||||
src += processed;
|
||||
inSize -= processed;
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned rem = p->tempBufSize, lookAhead = 0;
|
||||
while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
|
||||
p->tempBuf[rem++] = src[lookAhead++];
|
||||
p->tempBufSize = rem;
|
||||
if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
|
||||
|
||||
{
|
||||
int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem);
|
||||
if (dummyRes == DUMMY_ERROR)
|
||||
int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit);
|
||||
|
||||
SizeT processed = (SizeT)(p->buf - src);
|
||||
|
||||
if (dummyProcessed < 0)
|
||||
{
|
||||
(*srcLen) += lookAhead;
|
||||
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
|
||||
return SZ_OK;
|
||||
if (processed > inSize)
|
||||
break;
|
||||
}
|
||||
if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
|
||||
else if ((unsigned)dummyProcessed != processed)
|
||||
break;
|
||||
|
||||
src += processed;
|
||||
inSize -= processed;
|
||||
(*srcLen) += processed;
|
||||
|
||||
if (res != SZ_OK)
|
||||
{
|
||||
*status = LZMA_STATUS_NOT_FINISHED;
|
||||
p->remainLen = kMatchSpecLen_Error_Data;
|
||||
return SZ_ERROR_DATA;
|
||||
}
|
||||
}
|
||||
p->buf = p->tempBuf;
|
||||
if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
|
||||
return SZ_ERROR_DATA;
|
||||
lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf));
|
||||
(*srcLen) += lookAhead;
|
||||
src += lookAhead;
|
||||
inSize -= lookAhead;
|
||||
p->tempBufSize = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
{
|
||||
// we have some data in (p->tempBuf)
|
||||
// in strict mode: tempBufSize is not enough for one Symbol decoding.
|
||||
// in relaxed mode: tempBufSize not larger than required for one Symbol decoding.
|
||||
|
||||
unsigned rem = p->tempBufSize;
|
||||
unsigned ahead = 0;
|
||||
int dummyProcessed = -1;
|
||||
|
||||
while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize)
|
||||
p->tempBuf[rem++] = src[ahead++];
|
||||
|
||||
// ahead - the size of new data copied from (src) to (p->tempBuf)
|
||||
// rem - the size of temp buffer including new data from (src)
|
||||
|
||||
if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
|
||||
{
|
||||
const Byte *bufOut = p->tempBuf + rem;
|
||||
|
||||
ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut);
|
||||
|
||||
if (dummyRes == DUMMY_INPUT_EOF)
|
||||
{
|
||||
if (rem >= LZMA_REQUIRED_INPUT_MAX)
|
||||
break;
|
||||
p->tempBufSize = rem;
|
||||
(*srcLen) += (SizeT)ahead;
|
||||
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
dummyProcessed = (int)(bufOut - p->tempBuf);
|
||||
|
||||
if ((unsigned)dummyProcessed < p->tempBufSize)
|
||||
break;
|
||||
|
||||
if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
|
||||
{
|
||||
(*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
|
||||
p->tempBufSize = (unsigned)dummyProcessed;
|
||||
// p->remainLen = kMatchSpecLen_Error_Data;
|
||||
RETURN_NOT_FINISHED_FOR_FINISH
|
||||
}
|
||||
}
|
||||
|
||||
p->buf = p->tempBuf;
|
||||
|
||||
{
|
||||
// we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf)
|
||||
int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf);
|
||||
|
||||
SizeT processed = (SizeT)(p->buf - p->tempBuf);
|
||||
rem = p->tempBufSize;
|
||||
|
||||
if (dummyProcessed < 0)
|
||||
{
|
||||
if (processed > LZMA_REQUIRED_INPUT_MAX)
|
||||
break;
|
||||
if (processed < rem)
|
||||
break;
|
||||
}
|
||||
else if ((unsigned)dummyProcessed != processed)
|
||||
break;
|
||||
|
||||
processed -= rem;
|
||||
|
||||
src += processed;
|
||||
inSize -= processed;
|
||||
(*srcLen) += processed;
|
||||
p->tempBufSize = 0;
|
||||
|
||||
if (res != SZ_OK)
|
||||
{
|
||||
p->remainLen = kMatchSpecLen_Error_Data;
|
||||
return SZ_ERROR_DATA;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (p->code == 0)
|
||||
*status = LZMA_STATUS_FINISHED_WITH_MARK;
|
||||
return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA;
|
||||
|
||||
/* Some unexpected error: internal error of code, memory corruption or hardware failure */
|
||||
p->remainLen = kMatchSpecLen_Error_Fail;
|
||||
return SZ_ERROR_FAIL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
|
||||
{
|
||||
SizeT outSize = *destLen;
|
||||
@@ -885,19 +1237,19 @@ SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *sr
|
||||
}
|
||||
}
|
||||
|
||||
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc)
|
||||
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc)
|
||||
{
|
||||
alloc->Free(alloc, p->probs);
|
||||
p->probs = 0;
|
||||
ISzAlloc_Free(alloc, p->probs);
|
||||
p->probs = NULL;
|
||||
}
|
||||
|
||||
static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc)
|
||||
static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc)
|
||||
{
|
||||
alloc->Free(alloc, p->dic);
|
||||
p->dic = 0;
|
||||
ISzAlloc_Free(alloc, p->dic);
|
||||
p->dic = NULL;
|
||||
}
|
||||
|
||||
void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc)
|
||||
void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc)
|
||||
{
|
||||
LzmaDec_FreeProbs(p, alloc);
|
||||
LzmaDec_FreeDict(p, alloc);
|
||||
@@ -921,49 +1273,60 @@ SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
|
||||
if (d >= (9 * 5 * 5))
|
||||
return SZ_ERROR_UNSUPPORTED;
|
||||
|
||||
p->lc = d % 9;
|
||||
p->lc = (Byte)(d % 9);
|
||||
d /= 9;
|
||||
p->pb = d / 5;
|
||||
p->lp = d % 5;
|
||||
p->pb = (Byte)(d / 5);
|
||||
p->lp = (Byte)(d % 5);
|
||||
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc)
|
||||
static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc)
|
||||
{
|
||||
UInt32 numProbs = LzmaProps_GetNumProbs(propNew);
|
||||
if (p->probs == 0 || numProbs != p->numProbs)
|
||||
if (!p->probs || numProbs != p->numProbs)
|
||||
{
|
||||
LzmaDec_FreeProbs(p, alloc);
|
||||
p->probs = (CLzmaProb *)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb));
|
||||
p->numProbs = numProbs;
|
||||
if (p->probs == 0)
|
||||
p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb));
|
||||
if (!p->probs)
|
||||
return SZ_ERROR_MEM;
|
||||
p->probs_1664 = p->probs + 1664;
|
||||
p->numProbs = numProbs;
|
||||
}
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc)
|
||||
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
|
||||
{
|
||||
CLzmaProps propNew;
|
||||
RINOK(LzmaProps_Decode(&propNew, props, propsSize));
|
||||
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
|
||||
RINOK(LzmaProps_Decode(&propNew, props, propsSize))
|
||||
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
|
||||
p->prop = propNew;
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc)
|
||||
SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
|
||||
{
|
||||
CLzmaProps propNew;
|
||||
SizeT dicBufSize;
|
||||
RINOK(LzmaProps_Decode(&propNew, props, propsSize));
|
||||
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
|
||||
dicBufSize = propNew.dicSize;
|
||||
if (p->dic == 0 || dicBufSize != p->dicBufSize)
|
||||
RINOK(LzmaProps_Decode(&propNew, props, propsSize))
|
||||
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
|
||||
|
||||
{
|
||||
UInt32 dictSize = propNew.dicSize;
|
||||
SizeT mask = ((UInt32)1 << 12) - 1;
|
||||
if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
|
||||
else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;
|
||||
dicBufSize = ((SizeT)dictSize + mask) & ~mask;
|
||||
if (dicBufSize < dictSize)
|
||||
dicBufSize = dictSize;
|
||||
}
|
||||
|
||||
if (!p->dic || dicBufSize != p->dicBufSize)
|
||||
{
|
||||
LzmaDec_FreeDict(p, alloc);
|
||||
p->dic = (Byte *)alloc->Alloc(alloc, dicBufSize);
|
||||
if (p->dic == 0)
|
||||
p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize);
|
||||
if (!p->dic)
|
||||
{
|
||||
LzmaDec_FreeProbs(p, alloc);
|
||||
return SZ_ERROR_MEM;
|
||||
@@ -976,32 +1339,25 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll
|
||||
|
||||
SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
|
||||
const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
|
||||
ELzmaStatus *status, ISzAlloc *alloc)
|
||||
ELzmaStatus *status, ISzAllocPtr alloc)
|
||||
{
|
||||
CLzmaDec p;
|
||||
SRes res;
|
||||
SizeT inSize = *srcLen;
|
||||
SizeT outSize = *destLen;
|
||||
*srcLen = *destLen = 0;
|
||||
SizeT outSize = *destLen, inSize = *srcLen;
|
||||
*destLen = *srcLen = 0;
|
||||
*status = LZMA_STATUS_NOT_SPECIFIED;
|
||||
if (inSize < RC_INIT_SIZE)
|
||||
return SZ_ERROR_INPUT_EOF;
|
||||
|
||||
LzmaDec_Construct(&p);
|
||||
res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc);
|
||||
if (res != 0)
|
||||
return res;
|
||||
LzmaDec_CONSTRUCT(&p)
|
||||
RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc))
|
||||
p.dic = dest;
|
||||
p.dicBufSize = outSize;
|
||||
|
||||
LzmaDec_Init(&p);
|
||||
|
||||
*srcLen = inSize;
|
||||
res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
|
||||
|
||||
*destLen = p.dicPos;
|
||||
if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
|
||||
res = SZ_ERROR_INPUT_EOF;
|
||||
|
||||
(*destLen) = p.dicPos;
|
||||
LzmaDec_FreeProbs(&p, alloc);
|
||||
return res;
|
||||
}
|
||||
|
||||
68
extern/lzma/LzmaDec.h
vendored
68
extern/lzma/LzmaDec.h
vendored
@@ -1,29 +1,36 @@
|
||||
/* LzmaDec.h -- LZMA Decoder
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZMADEC_H
|
||||
#define __LZMADEC_H
|
||||
#ifndef ZIP7_INC_LZMA_DEC_H
|
||||
#define ZIP7_INC_LZMA_DEC_H
|
||||
|
||||
#include "Types.h"
|
||||
#include "7zTypes.h"
|
||||
|
||||
/* #define _LZMA_PROB32 */
|
||||
/* _LZMA_PROB32 can increase the speed on some CPUs,
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
/* #define Z7_LZMA_PROB32 */
|
||||
/* Z7_LZMA_PROB32 can increase the speed on some CPUs,
|
||||
but memory usage for CLzmaDec::probs will be doubled in that case */
|
||||
|
||||
#ifdef _LZMA_PROB32
|
||||
#define CLzmaProb UInt32
|
||||
typedef
|
||||
#ifdef Z7_LZMA_PROB32
|
||||
UInt32
|
||||
#else
|
||||
#define CLzmaProb UInt16
|
||||
UInt16
|
||||
#endif
|
||||
CLzmaProb;
|
||||
|
||||
|
||||
/* ---------- LZMA Properties ---------- */
|
||||
|
||||
#define LZMA_PROPS_SIZE 5
|
||||
|
||||
typedef struct _CLzmaProps
|
||||
typedef struct
|
||||
{
|
||||
unsigned lc, lp, pb;
|
||||
Byte lc;
|
||||
Byte lp;
|
||||
Byte pb;
|
||||
Byte _pad_;
|
||||
UInt32 dicSize;
|
||||
} CLzmaProps;
|
||||
|
||||
@@ -45,32 +52,35 @@ SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* Don't change this structure. ASM code can use it. */
|
||||
CLzmaProps prop;
|
||||
CLzmaProb *probs;
|
||||
CLzmaProb *probs_1664;
|
||||
Byte *dic;
|
||||
const Byte *buf;
|
||||
UInt32 range, code;
|
||||
SizeT dicPos;
|
||||
SizeT dicBufSize;
|
||||
SizeT dicPos;
|
||||
const Byte *buf;
|
||||
UInt32 range;
|
||||
UInt32 code;
|
||||
UInt32 processedPos;
|
||||
UInt32 checkDicSize;
|
||||
unsigned state;
|
||||
UInt32 reps[4];
|
||||
unsigned remainLen;
|
||||
int needFlush;
|
||||
int needInitState;
|
||||
UInt32 state;
|
||||
UInt32 remainLen;
|
||||
|
||||
UInt32 numProbs;
|
||||
unsigned tempBufSize;
|
||||
Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
|
||||
} CLzmaDec;
|
||||
|
||||
#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; }
|
||||
#define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; }
|
||||
#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p)
|
||||
|
||||
void LzmaDec_Init(CLzmaDec *p);
|
||||
|
||||
/* There are two types of LZMA streams:
|
||||
0) Stream with end mark. That end mark adds about 6 bytes to compressed size.
|
||||
1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */
|
||||
- Stream with end mark. That end mark adds about 6 bytes to compressed size.
|
||||
- Stream without end mark. You must know exact uncompressed size to decompress such stream. */
|
||||
|
||||
typedef enum
|
||||
{
|
||||
@@ -127,11 +137,11 @@ LzmaDec_Allocate* can return:
|
||||
SZ_ERROR_UNSUPPORTED - Unsupported properties
|
||||
*/
|
||||
|
||||
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc);
|
||||
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc);
|
||||
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
|
||||
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc);
|
||||
|
||||
SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc);
|
||||
void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc);
|
||||
SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
|
||||
void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc);
|
||||
|
||||
/* ---------- Dictionary Interface ---------- */
|
||||
|
||||
@@ -140,7 +150,7 @@ void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc);
|
||||
You must work with CLzmaDec variables directly in this interface.
|
||||
|
||||
STEPS:
|
||||
LzmaDec_Constr()
|
||||
LzmaDec_Construct()
|
||||
LzmaDec_Allocate()
|
||||
for (each new stream)
|
||||
{
|
||||
@@ -172,6 +182,7 @@ Returns:
|
||||
LZMA_STATUS_NEEDS_MORE_INPUT
|
||||
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
|
||||
SZ_ERROR_DATA - Data error
|
||||
SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
|
||||
*/
|
||||
|
||||
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
|
||||
@@ -214,10 +225,13 @@ Returns:
|
||||
SZ_ERROR_MEM - Memory allocation error
|
||||
SZ_ERROR_UNSUPPORTED - Unsupported properties
|
||||
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
|
||||
SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
|
||||
*/
|
||||
|
||||
SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
|
||||
const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
|
||||
ELzmaStatus *status, ISzAlloc *alloc);
|
||||
ELzmaStatus *status, ISzAllocPtr alloc);
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
#endif
|
||||
|
||||
3438
extern/lzma/LzmaEnc.c
vendored
3438
extern/lzma/LzmaEnc.c
vendored
@@ -1,5 +1,7 @@
|
||||
/* LzmaEnc.c -- LZMA Encoder
|
||||
2009-02-02 : Igor Pavlov : Public domain */
|
||||
2023-04-13: Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
@@ -10,28 +12,36 @@
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include "CpuArch.h"
|
||||
#include "LzmaEnc.h"
|
||||
|
||||
#include "LzFind.h"
|
||||
#ifdef COMPRESS_MF_MT
|
||||
#ifndef Z7_ST
|
||||
#include "LzFindMt.h"
|
||||
#endif
|
||||
|
||||
/* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */
|
||||
|
||||
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize,
|
||||
ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen,
|
||||
UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
|
||||
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
|
||||
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p);
|
||||
void LzmaEnc_Finish(CLzmaEncHandle p);
|
||||
void LzmaEnc_SaveState(CLzmaEncHandle p);
|
||||
void LzmaEnc_RestoreState(CLzmaEncHandle p);
|
||||
|
||||
#ifdef SHOW_STAT
|
||||
static int ttt = 0;
|
||||
static unsigned g_STAT_OFFSET = 0;
|
||||
#endif
|
||||
|
||||
#define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1)
|
||||
/* for good normalization speed we still reserve 256 MB before 4 GB range */
|
||||
#define kLzmaMaxHistorySize ((UInt32)15 << 28)
|
||||
|
||||
#define kBlockSize (9 << 10)
|
||||
#define kUnpackBlockSize (1 << 18)
|
||||
#define kMatchArraySize (1 << 21)
|
||||
#define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX)
|
||||
|
||||
#define kNumMaxDirectBits (31)
|
||||
|
||||
#define kNumTopBits 24
|
||||
#define kTopValue ((UInt32)1 << kNumTopBits)
|
||||
// #define kNumTopBits 24
|
||||
#define kTopValue ((UInt32)1 << 24)
|
||||
|
||||
#define kNumBitModelTotalBits 11
|
||||
#define kBitModelTotal (1 << kNumBitModelTotalBits)
|
||||
@@ -40,14 +50,19 @@ static int ttt = 0;
|
||||
|
||||
#define kNumMoveReducingBits 4
|
||||
#define kNumBitPriceShiftBits 4
|
||||
#define kBitPrice (1 << kNumBitPriceShiftBits)
|
||||
// #define kBitPrice (1 << kNumBitPriceShiftBits)
|
||||
|
||||
#define REP_LEN_COUNT 64
|
||||
|
||||
void LzmaEncProps_Init(CLzmaEncProps *p)
|
||||
{
|
||||
p->level = 5;
|
||||
p->dictSize = p->mc = 0;
|
||||
p->reduceSize = (UInt64)(Int64)-1;
|
||||
p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
|
||||
p->numHashOutBits = 0;
|
||||
p->writeEndMark = 0;
|
||||
p->affinity = 0;
|
||||
}
|
||||
|
||||
void LzmaEncProps_Normalize(CLzmaEncProps *p)
|
||||
@@ -55,18 +70,37 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
|
||||
int level = p->level;
|
||||
if (level < 0) level = 5;
|
||||
p->level = level;
|
||||
if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26)));
|
||||
|
||||
if (p->dictSize == 0)
|
||||
p->dictSize =
|
||||
( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) :
|
||||
( level <= 6 ? ((UInt32)1 << (level + 19)) :
|
||||
( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26)
|
||||
)));
|
||||
|
||||
if (p->dictSize > p->reduceSize)
|
||||
{
|
||||
UInt32 v = (UInt32)p->reduceSize;
|
||||
const UInt32 kReduceMin = ((UInt32)1 << 12);
|
||||
if (v < kReduceMin)
|
||||
v = kReduceMin;
|
||||
if (p->dictSize > v)
|
||||
p->dictSize = v;
|
||||
}
|
||||
|
||||
if (p->lc < 0) p->lc = 3;
|
||||
if (p->lp < 0) p->lp = 0;
|
||||
if (p->pb < 0) p->pb = 2;
|
||||
|
||||
if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
|
||||
if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
|
||||
if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
|
||||
if (p->numHashBytes < 0) p->numHashBytes = 4;
|
||||
if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
|
||||
if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5);
|
||||
if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1);
|
||||
|
||||
if (p->numThreads < 0)
|
||||
p->numThreads =
|
||||
#ifdef COMPRESS_MF_MT
|
||||
#ifndef Z7_ST
|
||||
((p->btMode && p->algo) ? 2 : 1);
|
||||
#else
|
||||
1;
|
||||
@@ -80,48 +114,133 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
|
||||
return props.dictSize;
|
||||
}
|
||||
|
||||
/* #define LZMA_LOG_BSR */
|
||||
/* Define it for Intel's CPU */
|
||||
|
||||
/*
|
||||
x86/x64:
|
||||
|
||||
BSR:
|
||||
IF (SRC == 0) ZF = 1, DEST is undefined;
|
||||
AMD : DEST is unchanged;
|
||||
IF (SRC != 0) ZF = 0; DEST is index of top non-zero bit
|
||||
BSR is slow in some processors
|
||||
|
||||
LZCNT:
|
||||
IF (SRC == 0) CF = 1, DEST is size_in_bits_of_register(src) (32 or 64)
|
||||
IF (SRC != 0) CF = 0, DEST = num_lead_zero_bits
|
||||
IF (DEST == 0) ZF = 1;
|
||||
|
||||
LZCNT works only in new processors starting from Haswell.
|
||||
if LZCNT is not supported by processor, then it's executed as BSR.
|
||||
LZCNT can be faster than BSR, if supported.
|
||||
*/
|
||||
|
||||
// #define LZMA_LOG_BSR
|
||||
|
||||
#if defined(MY_CPU_ARM_OR_ARM64) /* || defined(MY_CPU_X86_OR_AMD64) */
|
||||
|
||||
#if (defined(__clang__) && (__clang_major__ >= 6)) \
|
||||
|| (defined(__GNUC__) && (__GNUC__ >= 6))
|
||||
#define LZMA_LOG_BSR
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1300)
|
||||
// #if defined(MY_CPU_ARM_OR_ARM64)
|
||||
#define LZMA_LOG_BSR
|
||||
// #endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// #include <intrin.h>
|
||||
|
||||
#ifdef LZMA_LOG_BSR
|
||||
|
||||
#define kDicLogSizeMaxCompress 30
|
||||
#if defined(__clang__) \
|
||||
|| defined(__GNUC__)
|
||||
|
||||
#define BSR2_RET(pos, res) { unsigned long i; _BitScanReverse(&i, (pos)); res = (i + i) + ((pos >> (i - 1)) & 1); }
|
||||
/*
|
||||
C code: : (30 - __builtin_clz(x))
|
||||
gcc9/gcc10 for x64 /x86 : 30 - (bsr(x) xor 31)
|
||||
clang10 for x64 : 31 + (bsr(x) xor -32)
|
||||
*/
|
||||
|
||||
UInt32 GetPosSlot1(UInt32 pos)
|
||||
#define MY_clz(x) ((unsigned)__builtin_clz(x))
|
||||
// __lzcnt32
|
||||
// __builtin_ia32_lzcnt_u32
|
||||
|
||||
#else // #if defined(_MSC_VER)
|
||||
|
||||
#ifdef MY_CPU_ARM_OR_ARM64
|
||||
|
||||
#define MY_clz _CountLeadingZeros
|
||||
|
||||
#else // if defined(MY_CPU_X86_OR_AMD64)
|
||||
|
||||
// #define MY_clz __lzcnt // we can use lzcnt (unsupported by old CPU)
|
||||
// _BitScanReverse code is not optimal for some MSVC compilers
|
||||
#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); zz--; \
|
||||
res = (zz + zz) + (pos >> zz); }
|
||||
|
||||
#endif // MY_CPU_X86_OR_AMD64
|
||||
|
||||
#endif // _MSC_VER
|
||||
|
||||
|
||||
#ifndef BSR2_RET
|
||||
|
||||
#define BSR2_RET(pos, res) { unsigned zz = 30 - MY_clz(pos); \
|
||||
res = (zz + zz) + (pos >> zz); }
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
unsigned GetPosSlot1(UInt32 pos);
|
||||
unsigned GetPosSlot1(UInt32 pos)
|
||||
{
|
||||
UInt32 res;
|
||||
unsigned res;
|
||||
BSR2_RET(pos, res);
|
||||
return res;
|
||||
}
|
||||
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
|
||||
#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
|
||||
|
||||
#else
|
||||
|
||||
#define kNumLogBits (9 + (int)sizeof(size_t) / 2)
|
||||
#else // ! LZMA_LOG_BSR
|
||||
|
||||
#define kNumLogBits (11 + sizeof(size_t) / 8 * 3)
|
||||
|
||||
#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
|
||||
|
||||
void LzmaEnc_FastPosInit(Byte *g_FastPos)
|
||||
static void LzmaEnc_FastPosInit(Byte *g_FastPos)
|
||||
{
|
||||
int c = 2, slotFast;
|
||||
unsigned slot;
|
||||
g_FastPos[0] = 0;
|
||||
g_FastPos[1] = 1;
|
||||
g_FastPos += 2;
|
||||
|
||||
for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++)
|
||||
for (slot = 2; slot < kNumLogBits * 2; slot++)
|
||||
{
|
||||
UInt32 k = (1 << ((slotFast >> 1) - 1));
|
||||
UInt32 j;
|
||||
for (j = 0; j < k; j++, c++)
|
||||
g_FastPos[c] = (Byte)slotFast;
|
||||
size_t k = ((size_t)1 << ((slot >> 1) - 1));
|
||||
size_t j;
|
||||
for (j = 0; j < k; j++)
|
||||
g_FastPos[j] = (Byte)slot;
|
||||
g_FastPos += k;
|
||||
}
|
||||
}
|
||||
|
||||
#define BSR2_RET(pos, res) { UInt32 i = 6 + ((kNumLogBits - 1) & \
|
||||
/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */
|
||||
/*
|
||||
#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
|
||||
(0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \
|
||||
res = p->g_FastPos[pos >> i] + (i * 2); }
|
||||
res = p->g_FastPos[pos >> zz] + (zz * 2); }
|
||||
*/
|
||||
|
||||
/*
|
||||
#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
|
||||
(0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \
|
||||
res = p->g_FastPos[pos >> zz] + (zz * 2); }
|
||||
*/
|
||||
|
||||
#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \
|
||||
res = p->g_FastPos[pos >> zz] + (zz * 2); }
|
||||
|
||||
/*
|
||||
#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \
|
||||
p->g_FastPos[pos >> 6] + 12 : \
|
||||
@@ -130,55 +249,57 @@ void LzmaEnc_FastPosInit(Byte *g_FastPos)
|
||||
|
||||
#define GetPosSlot1(pos) p->g_FastPos[pos]
|
||||
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
|
||||
#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos]; else BSR2_RET(pos, res); }
|
||||
#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); }
|
||||
|
||||
#endif
|
||||
#endif // LZMA_LOG_BSR
|
||||
|
||||
|
||||
#define LZMA_NUM_REPS 4
|
||||
|
||||
typedef unsigned CState;
|
||||
typedef UInt16 CState;
|
||||
typedef UInt16 CExtra;
|
||||
|
||||
typedef struct _COptimal
|
||||
typedef struct
|
||||
{
|
||||
UInt32 price;
|
||||
|
||||
CState state;
|
||||
int prev1IsChar;
|
||||
int prev2;
|
||||
|
||||
UInt32 posPrev2;
|
||||
UInt32 backPrev2;
|
||||
|
||||
UInt32 posPrev;
|
||||
UInt32 backPrev;
|
||||
UInt32 backs[LZMA_NUM_REPS];
|
||||
CExtra extra;
|
||||
// 0 : normal
|
||||
// 1 : LIT : MATCH
|
||||
// > 1 : MATCH (extra-1) : LIT : REP0 (len)
|
||||
UInt32 len;
|
||||
UInt32 dist;
|
||||
UInt32 reps[LZMA_NUM_REPS];
|
||||
} COptimal;
|
||||
|
||||
#define kNumOpts (1 << 12)
|
||||
|
||||
// 18.06
|
||||
#define kNumOpts (1 << 11)
|
||||
#define kPackReserve (kNumOpts * 8)
|
||||
// #define kNumOpts (1 << 12)
|
||||
// #define kPackReserve (1 + kNumOpts * 2)
|
||||
|
||||
#define kNumLenToPosStates 4
|
||||
#define kNumPosSlotBits 6
|
||||
#define kDicLogSizeMin 0
|
||||
// #define kDicLogSizeMin 0
|
||||
#define kDicLogSizeMax 32
|
||||
#define kDistTableSizeMax (kDicLogSizeMax * 2)
|
||||
|
||||
|
||||
#define kNumAlignBits 4
|
||||
#define kAlignTableSize (1 << kNumAlignBits)
|
||||
#define kAlignMask (kAlignTableSize - 1)
|
||||
|
||||
#define kStartPosModelIndex 4
|
||||
#define kEndPosModelIndex 14
|
||||
#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex)
|
||||
#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
|
||||
|
||||
#define kNumFullDistances (1 << (kEndPosModelIndex / 2))
|
||||
|
||||
#ifdef _LZMA_PROB32
|
||||
#define CLzmaProb UInt32
|
||||
typedef
|
||||
#ifdef Z7_LZMA_PROB32
|
||||
UInt32
|
||||
#else
|
||||
#define CLzmaProb UInt16
|
||||
UInt16
|
||||
#endif
|
||||
CLzmaProb;
|
||||
|
||||
#define LZMA_PB_MAX 4
|
||||
#define LZMA_LC_MAX 8
|
||||
@@ -186,270 +307,287 @@ typedef struct _COptimal
|
||||
|
||||
#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)
|
||||
|
||||
|
||||
#define kLenNumLowBits 3
|
||||
#define kLenNumLowSymbols (1 << kLenNumLowBits)
|
||||
#define kLenNumMidBits 3
|
||||
#define kLenNumMidSymbols (1 << kLenNumMidBits)
|
||||
#define kLenNumHighBits 8
|
||||
#define kLenNumHighSymbols (1 << kLenNumHighBits)
|
||||
|
||||
#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)
|
||||
#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols)
|
||||
|
||||
#define LZMA_MATCH_LEN_MIN 2
|
||||
#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1)
|
||||
|
||||
#define kNumStates 12
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
CLzmaProb choice;
|
||||
CLzmaProb choice2;
|
||||
CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits];
|
||||
CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits];
|
||||
CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)];
|
||||
CLzmaProb high[kLenNumHighSymbols];
|
||||
} CLenEnc;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
CLenEnc p;
|
||||
unsigned tableSize;
|
||||
UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];
|
||||
UInt32 tableSize;
|
||||
UInt32 counters[LZMA_NUM_PB_STATES_MAX];
|
||||
// UInt32 prices1[LZMA_NUM_PB_STATES_MAX][kLenNumLowSymbols * 2];
|
||||
// UInt32 prices2[kLenNumSymbolsTotal];
|
||||
} CLenPriceEnc;
|
||||
|
||||
typedef struct _CRangeEnc
|
||||
#define GET_PRICE_LEN(p, posState, len) \
|
||||
((p)->prices[posState][(size_t)(len) - LZMA_MATCH_LEN_MIN])
|
||||
|
||||
/*
|
||||
#define GET_PRICE_LEN(p, posState, len) \
|
||||
((p)->prices2[(size_t)(len) - 2] + ((p)->prices1[posState][((len) - 2) & (kLenNumLowSymbols * 2 - 1)] & (((len) - 2 - kLenNumLowSymbols * 2) >> 9)))
|
||||
*/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
UInt32 range;
|
||||
Byte cache;
|
||||
unsigned cache;
|
||||
UInt64 low;
|
||||
UInt64 cacheSize;
|
||||
Byte *buf;
|
||||
Byte *bufLim;
|
||||
Byte *bufBase;
|
||||
ISeqOutStream *outStream;
|
||||
ISeqOutStreamPtr outStream;
|
||||
UInt64 processed;
|
||||
SRes res;
|
||||
} CRangeEnc;
|
||||
|
||||
typedef struct _CSeqInStreamBuf
|
||||
{
|
||||
ISeqInStream funcTable;
|
||||
const Byte *data;
|
||||
SizeT rem;
|
||||
} CSeqInStreamBuf;
|
||||
|
||||
static SRes MyRead(void *pp, void *data, size_t *size)
|
||||
{
|
||||
size_t curSize = *size;
|
||||
CSeqInStreamBuf *p = (CSeqInStreamBuf *)pp;
|
||||
if (p->rem < curSize)
|
||||
curSize = p->rem;
|
||||
memcpy(data, p->data, curSize);
|
||||
p->rem -= curSize;
|
||||
p->data += curSize;
|
||||
*size = curSize;
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
CLzmaProb *litProbs;
|
||||
|
||||
CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
|
||||
unsigned state;
|
||||
UInt32 reps[LZMA_NUM_REPS];
|
||||
|
||||
CLzmaProb posAlignEncoder[1 << kNumAlignBits];
|
||||
CLzmaProb isRep[kNumStates];
|
||||
CLzmaProb isRepG0[kNumStates];
|
||||
CLzmaProb isRepG1[kNumStates];
|
||||
CLzmaProb isRepG2[kNumStates];
|
||||
CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
|
||||
CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
|
||||
|
||||
CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
|
||||
CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
|
||||
CLzmaProb posAlignEncoder[1 << kNumAlignBits];
|
||||
CLzmaProb posEncoders[kNumFullDistances];
|
||||
|
||||
CLenPriceEnc lenEnc;
|
||||
CLenPriceEnc repLenEnc;
|
||||
CLenEnc lenProbs;
|
||||
CLenEnc repLenProbs;
|
||||
|
||||
UInt32 reps[LZMA_NUM_REPS];
|
||||
UInt32 state;
|
||||
} CSaveState;
|
||||
|
||||
typedef struct _CLzmaEnc
|
||||
|
||||
typedef UInt32 CProbPrice;
|
||||
|
||||
|
||||
struct CLzmaEnc
|
||||
{
|
||||
IMatchFinder matchFinder;
|
||||
void *matchFinderObj;
|
||||
IMatchFinder2 matchFinder;
|
||||
|
||||
#ifdef COMPRESS_MF_MT
|
||||
Bool mtMode;
|
||||
unsigned optCur;
|
||||
unsigned optEnd;
|
||||
|
||||
unsigned longestMatchLen;
|
||||
unsigned numPairs;
|
||||
UInt32 numAvail;
|
||||
|
||||
unsigned state;
|
||||
unsigned numFastBytes;
|
||||
unsigned additionalOffset;
|
||||
UInt32 reps[LZMA_NUM_REPS];
|
||||
unsigned lpMask, pbMask;
|
||||
CLzmaProb *litProbs;
|
||||
CRangeEnc rc;
|
||||
|
||||
UInt32 backRes;
|
||||
|
||||
unsigned lc, lp, pb;
|
||||
unsigned lclp;
|
||||
|
||||
BoolInt fastMode;
|
||||
BoolInt writeEndMark;
|
||||
BoolInt finished;
|
||||
BoolInt multiThread;
|
||||
BoolInt needInit;
|
||||
// BoolInt _maxMode;
|
||||
|
||||
UInt64 nowPos64;
|
||||
|
||||
unsigned matchPriceCount;
|
||||
// unsigned alignPriceCount;
|
||||
int repLenEncCounter;
|
||||
|
||||
unsigned distTableSize;
|
||||
|
||||
UInt32 dictSize;
|
||||
SRes result;
|
||||
|
||||
#ifndef Z7_ST
|
||||
BoolInt mtMode;
|
||||
// begin of CMatchFinderMt is used in LZ thread
|
||||
CMatchFinderMt matchFinderMt;
|
||||
// end of CMatchFinderMt is used in BT and HASH threads
|
||||
// #else
|
||||
// CMatchFinder matchFinderBase;
|
||||
#endif
|
||||
|
||||
CMatchFinder matchFinderBase;
|
||||
|
||||
#ifdef COMPRESS_MF_MT
|
||||
|
||||
// we suppose that we have 8-bytes alignment after CMatchFinder
|
||||
|
||||
#ifndef Z7_ST
|
||||
Byte pad[128];
|
||||
#endif
|
||||
|
||||
UInt32 optimumEndIndex;
|
||||
UInt32 optimumCurrentIndex;
|
||||
// LZ thread
|
||||
CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
|
||||
|
||||
UInt32 longestMatchLength;
|
||||
UInt32 numPairs;
|
||||
UInt32 numAvail;
|
||||
COptimal opt[kNumOpts];
|
||||
// we want {len , dist} pairs to be 8-bytes aligned in matches array
|
||||
UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2];
|
||||
|
||||
// we want 8-bytes alignment here
|
||||
UInt32 alignPrices[kAlignTableSize];
|
||||
UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
|
||||
UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
|
||||
|
||||
CLzmaProb posAlignEncoder[1 << kNumAlignBits];
|
||||
CLzmaProb isRep[kNumStates];
|
||||
CLzmaProb isRepG0[kNumStates];
|
||||
CLzmaProb isRepG1[kNumStates];
|
||||
CLzmaProb isRepG2[kNumStates];
|
||||
CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
|
||||
CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
|
||||
CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
|
||||
CLzmaProb posEncoders[kNumFullDistances];
|
||||
|
||||
CLenEnc lenProbs;
|
||||
CLenEnc repLenProbs;
|
||||
|
||||
#ifndef LZMA_LOG_BSR
|
||||
Byte g_FastPos[1 << kNumLogBits];
|
||||
#endif
|
||||
|
||||
UInt32 ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
|
||||
UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
|
||||
UInt32 numFastBytes;
|
||||
UInt32 additionalOffset;
|
||||
UInt32 reps[LZMA_NUM_REPS];
|
||||
UInt32 state;
|
||||
|
||||
UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
|
||||
UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
|
||||
UInt32 alignPrices[kAlignTableSize];
|
||||
UInt32 alignPriceCount;
|
||||
|
||||
UInt32 distTableSize;
|
||||
|
||||
unsigned lc, lp, pb;
|
||||
unsigned lpMask, pbMask;
|
||||
|
||||
CLzmaProb *litProbs;
|
||||
|
||||
CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
|
||||
CLzmaProb isRep[kNumStates];
|
||||
CLzmaProb isRepG0[kNumStates];
|
||||
CLzmaProb isRepG1[kNumStates];
|
||||
CLzmaProb isRepG2[kNumStates];
|
||||
CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
|
||||
|
||||
CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
|
||||
CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
|
||||
CLzmaProb posAlignEncoder[1 << kNumAlignBits];
|
||||
|
||||
CLenPriceEnc lenEnc;
|
||||
CLenPriceEnc repLenEnc;
|
||||
|
||||
unsigned lclp;
|
||||
|
||||
Bool fastMode;
|
||||
|
||||
CRangeEnc rc;
|
||||
|
||||
Bool writeEndMark;
|
||||
UInt64 nowPos64;
|
||||
UInt32 matchPriceCount;
|
||||
Bool finished;
|
||||
Bool multiThread;
|
||||
|
||||
SRes result;
|
||||
UInt32 dictSize;
|
||||
UInt32 matchFinderCycles;
|
||||
|
||||
ISeqInStream *inStream;
|
||||
CSeqInStreamBuf seqBufInStream;
|
||||
COptimal opt[kNumOpts];
|
||||
|
||||
CSaveState saveState;
|
||||
} CLzmaEnc;
|
||||
|
||||
void LzmaEnc_SaveState(CLzmaEncHandle pp)
|
||||
// BoolInt mf_Failure;
|
||||
#ifndef Z7_ST
|
||||
Byte pad2[128];
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
#define MFB (p->matchFinderBase)
|
||||
/*
|
||||
#ifndef Z7_ST
|
||||
#define MFB (p->matchFinderMt.MatchFinder)
|
||||
#endif
|
||||
*/
|
||||
|
||||
// #define GET_CLzmaEnc_p CLzmaEnc *p = (CLzmaEnc*)(void *)p;
|
||||
// #define GET_const_CLzmaEnc_p const CLzmaEnc *p = (const CLzmaEnc*)(const void *)p;
|
||||
|
||||
#define COPY_ARR(dest, src, arr) memcpy((dest)->arr, (src)->arr, sizeof((src)->arr));
|
||||
|
||||
#define COPY_LZMA_ENC_STATE(d, s, p) \
|
||||
(d)->state = (s)->state; \
|
||||
COPY_ARR(d, s, reps) \
|
||||
COPY_ARR(d, s, posAlignEncoder) \
|
||||
COPY_ARR(d, s, isRep) \
|
||||
COPY_ARR(d, s, isRepG0) \
|
||||
COPY_ARR(d, s, isRepG1) \
|
||||
COPY_ARR(d, s, isRepG2) \
|
||||
COPY_ARR(d, s, isMatch) \
|
||||
COPY_ARR(d, s, isRep0Long) \
|
||||
COPY_ARR(d, s, posSlotEncoder) \
|
||||
COPY_ARR(d, s, posEncoders) \
|
||||
(d)->lenProbs = (s)->lenProbs; \
|
||||
(d)->repLenProbs = (s)->repLenProbs; \
|
||||
memcpy((d)->litProbs, (s)->litProbs, ((UInt32)0x300 << (p)->lclp) * sizeof(CLzmaProb));
|
||||
|
||||
void LzmaEnc_SaveState(CLzmaEncHandle p)
|
||||
{
|
||||
CLzmaEnc *p = (CLzmaEnc *)pp;
|
||||
CSaveState *dest = &p->saveState;
|
||||
int i;
|
||||
dest->lenEnc = p->lenEnc;
|
||||
dest->repLenEnc = p->repLenEnc;
|
||||
dest->state = p->state;
|
||||
|
||||
for (i = 0; i < kNumStates; i++)
|
||||
{
|
||||
memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
|
||||
memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
|
||||
}
|
||||
for (i = 0; i < kNumLenToPosStates; i++)
|
||||
memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
|
||||
memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
|
||||
memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
|
||||
memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1));
|
||||
memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
|
||||
memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
|
||||
memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
|
||||
memcpy(dest->reps, p->reps, sizeof(p->reps));
|
||||
memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb));
|
||||
// GET_CLzmaEnc_p
|
||||
CSaveState *v = &p->saveState;
|
||||
COPY_LZMA_ENC_STATE(v, p, p)
|
||||
}
|
||||
|
||||
void LzmaEnc_RestoreState(CLzmaEncHandle pp)
|
||||
void LzmaEnc_RestoreState(CLzmaEncHandle p)
|
||||
{
|
||||
CLzmaEnc *dest = (CLzmaEnc *)pp;
|
||||
const CSaveState *p = &dest->saveState;
|
||||
int i;
|
||||
dest->lenEnc = p->lenEnc;
|
||||
dest->repLenEnc = p->repLenEnc;
|
||||
dest->state = p->state;
|
||||
|
||||
for (i = 0; i < kNumStates; i++)
|
||||
{
|
||||
memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
|
||||
memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
|
||||
}
|
||||
for (i = 0; i < kNumLenToPosStates; i++)
|
||||
memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
|
||||
memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
|
||||
memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
|
||||
memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1));
|
||||
memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
|
||||
memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
|
||||
memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
|
||||
memcpy(dest->reps, p->reps, sizeof(p->reps));
|
||||
memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb));
|
||||
// GET_CLzmaEnc_p
|
||||
const CSaveState *v = &p->saveState;
|
||||
COPY_LZMA_ENC_STATE(p, v, p)
|
||||
}
|
||||
|
||||
SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
|
||||
|
||||
Z7_NO_INLINE
|
||||
SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2)
|
||||
{
|
||||
CLzmaEnc *p = (CLzmaEnc *)pp;
|
||||
// GET_CLzmaEnc_p
|
||||
CLzmaEncProps props = *props2;
|
||||
LzmaEncProps_Normalize(&props);
|
||||
|
||||
if (props.lc > LZMA_LC_MAX || props.lp > LZMA_LP_MAX || props.pb > LZMA_PB_MAX ||
|
||||
props.dictSize > (1 << kDicLogSizeMaxCompress) || props.dictSize > (1 << 30))
|
||||
if (props.lc > LZMA_LC_MAX
|
||||
|| props.lp > LZMA_LP_MAX
|
||||
|| props.pb > LZMA_PB_MAX)
|
||||
return SZ_ERROR_PARAM;
|
||||
p->dictSize = props.dictSize;
|
||||
p->matchFinderCycles = props.mc;
|
||||
|
||||
|
||||
if (props.dictSize > kLzmaMaxHistorySize)
|
||||
props.dictSize = kLzmaMaxHistorySize;
|
||||
|
||||
#ifndef LZMA_LOG_BSR
|
||||
{
|
||||
unsigned fb = props.fb;
|
||||
const UInt64 dict64 = props.dictSize;
|
||||
if (dict64 > ((UInt64)1 << kDicLogSizeMaxCompress))
|
||||
return SZ_ERROR_PARAM;
|
||||
}
|
||||
#endif
|
||||
|
||||
p->dictSize = props.dictSize;
|
||||
{
|
||||
unsigned fb = (unsigned)props.fb;
|
||||
if (fb < 5)
|
||||
fb = 5;
|
||||
if (fb > LZMA_MATCH_LEN_MAX)
|
||||
fb = LZMA_MATCH_LEN_MAX;
|
||||
p->numFastBytes = fb;
|
||||
}
|
||||
p->lc = props.lc;
|
||||
p->lp = props.lp;
|
||||
p->pb = props.pb;
|
||||
p->lc = (unsigned)props.lc;
|
||||
p->lp = (unsigned)props.lp;
|
||||
p->pb = (unsigned)props.pb;
|
||||
p->fastMode = (props.algo == 0);
|
||||
p->matchFinderBase.btMode = props.btMode;
|
||||
// p->_maxMode = True;
|
||||
MFB.btMode = (Byte)(props.btMode ? 1 : 0);
|
||||
// MFB.btMode = (Byte)(props.btMode);
|
||||
{
|
||||
UInt32 numHashBytes = 4;
|
||||
unsigned numHashBytes = 4;
|
||||
if (props.btMode)
|
||||
{
|
||||
if (props.numHashBytes < 2)
|
||||
numHashBytes = 2;
|
||||
else if (props.numHashBytes < 4)
|
||||
numHashBytes = props.numHashBytes;
|
||||
if (props.numHashBytes < 2) numHashBytes = 2;
|
||||
else if (props.numHashBytes < 4) numHashBytes = (unsigned)props.numHashBytes;
|
||||
}
|
||||
p->matchFinderBase.numHashBytes = numHashBytes;
|
||||
if (props.numHashBytes >= 5) numHashBytes = 5;
|
||||
|
||||
MFB.numHashBytes = numHashBytes;
|
||||
// MFB.numHashBytes_Min = 2;
|
||||
MFB.numHashOutBits = (Byte)props.numHashOutBits;
|
||||
}
|
||||
|
||||
p->matchFinderBase.cutValue = props.mc;
|
||||
MFB.cutValue = props.mc;
|
||||
|
||||
p->writeEndMark = props.writeEndMark;
|
||||
p->writeEndMark = (BoolInt)props.writeEndMark;
|
||||
|
||||
#ifdef COMPRESS_MF_MT
|
||||
#ifndef Z7_ST
|
||||
/*
|
||||
if (newMultiThread != _multiThread)
|
||||
{
|
||||
@@ -458,56 +596,73 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
|
||||
}
|
||||
*/
|
||||
p->multiThread = (props.numThreads > 1);
|
||||
p->matchFinderMt.btSync.affinity =
|
||||
p->matchFinderMt.hashSync.affinity = props.affinity;
|
||||
#endif
|
||||
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5};
|
||||
static const int kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
|
||||
static const int kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
|
||||
static const int kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
|
||||
|
||||
#define IsCharState(s) ((s) < 7)
|
||||
void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize)
|
||||
{
|
||||
// GET_CLzmaEnc_p
|
||||
MFB.expectedDataSize = expectedDataSiize;
|
||||
}
|
||||
|
||||
|
||||
#define kState_Start 0
|
||||
#define kState_LitAfterMatch 4
|
||||
#define kState_LitAfterRep 5
|
||||
#define kState_MatchAfterLit 7
|
||||
#define kState_RepAfterLit 8
|
||||
|
||||
static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5};
|
||||
static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
|
||||
static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
|
||||
static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
|
||||
|
||||
#define IsLitState(s) ((s) < 7)
|
||||
#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1)
|
||||
#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1)
|
||||
|
||||
#define kInfinityPrice (1 << 30)
|
||||
|
||||
static void RangeEnc_Construct(CRangeEnc *p)
|
||||
{
|
||||
p->outStream = 0;
|
||||
p->bufBase = 0;
|
||||
p->outStream = NULL;
|
||||
p->bufBase = NULL;
|
||||
}
|
||||
|
||||
#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
|
||||
#define RangeEnc_GetProcessed(p) ( (p)->processed + (size_t)((p)->buf - (p)->bufBase) + (p)->cacheSize)
|
||||
#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
|
||||
|
||||
#define RC_BUF_SIZE (1 << 16)
|
||||
static int RangeEnc_Alloc(CRangeEnc *p, ISzAlloc *alloc)
|
||||
|
||||
static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc)
|
||||
{
|
||||
if (p->bufBase == 0)
|
||||
if (!p->bufBase)
|
||||
{
|
||||
p->bufBase = (Byte *)alloc->Alloc(alloc, RC_BUF_SIZE);
|
||||
if (p->bufBase == 0)
|
||||
p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE);
|
||||
if (!p->bufBase)
|
||||
return 0;
|
||||
p->bufLim = p->bufBase + RC_BUF_SIZE;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void RangeEnc_Free(CRangeEnc *p, ISzAlloc *alloc)
|
||||
static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc)
|
||||
{
|
||||
alloc->Free(alloc, p->bufBase);
|
||||
p->bufBase = 0;
|
||||
ISzAlloc_Free(alloc, p->bufBase);
|
||||
p->bufBase = NULL;
|
||||
}
|
||||
|
||||
static void RangeEnc_Init(CRangeEnc *p)
|
||||
{
|
||||
/* Stream.Init(); */
|
||||
p->low = 0;
|
||||
p->range = 0xFFFFFFFF;
|
||||
p->cacheSize = 1;
|
||||
p->cache = 0;
|
||||
p->low = 0;
|
||||
p->cacheSize = 0;
|
||||
|
||||
p->buf = p->bufBase;
|
||||
|
||||
@@ -515,37 +670,48 @@ static void RangeEnc_Init(CRangeEnc *p)
|
||||
p->res = SZ_OK;
|
||||
}
|
||||
|
||||
static void RangeEnc_FlushStream(CRangeEnc *p)
|
||||
Z7_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
|
||||
{
|
||||
size_t num;
|
||||
if (p->res != SZ_OK)
|
||||
return;
|
||||
num = p->buf - p->bufBase;
|
||||
if (num != p->outStream->Write(p->outStream, p->bufBase, num))
|
||||
p->res = SZ_ERROR_WRITE;
|
||||
const size_t num = (size_t)(p->buf - p->bufBase);
|
||||
if (p->res == SZ_OK)
|
||||
{
|
||||
if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
|
||||
p->res = SZ_ERROR_WRITE;
|
||||
}
|
||||
p->processed += num;
|
||||
p->buf = p->bufBase;
|
||||
}
|
||||
|
||||
static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p)
|
||||
Z7_NO_INLINE static void Z7_FASTCALL RangeEnc_ShiftLow(CRangeEnc *p)
|
||||
{
|
||||
if ((UInt32)p->low < (UInt32)0xFF000000 || (int)(p->low >> 32) != 0)
|
||||
UInt32 low = (UInt32)p->low;
|
||||
unsigned high = (unsigned)(p->low >> 32);
|
||||
p->low = (UInt32)(low << 8);
|
||||
if (low < (UInt32)0xFF000000 || high != 0)
|
||||
{
|
||||
Byte temp = p->cache;
|
||||
do
|
||||
{
|
||||
Byte *buf = p->buf;
|
||||
*buf++ = (Byte)(temp + (Byte)(p->low >> 32));
|
||||
*buf++ = (Byte)(p->cache + high);
|
||||
p->cache = (unsigned)(low >> 24);
|
||||
p->buf = buf;
|
||||
if (buf == p->bufLim)
|
||||
RangeEnc_FlushStream(p);
|
||||
temp = 0xFF;
|
||||
if (p->cacheSize == 0)
|
||||
return;
|
||||
}
|
||||
high += 0xFF;
|
||||
for (;;)
|
||||
{
|
||||
Byte *buf = p->buf;
|
||||
*buf++ = (Byte)(high);
|
||||
p->buf = buf;
|
||||
if (buf == p->bufLim)
|
||||
RangeEnc_FlushStream(p);
|
||||
if (--p->cacheSize == 0)
|
||||
return;
|
||||
}
|
||||
while (--p->cacheSize != 0);
|
||||
p->cache = (Byte)((UInt32)p->low >> 24);
|
||||
}
|
||||
p->cacheSize++;
|
||||
p->low = (UInt32)p->low << 8;
|
||||
}
|
||||
|
||||
static void RangeEnc_FlushData(CRangeEnc *p)
|
||||
@@ -555,78 +721,121 @@ static void RangeEnc_FlushData(CRangeEnc *p)
|
||||
RangeEnc_ShiftLow(p);
|
||||
}
|
||||
|
||||
static void RangeEnc_EncodeDirectBits(CRangeEnc *p, UInt32 value, int numBits)
|
||||
#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); }
|
||||
|
||||
#define RC_BIT_PRE(p, prob) \
|
||||
ttt = *(prob); \
|
||||
newBound = (range >> kNumBitModelTotalBits) * ttt;
|
||||
|
||||
// #define Z7_LZMA_ENC_USE_BRANCH
|
||||
|
||||
#ifdef Z7_LZMA_ENC_USE_BRANCH
|
||||
|
||||
#define RC_BIT(p, prob, bit) { \
|
||||
RC_BIT_PRE(p, prob) \
|
||||
if (bit == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \
|
||||
else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \
|
||||
*(prob) = (CLzmaProb)ttt; \
|
||||
RC_NORM(p) \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define RC_BIT(p, prob, bit) { \
|
||||
UInt32 mask; \
|
||||
RC_BIT_PRE(p, prob) \
|
||||
mask = 0 - (UInt32)bit; \
|
||||
range &= mask; \
|
||||
mask &= newBound; \
|
||||
range -= mask; \
|
||||
(p)->low += mask; \
|
||||
mask = (UInt32)bit - 1; \
|
||||
range += newBound & mask; \
|
||||
mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \
|
||||
mask += ((1 << kNumMoveBits) - 1); \
|
||||
ttt += (UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \
|
||||
*(prob) = (CLzmaProb)ttt; \
|
||||
RC_NORM(p) \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
#define RC_BIT_0_BASE(p, prob) \
|
||||
range = newBound; *(prob) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
|
||||
|
||||
#define RC_BIT_1_BASE(p, prob) \
|
||||
range -= newBound; (p)->low += newBound; *(prob) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); \
|
||||
|
||||
#define RC_BIT_0(p, prob) \
|
||||
RC_BIT_0_BASE(p, prob) \
|
||||
RC_NORM(p)
|
||||
|
||||
#define RC_BIT_1(p, prob) \
|
||||
RC_BIT_1_BASE(p, prob) \
|
||||
RC_NORM(p)
|
||||
|
||||
static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob)
|
||||
{
|
||||
UInt32 range, ttt, newBound;
|
||||
range = p->range;
|
||||
RC_BIT_PRE(p, prob)
|
||||
RC_BIT_0(p, prob)
|
||||
p->range = range;
|
||||
}
|
||||
|
||||
static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym)
|
||||
{
|
||||
UInt32 range = p->range;
|
||||
sym |= 0x100;
|
||||
do
|
||||
{
|
||||
p->range >>= 1;
|
||||
p->low += p->range & (0 - ((value >> --numBits) & 1));
|
||||
if (p->range < kTopValue)
|
||||
{
|
||||
p->range <<= 8;
|
||||
RangeEnc_ShiftLow(p);
|
||||
}
|
||||
UInt32 ttt, newBound;
|
||||
// RangeEnc_EncodeBit(p, probs + (sym >> 8), (sym >> 7) & 1);
|
||||
CLzmaProb *prob = probs + (sym >> 8);
|
||||
UInt32 bit = (sym >> 7) & 1;
|
||||
sym <<= 1;
|
||||
RC_BIT(p, prob, bit)
|
||||
}
|
||||
while (numBits != 0);
|
||||
while (sym < 0x10000);
|
||||
p->range = range;
|
||||
}
|
||||
|
||||
static void RangeEnc_EncodeBit(CRangeEnc *p, CLzmaProb *prob, UInt32 symbol)
|
||||
{
|
||||
UInt32 ttt = *prob;
|
||||
UInt32 newBound = (p->range >> kNumBitModelTotalBits) * ttt;
|
||||
if (symbol == 0)
|
||||
{
|
||||
p->range = newBound;
|
||||
ttt += (kBitModelTotal - ttt) >> kNumMoveBits;
|
||||
}
|
||||
else
|
||||
{
|
||||
p->low += newBound;
|
||||
p->range -= newBound;
|
||||
ttt -= ttt >> kNumMoveBits;
|
||||
}
|
||||
*prob = (CLzmaProb)ttt;
|
||||
if (p->range < kTopValue)
|
||||
{
|
||||
p->range <<= 8;
|
||||
RangeEnc_ShiftLow(p);
|
||||
}
|
||||
}
|
||||
|
||||
static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol)
|
||||
{
|
||||
symbol |= 0x100;
|
||||
do
|
||||
{
|
||||
RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1);
|
||||
symbol <<= 1;
|
||||
}
|
||||
while (symbol < 0x10000);
|
||||
}
|
||||
|
||||
static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol, UInt32 matchByte)
|
||||
static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UInt32 matchByte)
|
||||
{
|
||||
UInt32 range = p->range;
|
||||
UInt32 offs = 0x100;
|
||||
symbol |= 0x100;
|
||||
sym |= 0x100;
|
||||
do
|
||||
{
|
||||
UInt32 ttt, newBound;
|
||||
CLzmaProb *prob;
|
||||
UInt32 bit;
|
||||
matchByte <<= 1;
|
||||
RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1);
|
||||
symbol <<= 1;
|
||||
offs &= ~(matchByte ^ symbol);
|
||||
// RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (sym >> 8)), (sym >> 7) & 1);
|
||||
prob = probs + (offs + (matchByte & offs) + (sym >> 8));
|
||||
bit = (sym >> 7) & 1;
|
||||
sym <<= 1;
|
||||
offs &= ~(matchByte ^ sym);
|
||||
RC_BIT(p, prob, bit)
|
||||
}
|
||||
while (symbol < 0x10000);
|
||||
while (sym < 0x10000);
|
||||
p->range = range;
|
||||
}
|
||||
|
||||
void LzmaEnc_InitPriceTables(UInt32 *ProbPrices)
|
||||
|
||||
|
||||
static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
|
||||
{
|
||||
UInt32 i;
|
||||
for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits))
|
||||
for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++)
|
||||
{
|
||||
const int kCyclesBits = kNumBitPriceShiftBits;
|
||||
UInt32 w = i;
|
||||
UInt32 bitCount = 0;
|
||||
int j;
|
||||
const unsigned kCyclesBits = kNumBitPriceShiftBits;
|
||||
UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1));
|
||||
unsigned bitCount = 0;
|
||||
unsigned j;
|
||||
for (j = 0; j < kCyclesBits; j++)
|
||||
{
|
||||
w = w * w;
|
||||
@@ -637,554 +846,663 @@ void LzmaEnc_InitPriceTables(UInt32 *ProbPrices)
|
||||
bitCount++;
|
||||
}
|
||||
}
|
||||
ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
|
||||
ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
|
||||
// printf("\n%3d: %5d", i, ProbPrices[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define GET_PRICE(prob, symbol) \
|
||||
p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
|
||||
#define GET_PRICE(prob, bit) \
|
||||
p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
|
||||
|
||||
#define GET_PRICEa(prob, symbol) \
|
||||
ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
|
||||
#define GET_PRICEa(prob, bit) \
|
||||
ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
|
||||
|
||||
#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
|
||||
#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
|
||||
|
||||
#define GET_PRICE_0a(prob) ProbPrices[(prob) >> kNumMoveReducingBits]
|
||||
#define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
|
||||
#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits]
|
||||
#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
|
||||
|
||||
static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 symbol, UInt32 *ProbPrices)
|
||||
|
||||
static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 sym, const CProbPrice *ProbPrices)
|
||||
{
|
||||
UInt32 price = 0;
|
||||
symbol |= 0x100;
|
||||
sym |= 0x100;
|
||||
do
|
||||
{
|
||||
price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1);
|
||||
symbol <<= 1;
|
||||
unsigned bit = sym & 1;
|
||||
sym >>= 1;
|
||||
price += GET_PRICEa(probs[sym], bit);
|
||||
}
|
||||
while (symbol < 0x10000);
|
||||
while (sym >= 2);
|
||||
return price;
|
||||
}
|
||||
|
||||
static UInt32 LitEnc_GetPriceMatched(const CLzmaProb *probs, UInt32 symbol, UInt32 matchByte, UInt32 *ProbPrices)
|
||||
|
||||
static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 sym, UInt32 matchByte, const CProbPrice *ProbPrices)
|
||||
{
|
||||
UInt32 price = 0;
|
||||
UInt32 offs = 0x100;
|
||||
symbol |= 0x100;
|
||||
sym |= 0x100;
|
||||
do
|
||||
{
|
||||
matchByte <<= 1;
|
||||
price += GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1);
|
||||
symbol <<= 1;
|
||||
offs &= ~(matchByte ^ symbol);
|
||||
price += GET_PRICEa(probs[offs + (matchByte & offs) + (sym >> 8)], (sym >> 7) & 1);
|
||||
sym <<= 1;
|
||||
offs &= ~(matchByte ^ sym);
|
||||
}
|
||||
while (symbol < 0x10000);
|
||||
while (sym < 0x10000);
|
||||
return price;
|
||||
}
|
||||
|
||||
|
||||
static void RcTree_Encode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol)
|
||||
static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBits, unsigned sym)
|
||||
{
|
||||
UInt32 m = 1;
|
||||
int i;
|
||||
for (i = numBitLevels; i != 0;)
|
||||
UInt32 range = rc->range;
|
||||
unsigned m = 1;
|
||||
do
|
||||
{
|
||||
UInt32 bit;
|
||||
i--;
|
||||
bit = (symbol >> i) & 1;
|
||||
RangeEnc_EncodeBit(rc, probs + m, bit);
|
||||
UInt32 ttt, newBound;
|
||||
unsigned bit = sym & 1;
|
||||
// RangeEnc_EncodeBit(rc, probs + m, bit);
|
||||
sym >>= 1;
|
||||
RC_BIT(rc, probs + m, bit)
|
||||
m = (m << 1) | bit;
|
||||
}
|
||||
while (--numBits);
|
||||
rc->range = range;
|
||||
}
|
||||
|
||||
static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol)
|
||||
{
|
||||
UInt32 m = 1;
|
||||
int i;
|
||||
for (i = 0; i < numBitLevels; i++)
|
||||
{
|
||||
UInt32 bit = symbol & 1;
|
||||
RangeEnc_EncodeBit(rc, probs + m, bit);
|
||||
m = (m << 1) | bit;
|
||||
symbol >>= 1;
|
||||
}
|
||||
}
|
||||
|
||||
static UInt32 RcTree_GetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices)
|
||||
{
|
||||
UInt32 price = 0;
|
||||
symbol |= (1 << numBitLevels);
|
||||
while (symbol != 1)
|
||||
{
|
||||
price += GET_PRICEa(probs[symbol >> 1], symbol & 1);
|
||||
symbol >>= 1;
|
||||
}
|
||||
return price;
|
||||
}
|
||||
|
||||
static UInt32 RcTree_ReverseGetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices)
|
||||
{
|
||||
UInt32 price = 0;
|
||||
UInt32 m = 1;
|
||||
int i;
|
||||
for (i = numBitLevels; i != 0; i--)
|
||||
{
|
||||
UInt32 bit = symbol & 1;
|
||||
symbol >>= 1;
|
||||
price += GET_PRICEa(probs[m], bit);
|
||||
m = (m << 1) | bit;
|
||||
}
|
||||
return price;
|
||||
}
|
||||
|
||||
|
||||
static void LenEnc_Init(CLenEnc *p)
|
||||
{
|
||||
unsigned i;
|
||||
p->choice = p->choice2 = kProbInitValue;
|
||||
for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++)
|
||||
for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++)
|
||||
p->low[i] = kProbInitValue;
|
||||
for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++)
|
||||
p->mid[i] = kProbInitValue;
|
||||
for (i = 0; i < kLenNumHighSymbols; i++)
|
||||
p->high[i] = kProbInitValue;
|
||||
}
|
||||
|
||||
static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState)
|
||||
static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posState)
|
||||
{
|
||||
if (symbol < kLenNumLowSymbols)
|
||||
UInt32 range, ttt, newBound;
|
||||
CLzmaProb *probs = p->low;
|
||||
range = rc->range;
|
||||
RC_BIT_PRE(rc, probs)
|
||||
if (sym >= kLenNumLowSymbols)
|
||||
{
|
||||
RangeEnc_EncodeBit(rc, &p->choice, 0);
|
||||
RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol);
|
||||
}
|
||||
else
|
||||
{
|
||||
RangeEnc_EncodeBit(rc, &p->choice, 1);
|
||||
if (symbol < kLenNumLowSymbols + kLenNumMidSymbols)
|
||||
RC_BIT_1(rc, probs)
|
||||
probs += kLenNumLowSymbols;
|
||||
RC_BIT_PRE(rc, probs)
|
||||
if (sym >= kLenNumLowSymbols * 2)
|
||||
{
|
||||
RangeEnc_EncodeBit(rc, &p->choice2, 0);
|
||||
RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols);
|
||||
RC_BIT_1(rc, probs)
|
||||
rc->range = range;
|
||||
// RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2);
|
||||
LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2);
|
||||
return;
|
||||
}
|
||||
else
|
||||
sym -= kLenNumLowSymbols;
|
||||
}
|
||||
|
||||
// RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, sym);
|
||||
{
|
||||
unsigned m;
|
||||
unsigned bit;
|
||||
RC_BIT_0(rc, probs)
|
||||
probs += (posState << (1 + kLenNumLowBits));
|
||||
bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit) m = (1 << 1) + bit;
|
||||
bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit) m = (m << 1) + bit;
|
||||
bit = sym & 1; RC_BIT(rc, probs + m, bit)
|
||||
rc->range = range;
|
||||
}
|
||||
}
|
||||
|
||||
static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < 8; i += 2)
|
||||
{
|
||||
UInt32 price = startPrice;
|
||||
UInt32 prob;
|
||||
price += GET_PRICEa(probs[1 ], (i >> 2));
|
||||
price += GET_PRICEa(probs[2 + (i >> 2)], (i >> 1) & 1);
|
||||
prob = probs[4 + (i >> 1)];
|
||||
prices[i ] = price + GET_PRICEa_0(prob);
|
||||
prices[i + 1] = price + GET_PRICEa_1(prob);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables(
|
||||
CLenPriceEnc *p,
|
||||
unsigned numPosStates,
|
||||
const CLenEnc *enc,
|
||||
const CProbPrice *ProbPrices)
|
||||
{
|
||||
UInt32 b;
|
||||
|
||||
{
|
||||
unsigned prob = enc->low[0];
|
||||
UInt32 a, c;
|
||||
unsigned posState;
|
||||
b = GET_PRICEa_1(prob);
|
||||
a = GET_PRICEa_0(prob);
|
||||
c = b + GET_PRICEa_0(enc->low[kLenNumLowSymbols]);
|
||||
for (posState = 0; posState < numPosStates; posState++)
|
||||
{
|
||||
RangeEnc_EncodeBit(rc, &p->choice2, 1);
|
||||
RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols);
|
||||
UInt32 *prices = p->prices[posState];
|
||||
const CLzmaProb *probs = enc->low + (posState << (1 + kLenNumLowBits));
|
||||
SetPrices_3(probs, a, prices, ProbPrices);
|
||||
SetPrices_3(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols, ProbPrices);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
{
|
||||
unsigned i;
|
||||
UInt32 b;
|
||||
a = GET_PRICEa_0(enc->low[0]);
|
||||
for (i = 0; i < kLenNumLowSymbols; i++)
|
||||
p->prices2[i] = a;
|
||||
a = GET_PRICEa_1(enc->low[0]);
|
||||
b = a + GET_PRICEa_0(enc->low[kLenNumLowSymbols]);
|
||||
for (i = kLenNumLowSymbols; i < kLenNumLowSymbols * 2; i++)
|
||||
p->prices2[i] = b;
|
||||
a += GET_PRICEa_1(enc->low[kLenNumLowSymbols]);
|
||||
}
|
||||
*/
|
||||
|
||||
// p->counter = numSymbols;
|
||||
// p->counter = 64;
|
||||
|
||||
{
|
||||
unsigned i = p->tableSize;
|
||||
|
||||
if (i > kLenNumLowSymbols * 2)
|
||||
{
|
||||
const CLzmaProb *probs = enc->high;
|
||||
UInt32 *prices = p->prices[0] + kLenNumLowSymbols * 2;
|
||||
i -= kLenNumLowSymbols * 2 - 1;
|
||||
i >>= 1;
|
||||
b += GET_PRICEa_1(enc->low[kLenNumLowSymbols]);
|
||||
do
|
||||
{
|
||||
/*
|
||||
p->prices2[i] = a +
|
||||
// RcTree_GetPrice(enc->high, kLenNumHighBits, i - kLenNumLowSymbols * 2, ProbPrices);
|
||||
LitEnc_GetPrice(probs, i - kLenNumLowSymbols * 2, ProbPrices);
|
||||
*/
|
||||
// UInt32 price = a + RcTree_GetPrice(probs, kLenNumHighBits - 1, sym, ProbPrices);
|
||||
unsigned sym = --i + (1 << (kLenNumHighBits - 1));
|
||||
UInt32 price = b;
|
||||
do
|
||||
{
|
||||
unsigned bit = sym & 1;
|
||||
sym >>= 1;
|
||||
price += GET_PRICEa(probs[sym], bit);
|
||||
}
|
||||
while (sym >= 2);
|
||||
|
||||
{
|
||||
unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];
|
||||
prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob);
|
||||
prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob);
|
||||
}
|
||||
}
|
||||
while (i);
|
||||
|
||||
{
|
||||
unsigned posState;
|
||||
size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);
|
||||
for (posState = 1; posState < numPosStates; posState++)
|
||||
memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void LenEnc_SetPrices(CLenEnc *p, UInt32 posState, UInt32 numSymbols, UInt32 *prices, UInt32 *ProbPrices)
|
||||
{
|
||||
UInt32 a0 = GET_PRICE_0a(p->choice);
|
||||
UInt32 a1 = GET_PRICE_1a(p->choice);
|
||||
UInt32 b0 = a1 + GET_PRICE_0a(p->choice2);
|
||||
UInt32 b1 = a1 + GET_PRICE_1a(p->choice2);
|
||||
UInt32 i = 0;
|
||||
for (i = 0; i < kLenNumLowSymbols; i++)
|
||||
{
|
||||
if (i >= numSymbols)
|
||||
return;
|
||||
prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices);
|
||||
}
|
||||
for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++)
|
||||
{
|
||||
if (i >= numSymbols)
|
||||
return;
|
||||
prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices);
|
||||
}
|
||||
for (; i < numSymbols; i++)
|
||||
prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices);
|
||||
}
|
||||
|
||||
static void MY_FAST_CALL LenPriceEnc_UpdateTable(CLenPriceEnc *p, UInt32 posState, UInt32 *ProbPrices)
|
||||
{
|
||||
LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices);
|
||||
p->counters[posState] = p->tableSize;
|
||||
}
|
||||
|
||||
static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, UInt32 numPosStates, UInt32 *ProbPrices)
|
||||
{
|
||||
UInt32 posState;
|
||||
for (posState = 0; posState < numPosStates; posState++)
|
||||
LenPriceEnc_UpdateTable(p, posState, ProbPrices);
|
||||
}
|
||||
|
||||
static void LenEnc_Encode2(CLenPriceEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState, Bool updatePrice, UInt32 *ProbPrices)
|
||||
{
|
||||
LenEnc_Encode(&p->p, rc, symbol, posState);
|
||||
if (updatePrice)
|
||||
if (--p->counters[posState] == 0)
|
||||
LenPriceEnc_UpdateTable(p, posState, ProbPrices);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
static void MovePos(CLzmaEnc *p, UInt32 num)
|
||||
{
|
||||
/*
|
||||
#ifdef SHOW_STAT
|
||||
ttt += num;
|
||||
printf("\n MovePos %d", num);
|
||||
g_STAT_OFFSET += num;
|
||||
printf("\n MovePos %u", num);
|
||||
#endif
|
||||
if (num != 0)
|
||||
{
|
||||
p->additionalOffset += num;
|
||||
p->matchFinder.Skip(p->matchFinderObj, num);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
#define MOVE_POS(p, num) { \
|
||||
p->additionalOffset += (num); \
|
||||
p->matchFinder.Skip(p->matchFinderObj, (UInt32)(num)); }
|
||||
|
||||
static UInt32 ReadMatchDistances(CLzmaEnc *p, UInt32 *numDistancePairsRes)
|
||||
|
||||
static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
|
||||
{
|
||||
UInt32 lenRes = 0, numPairs;
|
||||
unsigned numPairs;
|
||||
|
||||
p->additionalOffset++;
|
||||
p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
|
||||
numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
|
||||
#ifdef SHOW_STAT
|
||||
printf("\n i = %d numPairs = %d ", ttt, numPairs / 2);
|
||||
ttt++;
|
||||
{
|
||||
UInt32 i;
|
||||
const UInt32 *d = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
|
||||
// if (!d) { p->mf_Failure = True; *numPairsRes = 0; return 0; }
|
||||
numPairs = (unsigned)(d - p->matches);
|
||||
}
|
||||
*numPairsRes = numPairs;
|
||||
|
||||
#ifdef SHOW_STAT
|
||||
printf("\n i = %u numPairs = %u ", g_STAT_OFFSET, numPairs / 2);
|
||||
g_STAT_OFFSET++;
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < numPairs; i += 2)
|
||||
printf("%2d %6d | ", p->matches[i], p->matches[i + 1]);
|
||||
printf("%2u %6u | ", p->matches[i], p->matches[i + 1]);
|
||||
}
|
||||
#endif
|
||||
if (numPairs > 0)
|
||||
|
||||
if (numPairs == 0)
|
||||
return 0;
|
||||
{
|
||||
lenRes = p->matches[numPairs - 2];
|
||||
if (lenRes == p->numFastBytes)
|
||||
const unsigned len = p->matches[(size_t)numPairs - 2];
|
||||
if (len != p->numFastBytes)
|
||||
return len;
|
||||
{
|
||||
const Byte *pby = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
|
||||
UInt32 distance = p->matches[numPairs - 1] + 1;
|
||||
UInt32 numAvail = p->numAvail;
|
||||
if (numAvail > LZMA_MATCH_LEN_MAX)
|
||||
numAvail = LZMA_MATCH_LEN_MAX;
|
||||
{
|
||||
const Byte *pby2 = pby - distance;
|
||||
for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++);
|
||||
const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
|
||||
const Byte *p2 = p1 + len;
|
||||
const ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1];
|
||||
const Byte *lim = p1 + numAvail;
|
||||
for (; p2 != lim && *p2 == p2[dif]; p2++)
|
||||
{}
|
||||
return (unsigned)(p2 - p1);
|
||||
}
|
||||
}
|
||||
}
|
||||
p->additionalOffset++;
|
||||
*numDistancePairsRes = numPairs;
|
||||
return lenRes;
|
||||
}
|
||||
|
||||
#define MARK_LIT ((UInt32)(Int32)-1)
|
||||
|
||||
#define MakeAsChar(p) (p)->backPrev = (UInt32)(-1); (p)->prev1IsChar = False;
|
||||
#define MakeAsShortRep(p) (p)->backPrev = 0; (p)->prev1IsChar = False;
|
||||
#define IsShortRep(p) ((p)->backPrev == 0)
|
||||
#define MakeAs_Lit(p) { (p)->dist = MARK_LIT; (p)->extra = 0; }
|
||||
#define MakeAs_ShortRep(p) { (p)->dist = 0; (p)->extra = 0; }
|
||||
#define IsShortRep(p) ((p)->dist == 0)
|
||||
|
||||
static UInt32 GetRepLen1Price(CLzmaEnc *p, UInt32 state, UInt32 posState)
|
||||
{
|
||||
return
|
||||
GET_PRICE_0(p->isRepG0[state]) +
|
||||
GET_PRICE_0(p->isRep0Long[state][posState]);
|
||||
}
|
||||
|
||||
static UInt32 GetPureRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 state, UInt32 posState)
|
||||
#define GetPrice_ShortRep(p, state, posState) \
|
||||
( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState]))
|
||||
|
||||
#define GetPrice_Rep_0(p, state, posState) ( \
|
||||
GET_PRICE_1(p->isMatch[state][posState]) \
|
||||
+ GET_PRICE_1(p->isRep0Long[state][posState])) \
|
||||
+ GET_PRICE_1(p->isRep[state]) \
|
||||
+ GET_PRICE_0(p->isRepG0[state])
|
||||
|
||||
Z7_FORCE_INLINE
|
||||
static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState)
|
||||
{
|
||||
UInt32 price;
|
||||
UInt32 prob = p->isRepG0[state];
|
||||
if (repIndex == 0)
|
||||
{
|
||||
price = GET_PRICE_0(p->isRepG0[state]);
|
||||
price = GET_PRICE_0(prob);
|
||||
price += GET_PRICE_1(p->isRep0Long[state][posState]);
|
||||
}
|
||||
else
|
||||
{
|
||||
price = GET_PRICE_1(p->isRepG0[state]);
|
||||
price = GET_PRICE_1(prob);
|
||||
prob = p->isRepG1[state];
|
||||
if (repIndex == 1)
|
||||
price += GET_PRICE_0(p->isRepG1[state]);
|
||||
price += GET_PRICE_0(prob);
|
||||
else
|
||||
{
|
||||
price += GET_PRICE_1(p->isRepG1[state]);
|
||||
price += GET_PRICE_1(prob);
|
||||
price += GET_PRICE(p->isRepG2[state], repIndex - 2);
|
||||
}
|
||||
}
|
||||
return price;
|
||||
}
|
||||
|
||||
static UInt32 GetRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 len, UInt32 state, UInt32 posState)
|
||||
|
||||
static unsigned Backward(CLzmaEnc *p, unsigned cur)
|
||||
{
|
||||
return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] +
|
||||
GetPureRepPrice(p, repIndex, state, posState);
|
||||
}
|
||||
|
||||
static UInt32 Backward(CLzmaEnc *p, UInt32 *backRes, UInt32 cur)
|
||||
{
|
||||
UInt32 posMem = p->opt[cur].posPrev;
|
||||
UInt32 backMem = p->opt[cur].backPrev;
|
||||
p->optimumEndIndex = cur;
|
||||
do
|
||||
{
|
||||
if (p->opt[cur].prev1IsChar)
|
||||
{
|
||||
MakeAsChar(&p->opt[posMem])
|
||||
p->opt[posMem].posPrev = posMem - 1;
|
||||
if (p->opt[cur].prev2)
|
||||
{
|
||||
p->opt[posMem - 1].prev1IsChar = False;
|
||||
p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2;
|
||||
p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2;
|
||||
}
|
||||
}
|
||||
{
|
||||
UInt32 posPrev = posMem;
|
||||
UInt32 backCur = backMem;
|
||||
|
||||
backMem = p->opt[posPrev].backPrev;
|
||||
posMem = p->opt[posPrev].posPrev;
|
||||
|
||||
p->opt[posPrev].backPrev = backCur;
|
||||
p->opt[posPrev].posPrev = cur;
|
||||
cur = posPrev;
|
||||
}
|
||||
}
|
||||
while (cur != 0);
|
||||
*backRes = p->opt[0].backPrev;
|
||||
p->optimumCurrentIndex = p->opt[0].posPrev;
|
||||
return p->optimumCurrentIndex;
|
||||
}
|
||||
|
||||
#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos) & p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * 0x300)
|
||||
|
||||
static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes)
|
||||
{
|
||||
UInt32 numAvail, mainLen, numPairs, repMaxIndex, i, posState, lenEnd, len, cur;
|
||||
UInt32 matchPrice, repMatchPrice, normalMatchPrice;
|
||||
UInt32 reps[LZMA_NUM_REPS], repLens[LZMA_NUM_REPS];
|
||||
UInt32 *matches;
|
||||
const Byte *data;
|
||||
Byte curByte, matchByte;
|
||||
if (p->optimumEndIndex != p->optimumCurrentIndex)
|
||||
{
|
||||
const COptimal *opt = &p->opt[p->optimumCurrentIndex];
|
||||
UInt32 lenRes = opt->posPrev - p->optimumCurrentIndex;
|
||||
*backRes = opt->backPrev;
|
||||
p->optimumCurrentIndex = opt->posPrev;
|
||||
return lenRes;
|
||||
}
|
||||
p->optimumCurrentIndex = p->optimumEndIndex = 0;
|
||||
|
||||
if (p->additionalOffset == 0)
|
||||
mainLen = ReadMatchDistances(p, &numPairs);
|
||||
else
|
||||
{
|
||||
mainLen = p->longestMatchLength;
|
||||
numPairs = p->numPairs;
|
||||
}
|
||||
|
||||
numAvail = p->numAvail;
|
||||
if (numAvail < 2)
|
||||
{
|
||||
*backRes = (UInt32)(-1);
|
||||
return 1;
|
||||
}
|
||||
if (numAvail > LZMA_MATCH_LEN_MAX)
|
||||
numAvail = LZMA_MATCH_LEN_MAX;
|
||||
|
||||
data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
|
||||
repMaxIndex = 0;
|
||||
for (i = 0; i < LZMA_NUM_REPS; i++)
|
||||
{
|
||||
UInt32 lenTest;
|
||||
const Byte *data2;
|
||||
reps[i] = p->reps[i];
|
||||
data2 = data - (reps[i] + 1);
|
||||
if (data[0] != data2[0] || data[1] != data2[1])
|
||||
{
|
||||
repLens[i] = 0;
|
||||
continue;
|
||||
}
|
||||
for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++);
|
||||
repLens[i] = lenTest;
|
||||
if (lenTest > repLens[repMaxIndex])
|
||||
repMaxIndex = i;
|
||||
}
|
||||
if (repLens[repMaxIndex] >= p->numFastBytes)
|
||||
{
|
||||
UInt32 lenRes;
|
||||
*backRes = repMaxIndex;
|
||||
lenRes = repLens[repMaxIndex];
|
||||
MovePos(p, lenRes - 1);
|
||||
return lenRes;
|
||||
}
|
||||
|
||||
matches = p->matches;
|
||||
if (mainLen >= p->numFastBytes)
|
||||
{
|
||||
*backRes = matches[numPairs - 1] + LZMA_NUM_REPS;
|
||||
MovePos(p, mainLen - 1);
|
||||
return mainLen;
|
||||
}
|
||||
curByte = *data;
|
||||
matchByte = *(data - (reps[0] + 1));
|
||||
|
||||
if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2)
|
||||
{
|
||||
*backRes = (UInt32)-1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
p->opt[0].state = (CState)p->state;
|
||||
|
||||
posState = (position & p->pbMask);
|
||||
|
||||
{
|
||||
const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
|
||||
p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) +
|
||||
(!IsCharState(p->state) ?
|
||||
LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) :
|
||||
LitEnc_GetPrice(probs, curByte, p->ProbPrices));
|
||||
}
|
||||
|
||||
MakeAsChar(&p->opt[1]);
|
||||
|
||||
matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
|
||||
repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);
|
||||
|
||||
if (matchByte == curByte)
|
||||
{
|
||||
UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState);
|
||||
if (shortRepPrice < p->opt[1].price)
|
||||
{
|
||||
p->opt[1].price = shortRepPrice;
|
||||
MakeAsShortRep(&p->opt[1]);
|
||||
}
|
||||
}
|
||||
lenEnd = ((mainLen >= repLens[repMaxIndex]) ? mainLen : repLens[repMaxIndex]);
|
||||
|
||||
if (lenEnd < 2)
|
||||
{
|
||||
*backRes = p->opt[1].backPrev;
|
||||
return 1;
|
||||
}
|
||||
|
||||
p->opt[1].posPrev = 0;
|
||||
for (i = 0; i < LZMA_NUM_REPS; i++)
|
||||
p->opt[0].backs[i] = reps[i];
|
||||
|
||||
len = lenEnd;
|
||||
do
|
||||
p->opt[len--].price = kInfinityPrice;
|
||||
while (len >= 2);
|
||||
|
||||
for (i = 0; i < LZMA_NUM_REPS; i++)
|
||||
{
|
||||
UInt32 repLen = repLens[i];
|
||||
UInt32 price;
|
||||
if (repLen < 2)
|
||||
continue;
|
||||
price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState);
|
||||
do
|
||||
{
|
||||
UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2];
|
||||
COptimal *opt = &p->opt[repLen];
|
||||
if (curAndLenPrice < opt->price)
|
||||
{
|
||||
opt->price = curAndLenPrice;
|
||||
opt->posPrev = 0;
|
||||
opt->backPrev = i;
|
||||
opt->prev1IsChar = False;
|
||||
}
|
||||
}
|
||||
while (--repLen >= 2);
|
||||
}
|
||||
|
||||
normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]);
|
||||
|
||||
len = ((repLens[0] >= 2) ? repLens[0] + 1 : 2);
|
||||
if (len <= mainLen)
|
||||
{
|
||||
UInt32 offs = 0;
|
||||
while (len > matches[offs])
|
||||
offs += 2;
|
||||
for (; ; len++)
|
||||
{
|
||||
COptimal *opt;
|
||||
UInt32 distance = matches[offs + 1];
|
||||
|
||||
UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN];
|
||||
UInt32 lenToPosState = GetLenToPosState(len);
|
||||
if (distance < kNumFullDistances)
|
||||
curAndLenPrice += p->distancesPrices[lenToPosState][distance];
|
||||
else
|
||||
{
|
||||
UInt32 slot;
|
||||
GetPosSlot2(distance, slot);
|
||||
curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot];
|
||||
}
|
||||
opt = &p->opt[len];
|
||||
if (curAndLenPrice < opt->price)
|
||||
{
|
||||
opt->price = curAndLenPrice;
|
||||
opt->posPrev = 0;
|
||||
opt->backPrev = distance + LZMA_NUM_REPS;
|
||||
opt->prev1IsChar = False;
|
||||
}
|
||||
if (len == matches[offs])
|
||||
{
|
||||
offs += 2;
|
||||
if (offs == numPairs)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cur = 0;
|
||||
|
||||
#ifdef SHOW_STAT2
|
||||
if (position >= 0)
|
||||
{
|
||||
unsigned i;
|
||||
printf("\n pos = %4X", position);
|
||||
for (i = cur; i <= lenEnd; i++)
|
||||
printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price);
|
||||
}
|
||||
#endif
|
||||
unsigned wr = cur + 1;
|
||||
p->optEnd = wr;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
UInt32 numAvailFull, newLen, numPairs, posPrev, state, posState, startLen;
|
||||
UInt32 curPrice, curAnd1Price, matchPrice, repMatchPrice;
|
||||
Bool nextIsChar;
|
||||
UInt32 dist = p->opt[cur].dist;
|
||||
unsigned len = (unsigned)p->opt[cur].len;
|
||||
unsigned extra = (unsigned)p->opt[cur].extra;
|
||||
cur -= len;
|
||||
|
||||
if (extra)
|
||||
{
|
||||
wr--;
|
||||
p->opt[wr].len = (UInt32)len;
|
||||
cur -= extra;
|
||||
len = extra;
|
||||
if (extra == 1)
|
||||
{
|
||||
p->opt[wr].dist = dist;
|
||||
dist = MARK_LIT;
|
||||
}
|
||||
else
|
||||
{
|
||||
p->opt[wr].dist = 0;
|
||||
len--;
|
||||
wr--;
|
||||
p->opt[wr].dist = MARK_LIT;
|
||||
p->opt[wr].len = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (cur == 0)
|
||||
{
|
||||
p->backRes = dist;
|
||||
p->optCur = wr;
|
||||
return len;
|
||||
}
|
||||
|
||||
wr--;
|
||||
p->opt[wr].dist = dist;
|
||||
p->opt[wr].len = (UInt32)len;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define LIT_PROBS(pos, prevByte) \
|
||||
(p->litProbs + (UInt32)3 * (((((pos) << 8) + (prevByte)) & p->lpMask) << p->lc))
|
||||
|
||||
|
||||
static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
|
||||
{
|
||||
unsigned last, cur;
|
||||
UInt32 reps[LZMA_NUM_REPS];
|
||||
unsigned repLens[LZMA_NUM_REPS];
|
||||
UInt32 *matches;
|
||||
|
||||
{
|
||||
UInt32 numAvail;
|
||||
unsigned numPairs, mainLen, repMaxIndex, i, posState;
|
||||
UInt32 matchPrice, repMatchPrice;
|
||||
const Byte *data;
|
||||
Byte curByte, matchByte;
|
||||
|
||||
p->optCur = p->optEnd = 0;
|
||||
|
||||
if (p->additionalOffset == 0)
|
||||
mainLen = ReadMatchDistances(p, &numPairs);
|
||||
else
|
||||
{
|
||||
mainLen = p->longestMatchLen;
|
||||
numPairs = p->numPairs;
|
||||
}
|
||||
|
||||
numAvail = p->numAvail;
|
||||
if (numAvail < 2)
|
||||
{
|
||||
p->backRes = MARK_LIT;
|
||||
return 1;
|
||||
}
|
||||
if (numAvail > LZMA_MATCH_LEN_MAX)
|
||||
numAvail = LZMA_MATCH_LEN_MAX;
|
||||
|
||||
data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
|
||||
repMaxIndex = 0;
|
||||
|
||||
for (i = 0; i < LZMA_NUM_REPS; i++)
|
||||
{
|
||||
unsigned len;
|
||||
const Byte *data2;
|
||||
reps[i] = p->reps[i];
|
||||
data2 = data - reps[i];
|
||||
if (data[0] != data2[0] || data[1] != data2[1])
|
||||
{
|
||||
repLens[i] = 0;
|
||||
continue;
|
||||
}
|
||||
for (len = 2; len < numAvail && data[len] == data2[len]; len++)
|
||||
{}
|
||||
repLens[i] = len;
|
||||
if (len > repLens[repMaxIndex])
|
||||
repMaxIndex = i;
|
||||
if (len == LZMA_MATCH_LEN_MAX) // 21.03 : optimization
|
||||
break;
|
||||
}
|
||||
|
||||
if (repLens[repMaxIndex] >= p->numFastBytes)
|
||||
{
|
||||
unsigned len;
|
||||
p->backRes = (UInt32)repMaxIndex;
|
||||
len = repLens[repMaxIndex];
|
||||
MOVE_POS(p, len - 1)
|
||||
return len;
|
||||
}
|
||||
|
||||
matches = p->matches;
|
||||
#define MATCHES matches
|
||||
// #define MATCHES p->matches
|
||||
|
||||
if (mainLen >= p->numFastBytes)
|
||||
{
|
||||
p->backRes = MATCHES[(size_t)numPairs - 1] + LZMA_NUM_REPS;
|
||||
MOVE_POS(p, mainLen - 1)
|
||||
return mainLen;
|
||||
}
|
||||
|
||||
curByte = *data;
|
||||
matchByte = *(data - reps[0]);
|
||||
|
||||
last = repLens[repMaxIndex];
|
||||
if (last <= mainLen)
|
||||
last = mainLen;
|
||||
|
||||
if (last < 2 && curByte != matchByte)
|
||||
{
|
||||
p->backRes = MARK_LIT;
|
||||
return 1;
|
||||
}
|
||||
|
||||
p->opt[0].state = (CState)p->state;
|
||||
|
||||
posState = (position & p->pbMask);
|
||||
|
||||
{
|
||||
const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
|
||||
p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) +
|
||||
(!IsLitState(p->state) ?
|
||||
LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) :
|
||||
LitEnc_GetPrice(probs, curByte, p->ProbPrices));
|
||||
}
|
||||
|
||||
MakeAs_Lit(&p->opt[1])
|
||||
|
||||
matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
|
||||
repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);
|
||||
|
||||
// 18.06
|
||||
if (matchByte == curByte && repLens[0] == 0)
|
||||
{
|
||||
UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, p->state, posState);
|
||||
if (shortRepPrice < p->opt[1].price)
|
||||
{
|
||||
p->opt[1].price = shortRepPrice;
|
||||
MakeAs_ShortRep(&p->opt[1])
|
||||
}
|
||||
if (last < 2)
|
||||
{
|
||||
p->backRes = p->opt[1].dist;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
p->opt[1].len = 1;
|
||||
|
||||
p->opt[0].reps[0] = reps[0];
|
||||
p->opt[0].reps[1] = reps[1];
|
||||
p->opt[0].reps[2] = reps[2];
|
||||
p->opt[0].reps[3] = reps[3];
|
||||
|
||||
// ---------- REP ----------
|
||||
|
||||
for (i = 0; i < LZMA_NUM_REPS; i++)
|
||||
{
|
||||
unsigned repLen = repLens[i];
|
||||
UInt32 price;
|
||||
if (repLen < 2)
|
||||
continue;
|
||||
price = repMatchPrice + GetPrice_PureRep(p, i, p->state, posState);
|
||||
do
|
||||
{
|
||||
UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, repLen);
|
||||
COptimal *opt = &p->opt[repLen];
|
||||
if (price2 < opt->price)
|
||||
{
|
||||
opt->price = price2;
|
||||
opt->len = (UInt32)repLen;
|
||||
opt->dist = (UInt32)i;
|
||||
opt->extra = 0;
|
||||
}
|
||||
}
|
||||
while (--repLen >= 2);
|
||||
}
|
||||
|
||||
|
||||
// ---------- MATCH ----------
|
||||
{
|
||||
unsigned len = repLens[0] + 1;
|
||||
if (len <= mainLen)
|
||||
{
|
||||
unsigned offs = 0;
|
||||
UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]);
|
||||
|
||||
if (len < 2)
|
||||
len = 2;
|
||||
else
|
||||
while (len > MATCHES[offs])
|
||||
offs += 2;
|
||||
|
||||
for (; ; len++)
|
||||
{
|
||||
COptimal *opt;
|
||||
UInt32 dist = MATCHES[(size_t)offs + 1];
|
||||
UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
|
||||
unsigned lenToPosState = GetLenToPosState(len);
|
||||
|
||||
if (dist < kNumFullDistances)
|
||||
price += p->distancesPrices[lenToPosState][dist & (kNumFullDistances - 1)];
|
||||
else
|
||||
{
|
||||
unsigned slot;
|
||||
GetPosSlot2(dist, slot)
|
||||
price += p->alignPrices[dist & kAlignMask];
|
||||
price += p->posSlotPrices[lenToPosState][slot];
|
||||
}
|
||||
|
||||
opt = &p->opt[len];
|
||||
|
||||
if (price < opt->price)
|
||||
{
|
||||
opt->price = price;
|
||||
opt->len = (UInt32)len;
|
||||
opt->dist = dist + LZMA_NUM_REPS;
|
||||
opt->extra = 0;
|
||||
}
|
||||
|
||||
if (len == MATCHES[offs])
|
||||
{
|
||||
offs += 2;
|
||||
if (offs == numPairs)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
cur = 0;
|
||||
|
||||
#ifdef SHOW_STAT2
|
||||
/* if (position >= 0) */
|
||||
{
|
||||
unsigned i;
|
||||
printf("\n pos = %4X", position);
|
||||
for (i = cur; i <= last; i++)
|
||||
printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ---------- Optimal Parsing ----------
|
||||
|
||||
for (;;)
|
||||
{
|
||||
unsigned numAvail;
|
||||
UInt32 numAvailFull;
|
||||
unsigned newLen, numPairs, prev, state, posState, startLen;
|
||||
UInt32 litPrice, matchPrice, repMatchPrice;
|
||||
BoolInt nextIsLit;
|
||||
Byte curByte, matchByte;
|
||||
const Byte *data;
|
||||
COptimal *curOpt;
|
||||
COptimal *nextOpt;
|
||||
COptimal *curOpt, *nextOpt;
|
||||
|
||||
cur++;
|
||||
if (cur == lenEnd)
|
||||
return Backward(p, backRes, cur);
|
||||
if (++cur == last)
|
||||
break;
|
||||
|
||||
// 18.06
|
||||
if (cur >= kNumOpts - 64)
|
||||
{
|
||||
unsigned j, best;
|
||||
UInt32 price = p->opt[cur].price;
|
||||
best = cur;
|
||||
for (j = cur + 1; j <= last; j++)
|
||||
{
|
||||
UInt32 price2 = p->opt[j].price;
|
||||
if (price >= price2)
|
||||
{
|
||||
price = price2;
|
||||
best = j;
|
||||
}
|
||||
}
|
||||
{
|
||||
unsigned delta = best - cur;
|
||||
if (delta != 0)
|
||||
{
|
||||
MOVE_POS(p, delta)
|
||||
}
|
||||
}
|
||||
cur = best;
|
||||
break;
|
||||
}
|
||||
|
||||
newLen = ReadMatchDistances(p, &numPairs);
|
||||
|
||||
if (newLen >= p->numFastBytes)
|
||||
{
|
||||
p->numPairs = numPairs;
|
||||
p->longestMatchLength = newLen;
|
||||
return Backward(p, backRes, cur);
|
||||
p->longestMatchLen = newLen;
|
||||
break;
|
||||
}
|
||||
position++;
|
||||
|
||||
curOpt = &p->opt[cur];
|
||||
posPrev = curOpt->posPrev;
|
||||
if (curOpt->prev1IsChar)
|
||||
{
|
||||
posPrev--;
|
||||
if (curOpt->prev2)
|
||||
{
|
||||
state = p->opt[curOpt->posPrev2].state;
|
||||
if (curOpt->backPrev2 < LZMA_NUM_REPS)
|
||||
state = kRepNextStates[state];
|
||||
else
|
||||
state = kMatchNextStates[state];
|
||||
}
|
||||
else
|
||||
state = p->opt[posPrev].state;
|
||||
state = kLiteralNextStates[state];
|
||||
}
|
||||
else
|
||||
state = p->opt[posPrev].state;
|
||||
if (posPrev == cur - 1)
|
||||
|
||||
position++;
|
||||
|
||||
// we need that check here, if skip_items in p->opt are possible
|
||||
/*
|
||||
if (curOpt->price >= kInfinityPrice)
|
||||
continue;
|
||||
*/
|
||||
|
||||
prev = cur - curOpt->len;
|
||||
|
||||
if (curOpt->len == 1)
|
||||
{
|
||||
state = (unsigned)p->opt[prev].state;
|
||||
if (IsShortRep(curOpt))
|
||||
state = kShortRepNextStates[state];
|
||||
else
|
||||
@@ -1192,355 +1510,499 @@ static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes)
|
||||
}
|
||||
else
|
||||
{
|
||||
UInt32 pos;
|
||||
const COptimal *prevOpt;
|
||||
if (curOpt->prev1IsChar && curOpt->prev2)
|
||||
UInt32 b0;
|
||||
UInt32 dist = curOpt->dist;
|
||||
|
||||
if (curOpt->extra)
|
||||
{
|
||||
posPrev = curOpt->posPrev2;
|
||||
pos = curOpt->backPrev2;
|
||||
state = kRepNextStates[state];
|
||||
prev -= (unsigned)curOpt->extra;
|
||||
state = kState_RepAfterLit;
|
||||
if (curOpt->extra == 1)
|
||||
state = (dist < LZMA_NUM_REPS ? kState_RepAfterLit : kState_MatchAfterLit);
|
||||
}
|
||||
else
|
||||
{
|
||||
pos = curOpt->backPrev;
|
||||
if (pos < LZMA_NUM_REPS)
|
||||
state = (unsigned)p->opt[prev].state;
|
||||
if (dist < LZMA_NUM_REPS)
|
||||
state = kRepNextStates[state];
|
||||
else
|
||||
state = kMatchNextStates[state];
|
||||
}
|
||||
prevOpt = &p->opt[posPrev];
|
||||
if (pos < LZMA_NUM_REPS)
|
||||
|
||||
prevOpt = &p->opt[prev];
|
||||
b0 = prevOpt->reps[0];
|
||||
|
||||
if (dist < LZMA_NUM_REPS)
|
||||
{
|
||||
UInt32 i;
|
||||
reps[0] = prevOpt->backs[pos];
|
||||
for (i = 1; i <= pos; i++)
|
||||
reps[i] = prevOpt->backs[i - 1];
|
||||
for (; i < LZMA_NUM_REPS; i++)
|
||||
reps[i] = prevOpt->backs[i];
|
||||
if (dist == 0)
|
||||
{
|
||||
reps[0] = b0;
|
||||
reps[1] = prevOpt->reps[1];
|
||||
reps[2] = prevOpt->reps[2];
|
||||
reps[3] = prevOpt->reps[3];
|
||||
}
|
||||
else
|
||||
{
|
||||
reps[1] = b0;
|
||||
b0 = prevOpt->reps[1];
|
||||
if (dist == 1)
|
||||
{
|
||||
reps[0] = b0;
|
||||
reps[2] = prevOpt->reps[2];
|
||||
reps[3] = prevOpt->reps[3];
|
||||
}
|
||||
else
|
||||
{
|
||||
reps[2] = b0;
|
||||
reps[0] = prevOpt->reps[dist];
|
||||
reps[3] = prevOpt->reps[dist ^ 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
UInt32 i;
|
||||
reps[0] = (pos - LZMA_NUM_REPS);
|
||||
for (i = 1; i < LZMA_NUM_REPS; i++)
|
||||
reps[i] = prevOpt->backs[i - 1];
|
||||
reps[0] = (dist - LZMA_NUM_REPS + 1);
|
||||
reps[1] = b0;
|
||||
reps[2] = prevOpt->reps[1];
|
||||
reps[3] = prevOpt->reps[2];
|
||||
}
|
||||
}
|
||||
|
||||
curOpt->state = (CState)state;
|
||||
curOpt->reps[0] = reps[0];
|
||||
curOpt->reps[1] = reps[1];
|
||||
curOpt->reps[2] = reps[2];
|
||||
curOpt->reps[3] = reps[3];
|
||||
|
||||
curOpt->backs[0] = reps[0];
|
||||
curOpt->backs[1] = reps[1];
|
||||
curOpt->backs[2] = reps[2];
|
||||
curOpt->backs[3] = reps[3];
|
||||
|
||||
curPrice = curOpt->price;
|
||||
nextIsChar = False;
|
||||
data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
|
||||
curByte = *data;
|
||||
matchByte = *(data - (reps[0] + 1));
|
||||
matchByte = *(data - reps[0]);
|
||||
|
||||
posState = (position & p->pbMask);
|
||||
|
||||
curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]);
|
||||
/*
|
||||
The order of Price checks:
|
||||
< LIT
|
||||
<= SHORT_REP
|
||||
< LIT : REP_0
|
||||
< REP [ : LIT : REP_0 ]
|
||||
< MATCH [ : LIT : REP_0 ]
|
||||
*/
|
||||
|
||||
{
|
||||
UInt32 curPrice = curOpt->price;
|
||||
unsigned prob = p->isMatch[state][posState];
|
||||
matchPrice = curPrice + GET_PRICE_1(prob);
|
||||
litPrice = curPrice + GET_PRICE_0(prob);
|
||||
}
|
||||
|
||||
nextOpt = &p->opt[(size_t)cur + 1];
|
||||
nextIsLit = False;
|
||||
|
||||
// here we can allow skip_items in p->opt, if we don't check (nextOpt->price < kInfinityPrice)
|
||||
// 18.new.06
|
||||
if ((nextOpt->price < kInfinityPrice
|
||||
// && !IsLitState(state)
|
||||
&& matchByte == curByte)
|
||||
|| litPrice > nextOpt->price
|
||||
)
|
||||
litPrice = 0;
|
||||
else
|
||||
{
|
||||
const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
|
||||
curAnd1Price +=
|
||||
(!IsCharState(state) ?
|
||||
LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) :
|
||||
litPrice += (!IsLitState(state) ?
|
||||
LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) :
|
||||
LitEnc_GetPrice(probs, curByte, p->ProbPrices));
|
||||
}
|
||||
|
||||
nextOpt = &p->opt[cur + 1];
|
||||
|
||||
if (curAnd1Price < nextOpt->price)
|
||||
{
|
||||
nextOpt->price = curAnd1Price;
|
||||
nextOpt->posPrev = cur;
|
||||
MakeAsChar(nextOpt);
|
||||
nextIsChar = True;
|
||||
}
|
||||
|
||||
matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]);
|
||||
repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]);
|
||||
|
||||
if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0))
|
||||
{
|
||||
UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState);
|
||||
if (shortRepPrice <= nextOpt->price)
|
||||
|
||||
if (litPrice < nextOpt->price)
|
||||
{
|
||||
nextOpt->price = shortRepPrice;
|
||||
nextOpt->posPrev = cur;
|
||||
MakeAsShortRep(nextOpt);
|
||||
nextIsChar = True;
|
||||
nextOpt->price = litPrice;
|
||||
nextOpt->len = 1;
|
||||
MakeAs_Lit(nextOpt)
|
||||
nextIsLit = True;
|
||||
}
|
||||
}
|
||||
|
||||
repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]);
|
||||
|
||||
numAvailFull = p->numAvail;
|
||||
{
|
||||
UInt32 temp = kNumOpts - 1 - cur;
|
||||
if (temp < numAvailFull)
|
||||
numAvailFull = temp;
|
||||
unsigned temp = kNumOpts - 1 - cur;
|
||||
if (numAvailFull > temp)
|
||||
numAvailFull = (UInt32)temp;
|
||||
}
|
||||
|
||||
// 18.06
|
||||
// ---------- SHORT_REP ----------
|
||||
if (IsLitState(state)) // 18.new
|
||||
if (matchByte == curByte)
|
||||
if (repMatchPrice < nextOpt->price) // 18.new
|
||||
// if (numAvailFull < 2 || data[1] != *(data - reps[0] + 1))
|
||||
if (
|
||||
// nextOpt->price >= kInfinityPrice ||
|
||||
nextOpt->len < 2 // we can check nextOpt->len, if skip items are not allowed in p->opt
|
||||
|| (nextOpt->dist != 0
|
||||
// && nextOpt->extra <= 1 // 17.old
|
||||
)
|
||||
)
|
||||
{
|
||||
UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, state, posState);
|
||||
// if (shortRepPrice <= nextOpt->price) // 17.old
|
||||
if (shortRepPrice < nextOpt->price) // 18.new
|
||||
{
|
||||
nextOpt->price = shortRepPrice;
|
||||
nextOpt->len = 1;
|
||||
MakeAs_ShortRep(nextOpt)
|
||||
nextIsLit = False;
|
||||
}
|
||||
}
|
||||
|
||||
if (numAvailFull < 2)
|
||||
continue;
|
||||
numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes);
|
||||
|
||||
if (!nextIsChar && matchByte != curByte) /* speed optimization */
|
||||
{
|
||||
/* try Literal + rep0 */
|
||||
UInt32 temp;
|
||||
UInt32 lenTest2;
|
||||
const Byte *data2 = data - (reps[0] + 1);
|
||||
UInt32 limit = p->numFastBytes + 1;
|
||||
if (limit > numAvailFull)
|
||||
limit = numAvailFull;
|
||||
// numAvail <= p->numFastBytes
|
||||
|
||||
for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++);
|
||||
lenTest2 = temp - 1;
|
||||
if (lenTest2 >= 2)
|
||||
// ---------- LIT : REP_0 ----------
|
||||
|
||||
if (!nextIsLit
|
||||
&& litPrice != 0 // 18.new
|
||||
&& matchByte != curByte
|
||||
&& numAvailFull > 2)
|
||||
{
|
||||
const Byte *data2 = data - reps[0];
|
||||
if (data[1] == data2[1] && data[2] == data2[2])
|
||||
{
|
||||
UInt32 state2 = kLiteralNextStates[state];
|
||||
UInt32 posStateNext = (position + 1) & p->pbMask;
|
||||
UInt32 nextRepMatchPrice = curAnd1Price +
|
||||
GET_PRICE_1(p->isMatch[state2][posStateNext]) +
|
||||
GET_PRICE_1(p->isRep[state2]);
|
||||
/* for (; lenTest2 >= 2; lenTest2--) */
|
||||
unsigned len;
|
||||
unsigned limit = p->numFastBytes + 1;
|
||||
if (limit > numAvailFull)
|
||||
limit = numAvailFull;
|
||||
for (len = 3; len < limit && data[len] == data2[len]; len++)
|
||||
{}
|
||||
|
||||
{
|
||||
UInt32 curAndLenPrice;
|
||||
COptimal *opt;
|
||||
UInt32 offset = cur + 1 + lenTest2;
|
||||
while (lenEnd < offset)
|
||||
p->opt[++lenEnd].price = kInfinityPrice;
|
||||
curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
|
||||
opt = &p->opt[offset];
|
||||
if (curAndLenPrice < opt->price)
|
||||
unsigned state2 = kLiteralNextStates[state];
|
||||
unsigned posState2 = (position + 1) & p->pbMask;
|
||||
UInt32 price = litPrice + GetPrice_Rep_0(p, state2, posState2);
|
||||
{
|
||||
opt->price = curAndLenPrice;
|
||||
opt->posPrev = cur + 1;
|
||||
opt->backPrev = 0;
|
||||
opt->prev1IsChar = True;
|
||||
opt->prev2 = False;
|
||||
unsigned offset = cur + len;
|
||||
|
||||
if (last < offset)
|
||||
last = offset;
|
||||
|
||||
// do
|
||||
{
|
||||
UInt32 price2;
|
||||
COptimal *opt;
|
||||
len--;
|
||||
// price2 = price + GetPrice_Len_Rep_0(p, len, state2, posState2);
|
||||
price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len);
|
||||
|
||||
opt = &p->opt[offset];
|
||||
// offset--;
|
||||
if (price2 < opt->price)
|
||||
{
|
||||
opt->price = price2;
|
||||
opt->len = (UInt32)len;
|
||||
opt->dist = 0;
|
||||
opt->extra = 1;
|
||||
}
|
||||
}
|
||||
// while (len >= 3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
startLen = 2; /* speed optimization */
|
||||
|
||||
{
|
||||
UInt32 repIndex;
|
||||
for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++)
|
||||
{
|
||||
UInt32 lenTest;
|
||||
UInt32 lenTestTemp;
|
||||
UInt32 price;
|
||||
const Byte *data2 = data - (reps[repIndex] + 1);
|
||||
if (data[0] != data2[0] || data[1] != data2[1])
|
||||
continue;
|
||||
for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++);
|
||||
while (lenEnd < cur + lenTest)
|
||||
p->opt[++lenEnd].price = kInfinityPrice;
|
||||
lenTestTemp = lenTest;
|
||||
price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState);
|
||||
do
|
||||
// ---------- REP ----------
|
||||
unsigned repIndex = 0; // 17.old
|
||||
// unsigned repIndex = IsLitState(state) ? 0 : 1; // 18.notused
|
||||
for (; repIndex < LZMA_NUM_REPS; repIndex++)
|
||||
{
|
||||
UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2];
|
||||
COptimal *opt = &p->opt[cur + lenTest];
|
||||
if (curAndLenPrice < opt->price)
|
||||
{
|
||||
opt->price = curAndLenPrice;
|
||||
opt->posPrev = cur;
|
||||
opt->backPrev = repIndex;
|
||||
opt->prev1IsChar = False;
|
||||
}
|
||||
}
|
||||
while (--lenTest >= 2);
|
||||
lenTest = lenTestTemp;
|
||||
|
||||
if (repIndex == 0)
|
||||
startLen = lenTest + 1;
|
||||
unsigned len;
|
||||
UInt32 price;
|
||||
const Byte *data2 = data - reps[repIndex];
|
||||
if (data[0] != data2[0] || data[1] != data2[1])
|
||||
continue;
|
||||
|
||||
/* if (_maxMode) */
|
||||
for (len = 2; len < numAvail && data[len] == data2[len]; len++)
|
||||
{}
|
||||
|
||||
// if (len < startLen) continue; // 18.new: speed optimization
|
||||
|
||||
{
|
||||
UInt32 lenTest2 = lenTest + 1;
|
||||
UInt32 limit = lenTest2 + p->numFastBytes;
|
||||
UInt32 nextRepMatchPrice;
|
||||
if (limit > numAvailFull)
|
||||
limit = numAvailFull;
|
||||
for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++);
|
||||
lenTest2 -= lenTest + 1;
|
||||
if (lenTest2 >= 2)
|
||||
unsigned offset = cur + len;
|
||||
if (last < offset)
|
||||
last = offset;
|
||||
}
|
||||
{
|
||||
unsigned len2 = len;
|
||||
price = repMatchPrice + GetPrice_PureRep(p, repIndex, state, posState);
|
||||
do
|
||||
{
|
||||
UInt32 state2 = kRepNextStates[state];
|
||||
UInt32 posStateNext = (position + lenTest) & p->pbMask;
|
||||
UInt32 curAndLenCharPrice =
|
||||
price + p->repLenEnc.prices[posState][lenTest - 2] +
|
||||
GET_PRICE_0(p->isMatch[state2][posStateNext]) +
|
||||
LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]),
|
||||
data[lenTest], data2[lenTest], p->ProbPrices);
|
||||
state2 = kLiteralNextStates[state2];
|
||||
posStateNext = (position + lenTest + 1) & p->pbMask;
|
||||
nextRepMatchPrice = curAndLenCharPrice +
|
||||
GET_PRICE_1(p->isMatch[state2][posStateNext]) +
|
||||
GET_PRICE_1(p->isRep[state2]);
|
||||
|
||||
/* for (; lenTest2 >= 2; lenTest2--) */
|
||||
UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, len2);
|
||||
COptimal *opt = &p->opt[cur + len2];
|
||||
if (price2 < opt->price)
|
||||
{
|
||||
UInt32 curAndLenPrice;
|
||||
COptimal *opt;
|
||||
UInt32 offset = cur + lenTest + 1 + lenTest2;
|
||||
while (lenEnd < offset)
|
||||
p->opt[++lenEnd].price = kInfinityPrice;
|
||||
curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
|
||||
opt = &p->opt[offset];
|
||||
if (curAndLenPrice < opt->price)
|
||||
{
|
||||
opt->price = curAndLenPrice;
|
||||
opt->posPrev = cur + lenTest + 1;
|
||||
opt->backPrev = 0;
|
||||
opt->prev1IsChar = True;
|
||||
opt->prev2 = True;
|
||||
opt->posPrev2 = cur;
|
||||
opt->backPrev2 = repIndex;
|
||||
}
|
||||
opt->price = price2;
|
||||
opt->len = (UInt32)len2;
|
||||
opt->dist = (UInt32)repIndex;
|
||||
opt->extra = 0;
|
||||
}
|
||||
}
|
||||
while (--len2 >= 2);
|
||||
}
|
||||
|
||||
if (repIndex == 0) startLen = len + 1; // 17.old
|
||||
// startLen = len + 1; // 18.new
|
||||
|
||||
/* if (_maxMode) */
|
||||
{
|
||||
// ---------- REP : LIT : REP_0 ----------
|
||||
// numFastBytes + 1 + numFastBytes
|
||||
|
||||
unsigned len2 = len + 1;
|
||||
unsigned limit = len2 + p->numFastBytes;
|
||||
if (limit > numAvailFull)
|
||||
limit = numAvailFull;
|
||||
|
||||
len2 += 2;
|
||||
if (len2 <= limit)
|
||||
if (data[len2 - 2] == data2[len2 - 2])
|
||||
if (data[len2 - 1] == data2[len2 - 1])
|
||||
{
|
||||
unsigned state2 = kRepNextStates[state];
|
||||
unsigned posState2 = (position + len) & p->pbMask;
|
||||
price += GET_PRICE_LEN(&p->repLenEnc, posState, len)
|
||||
+ GET_PRICE_0(p->isMatch[state2][posState2])
|
||||
+ LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]),
|
||||
data[len], data2[len], p->ProbPrices);
|
||||
|
||||
// state2 = kLiteralNextStates[state2];
|
||||
state2 = kState_LitAfterRep;
|
||||
posState2 = (posState2 + 1) & p->pbMask;
|
||||
|
||||
|
||||
price += GetPrice_Rep_0(p, state2, posState2);
|
||||
|
||||
for (; len2 < limit && data[len2] == data2[len2]; len2++)
|
||||
{}
|
||||
|
||||
len2 -= len;
|
||||
// if (len2 >= 3)
|
||||
{
|
||||
{
|
||||
unsigned offset = cur + len + len2;
|
||||
|
||||
if (last < offset)
|
||||
last = offset;
|
||||
// do
|
||||
{
|
||||
UInt32 price2;
|
||||
COptimal *opt;
|
||||
len2--;
|
||||
// price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2);
|
||||
price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2);
|
||||
|
||||
opt = &p->opt[offset];
|
||||
// offset--;
|
||||
if (price2 < opt->price)
|
||||
{
|
||||
opt->price = price2;
|
||||
opt->len = (UInt32)len2;
|
||||
opt->extra = (CExtra)(len + 1);
|
||||
opt->dist = (UInt32)repIndex;
|
||||
}
|
||||
}
|
||||
// while (len2 >= 3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* for (UInt32 lenTest = 2; lenTest <= newLen; lenTest++) */
|
||||
|
||||
|
||||
// ---------- MATCH ----------
|
||||
/* for (unsigned len = 2; len <= newLen; len++) */
|
||||
if (newLen > numAvail)
|
||||
{
|
||||
newLen = numAvail;
|
||||
for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2);
|
||||
matches[numPairs] = newLen;
|
||||
for (numPairs = 0; newLen > MATCHES[numPairs]; numPairs += 2);
|
||||
MATCHES[numPairs] = (UInt32)newLen;
|
||||
numPairs += 2;
|
||||
}
|
||||
|
||||
// startLen = 2; /* speed optimization */
|
||||
|
||||
if (newLen >= startLen)
|
||||
{
|
||||
UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]);
|
||||
UInt32 offs, curBack, posSlot;
|
||||
UInt32 lenTest;
|
||||
while (lenEnd < cur + newLen)
|
||||
p->opt[++lenEnd].price = kInfinityPrice;
|
||||
UInt32 dist;
|
||||
unsigned offs, posSlot, len;
|
||||
|
||||
{
|
||||
unsigned offset = cur + newLen;
|
||||
if (last < offset)
|
||||
last = offset;
|
||||
}
|
||||
|
||||
offs = 0;
|
||||
while (startLen > matches[offs])
|
||||
while (startLen > MATCHES[offs])
|
||||
offs += 2;
|
||||
curBack = matches[offs + 1];
|
||||
GetPosSlot2(curBack, posSlot);
|
||||
for (lenTest = /*2*/ startLen; ; lenTest++)
|
||||
dist = MATCHES[(size_t)offs + 1];
|
||||
|
||||
// if (dist >= kNumFullDistances)
|
||||
GetPosSlot2(dist, posSlot)
|
||||
|
||||
for (len = /*2*/ startLen; ; len++)
|
||||
{
|
||||
UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN];
|
||||
UInt32 lenToPosState = GetLenToPosState(lenTest);
|
||||
COptimal *opt;
|
||||
if (curBack < kNumFullDistances)
|
||||
curAndLenPrice += p->distancesPrices[lenToPosState][curBack];
|
||||
else
|
||||
curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask];
|
||||
|
||||
opt = &p->opt[cur + lenTest];
|
||||
if (curAndLenPrice < opt->price)
|
||||
UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
|
||||
{
|
||||
opt->price = curAndLenPrice;
|
||||
opt->posPrev = cur;
|
||||
opt->backPrev = curBack + LZMA_NUM_REPS;
|
||||
opt->prev1IsChar = False;
|
||||
COptimal *opt;
|
||||
unsigned lenNorm = len - 2;
|
||||
lenNorm = GetLenToPosState2(lenNorm);
|
||||
if (dist < kNumFullDistances)
|
||||
price += p->distancesPrices[lenNorm][dist & (kNumFullDistances - 1)];
|
||||
else
|
||||
price += p->posSlotPrices[lenNorm][posSlot] + p->alignPrices[dist & kAlignMask];
|
||||
|
||||
opt = &p->opt[cur + len];
|
||||
if (price < opt->price)
|
||||
{
|
||||
opt->price = price;
|
||||
opt->len = (UInt32)len;
|
||||
opt->dist = dist + LZMA_NUM_REPS;
|
||||
opt->extra = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (/*_maxMode && */lenTest == matches[offs])
|
||||
if (len == MATCHES[offs])
|
||||
{
|
||||
/* Try Match + Literal + Rep0 */
|
||||
const Byte *data2 = data - (curBack + 1);
|
||||
UInt32 lenTest2 = lenTest + 1;
|
||||
UInt32 limit = lenTest2 + p->numFastBytes;
|
||||
UInt32 nextRepMatchPrice;
|
||||
// if (p->_maxMode) {
|
||||
// MATCH : LIT : REP_0
|
||||
|
||||
const Byte *data2 = data - dist - 1;
|
||||
unsigned len2 = len + 1;
|
||||
unsigned limit = len2 + p->numFastBytes;
|
||||
if (limit > numAvailFull)
|
||||
limit = numAvailFull;
|
||||
for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++);
|
||||
lenTest2 -= lenTest + 1;
|
||||
if (lenTest2 >= 2)
|
||||
|
||||
len2 += 2;
|
||||
if (len2 <= limit)
|
||||
if (data[len2 - 2] == data2[len2 - 2])
|
||||
if (data[len2 - 1] == data2[len2 - 1])
|
||||
{
|
||||
UInt32 state2 = kMatchNextStates[state];
|
||||
UInt32 posStateNext = (position + lenTest) & p->pbMask;
|
||||
UInt32 curAndLenCharPrice = curAndLenPrice +
|
||||
GET_PRICE_0(p->isMatch[state2][posStateNext]) +
|
||||
LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]),
|
||||
data[lenTest], data2[lenTest], p->ProbPrices);
|
||||
state2 = kLiteralNextStates[state2];
|
||||
posStateNext = (posStateNext + 1) & p->pbMask;
|
||||
nextRepMatchPrice = curAndLenCharPrice +
|
||||
GET_PRICE_1(p->isMatch[state2][posStateNext]) +
|
||||
GET_PRICE_1(p->isRep[state2]);
|
||||
|
||||
/* for (; lenTest2 >= 2; lenTest2--) */
|
||||
for (; len2 < limit && data[len2] == data2[len2]; len2++)
|
||||
{}
|
||||
|
||||
len2 -= len;
|
||||
|
||||
// if (len2 >= 3)
|
||||
{
|
||||
unsigned state2 = kMatchNextStates[state];
|
||||
unsigned posState2 = (position + len) & p->pbMask;
|
||||
unsigned offset;
|
||||
price += GET_PRICE_0(p->isMatch[state2][posState2]);
|
||||
price += LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]),
|
||||
data[len], data2[len], p->ProbPrices);
|
||||
|
||||
// state2 = kLiteralNextStates[state2];
|
||||
state2 = kState_LitAfterMatch;
|
||||
|
||||
posState2 = (posState2 + 1) & p->pbMask;
|
||||
price += GetPrice_Rep_0(p, state2, posState2);
|
||||
|
||||
offset = cur + len + len2;
|
||||
|
||||
if (last < offset)
|
||||
last = offset;
|
||||
// do
|
||||
{
|
||||
UInt32 offset = cur + lenTest + 1 + lenTest2;
|
||||
UInt32 curAndLenPrice;
|
||||
UInt32 price2;
|
||||
COptimal *opt;
|
||||
while (lenEnd < offset)
|
||||
p->opt[++lenEnd].price = kInfinityPrice;
|
||||
curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
|
||||
len2--;
|
||||
// price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2);
|
||||
price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2);
|
||||
opt = &p->opt[offset];
|
||||
if (curAndLenPrice < opt->price)
|
||||
// offset--;
|
||||
if (price2 < opt->price)
|
||||
{
|
||||
opt->price = curAndLenPrice;
|
||||
opt->posPrev = cur + lenTest + 1;
|
||||
opt->backPrev = 0;
|
||||
opt->prev1IsChar = True;
|
||||
opt->prev2 = True;
|
||||
opt->posPrev2 = cur;
|
||||
opt->backPrev2 = curBack + LZMA_NUM_REPS;
|
||||
opt->price = price2;
|
||||
opt->len = (UInt32)len2;
|
||||
opt->extra = (CExtra)(len + 1);
|
||||
opt->dist = dist + LZMA_NUM_REPS;
|
||||
}
|
||||
}
|
||||
// while (len2 >= 3);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
offs += 2;
|
||||
if (offs == numPairs)
|
||||
break;
|
||||
curBack = matches[offs + 1];
|
||||
if (curBack >= kNumFullDistances)
|
||||
GetPosSlot2(curBack, posSlot);
|
||||
dist = MATCHES[(size_t)offs + 1];
|
||||
// if (dist >= kNumFullDistances)
|
||||
GetPosSlot2(dist, posSlot)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
do
|
||||
p->opt[last].price = kInfinityPrice;
|
||||
while (--last);
|
||||
|
||||
return Backward(p, cur);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist))
|
||||
|
||||
static UInt32 GetOptimumFast(CLzmaEnc *p, UInt32 *backRes)
|
||||
|
||||
|
||||
static unsigned GetOptimumFast(CLzmaEnc *p)
|
||||
{
|
||||
UInt32 numAvail, mainLen, mainDist, numPairs, repIndex, repLen, i;
|
||||
UInt32 numAvail, mainDist;
|
||||
unsigned mainLen, numPairs, repIndex, repLen, i;
|
||||
const Byte *data;
|
||||
const UInt32 *matches;
|
||||
|
||||
if (p->additionalOffset == 0)
|
||||
mainLen = ReadMatchDistances(p, &numPairs);
|
||||
else
|
||||
{
|
||||
mainLen = p->longestMatchLength;
|
||||
mainLen = p->longestMatchLen;
|
||||
numPairs = p->numPairs;
|
||||
}
|
||||
|
||||
numAvail = p->numAvail;
|
||||
*backRes = (UInt32)-1;
|
||||
p->backRes = MARK_LIT;
|
||||
if (numAvail < 2)
|
||||
return 1;
|
||||
// if (mainLen < 2 && p->state == 0) return 1; // 18.06.notused
|
||||
if (numAvail > LZMA_MATCH_LEN_MAX)
|
||||
numAvail = LZMA_MATCH_LEN_MAX;
|
||||
data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
|
||||
|
||||
repLen = repIndex = 0;
|
||||
|
||||
for (i = 0; i < LZMA_NUM_REPS; i++)
|
||||
{
|
||||
UInt32 len;
|
||||
const Byte *data2 = data - (p->reps[i] + 1);
|
||||
unsigned len;
|
||||
const Byte *data2 = data - p->reps[i];
|
||||
if (data[0] != data2[0] || data[1] != data2[1])
|
||||
continue;
|
||||
for (len = 2; len < numAvail && data[len] == data2[len]; len++);
|
||||
for (len = 2; len < numAvail && data[len] == data2[len]; len++)
|
||||
{}
|
||||
if (len >= p->numFastBytes)
|
||||
{
|
||||
*backRes = i;
|
||||
MovePos(p, len - 1);
|
||||
p->backRes = (UInt32)i;
|
||||
MOVE_POS(p, len - 1)
|
||||
return len;
|
||||
}
|
||||
if (len > repLen)
|
||||
@@ -1550,98 +2012,182 @@ static UInt32 GetOptimumFast(CLzmaEnc *p, UInt32 *backRes)
|
||||
}
|
||||
}
|
||||
|
||||
matches = p->matches;
|
||||
if (mainLen >= p->numFastBytes)
|
||||
{
|
||||
*backRes = matches[numPairs - 1] + LZMA_NUM_REPS;
|
||||
MovePos(p, mainLen - 1);
|
||||
p->backRes = p->matches[(size_t)numPairs - 1] + LZMA_NUM_REPS;
|
||||
MOVE_POS(p, mainLen - 1)
|
||||
return mainLen;
|
||||
}
|
||||
|
||||
mainDist = 0; /* for GCC */
|
||||
|
||||
if (mainLen >= 2)
|
||||
{
|
||||
mainDist = matches[numPairs - 1];
|
||||
while (numPairs > 2 && mainLen == matches[numPairs - 4] + 1)
|
||||
mainDist = p->matches[(size_t)numPairs - 1];
|
||||
while (numPairs > 2)
|
||||
{
|
||||
if (!ChangePair(matches[numPairs - 3], mainDist))
|
||||
UInt32 dist2;
|
||||
if (mainLen != p->matches[(size_t)numPairs - 4] + 1)
|
||||
break;
|
||||
dist2 = p->matches[(size_t)numPairs - 3];
|
||||
if (!ChangePair(dist2, mainDist))
|
||||
break;
|
||||
numPairs -= 2;
|
||||
mainLen = matches[numPairs - 2];
|
||||
mainDist = matches[numPairs - 1];
|
||||
mainLen--;
|
||||
mainDist = dist2;
|
||||
}
|
||||
if (mainLen == 2 && mainDist >= 0x80)
|
||||
mainLen = 1;
|
||||
}
|
||||
|
||||
if (repLen >= 2 && (
|
||||
(repLen + 1 >= mainLen) ||
|
||||
(repLen + 2 >= mainLen && mainDist >= (1 << 9)) ||
|
||||
(repLen + 3 >= mainLen && mainDist >= (1 << 15))))
|
||||
if (repLen >= 2)
|
||||
if ( repLen + 1 >= mainLen
|
||||
|| (repLen + 2 >= mainLen && mainDist >= (1 << 9))
|
||||
|| (repLen + 3 >= mainLen && mainDist >= (1 << 15)))
|
||||
{
|
||||
*backRes = repIndex;
|
||||
MovePos(p, repLen - 1);
|
||||
p->backRes = (UInt32)repIndex;
|
||||
MOVE_POS(p, repLen - 1)
|
||||
return repLen;
|
||||
}
|
||||
|
||||
if (mainLen < 2 || numAvail <= 2)
|
||||
return 1;
|
||||
|
||||
p->longestMatchLength = ReadMatchDistances(p, &p->numPairs);
|
||||
if (p->longestMatchLength >= 2)
|
||||
{
|
||||
UInt32 newDistance = matches[p->numPairs - 1];
|
||||
if ((p->longestMatchLength >= mainLen && newDistance < mainDist) ||
|
||||
(p->longestMatchLength == mainLen + 1 && !ChangePair(mainDist, newDistance)) ||
|
||||
(p->longestMatchLength > mainLen + 1) ||
|
||||
(p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && ChangePair(newDistance, mainDist)))
|
||||
return 1;
|
||||
unsigned len1 = ReadMatchDistances(p, &p->numPairs);
|
||||
p->longestMatchLen = len1;
|
||||
|
||||
if (len1 >= 2)
|
||||
{
|
||||
UInt32 newDist = p->matches[(size_t)p->numPairs - 1];
|
||||
if ( (len1 >= mainLen && newDist < mainDist)
|
||||
|| (len1 == mainLen + 1 && !ChangePair(mainDist, newDist))
|
||||
|| (len1 > mainLen + 1)
|
||||
|| (len1 + 1 >= mainLen && mainLen >= 3 && ChangePair(newDist, mainDist)))
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
|
||||
|
||||
for (i = 0; i < LZMA_NUM_REPS; i++)
|
||||
{
|
||||
UInt32 len, limit;
|
||||
const Byte *data2 = data - (p->reps[i] + 1);
|
||||
unsigned len, limit;
|
||||
const Byte *data2 = data - p->reps[i];
|
||||
if (data[0] != data2[0] || data[1] != data2[1])
|
||||
continue;
|
||||
limit = mainLen - 1;
|
||||
for (len = 2; len < limit && data[len] == data2[len]; len++);
|
||||
if (len >= limit)
|
||||
return 1;
|
||||
for (len = 2;; len++)
|
||||
{
|
||||
if (len >= limit)
|
||||
return 1;
|
||||
if (data[len] != data2[len])
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
p->backRes = mainDist + LZMA_NUM_REPS;
|
||||
if (mainLen != 2)
|
||||
{
|
||||
MOVE_POS(p, mainLen - 2)
|
||||
}
|
||||
*backRes = mainDist + LZMA_NUM_REPS;
|
||||
MovePos(p, mainLen - 2);
|
||||
return mainLen;
|
||||
}
|
||||
|
||||
static void WriteEndMarker(CLzmaEnc *p, UInt32 posState)
|
||||
|
||||
|
||||
|
||||
static void WriteEndMarker(CLzmaEnc *p, unsigned posState)
|
||||
{
|
||||
UInt32 len;
|
||||
RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1);
|
||||
RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0);
|
||||
UInt32 range;
|
||||
range = p->rc.range;
|
||||
{
|
||||
UInt32 ttt, newBound;
|
||||
CLzmaProb *prob = &p->isMatch[p->state][posState];
|
||||
RC_BIT_PRE(&p->rc, prob)
|
||||
RC_BIT_1(&p->rc, prob)
|
||||
prob = &p->isRep[p->state];
|
||||
RC_BIT_PRE(&p->rc, prob)
|
||||
RC_BIT_0(&p->rc, prob)
|
||||
}
|
||||
p->state = kMatchNextStates[p->state];
|
||||
len = LZMA_MATCH_LEN_MIN;
|
||||
LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
|
||||
RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1);
|
||||
RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits);
|
||||
RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask);
|
||||
|
||||
p->rc.range = range;
|
||||
LenEnc_Encode(&p->lenProbs, &p->rc, 0, posState);
|
||||
range = p->rc.range;
|
||||
|
||||
{
|
||||
// RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[0], (1 << kNumPosSlotBits) - 1);
|
||||
CLzmaProb *probs = p->posSlotEncoder[0];
|
||||
unsigned m = 1;
|
||||
do
|
||||
{
|
||||
UInt32 ttt, newBound;
|
||||
RC_BIT_PRE(p, probs + m)
|
||||
RC_BIT_1(&p->rc, probs + m)
|
||||
m = (m << 1) + 1;
|
||||
}
|
||||
while (m < (1 << kNumPosSlotBits));
|
||||
}
|
||||
{
|
||||
// RangeEnc_EncodeDirectBits(&p->rc, ((UInt32)1 << (30 - kNumAlignBits)) - 1, 30 - kNumAlignBits); UInt32 range = p->range;
|
||||
unsigned numBits = 30 - kNumAlignBits;
|
||||
do
|
||||
{
|
||||
range >>= 1;
|
||||
p->rc.low += range;
|
||||
RC_NORM(&p->rc)
|
||||
}
|
||||
while (--numBits);
|
||||
}
|
||||
|
||||
{
|
||||
// RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask);
|
||||
CLzmaProb *probs = p->posAlignEncoder;
|
||||
unsigned m = 1;
|
||||
do
|
||||
{
|
||||
UInt32 ttt, newBound;
|
||||
RC_BIT_PRE(p, probs + m)
|
||||
RC_BIT_1(&p->rc, probs + m)
|
||||
m = (m << 1) + 1;
|
||||
}
|
||||
while (m < kAlignTableSize);
|
||||
}
|
||||
p->rc.range = range;
|
||||
}
|
||||
|
||||
|
||||
static SRes CheckErrors(CLzmaEnc *p)
|
||||
{
|
||||
if (p->result != SZ_OK)
|
||||
return p->result;
|
||||
if (p->rc.res != SZ_OK)
|
||||
p->result = SZ_ERROR_WRITE;
|
||||
if (p->matchFinderBase.result != SZ_OK)
|
||||
|
||||
#ifndef Z7_ST
|
||||
if (
|
||||
// p->mf_Failure ||
|
||||
(p->mtMode &&
|
||||
( // p->matchFinderMt.failure_LZ_LZ ||
|
||||
p->matchFinderMt.failure_LZ_BT))
|
||||
)
|
||||
{
|
||||
p->result = MY_HRES_ERROR_INTERNAL_ERROR;
|
||||
// printf("\nCheckErrors p->matchFinderMt.failureLZ\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (MFB.result != SZ_OK)
|
||||
p->result = SZ_ERROR_READ;
|
||||
|
||||
if (p->result != SZ_OK)
|
||||
p->finished = True;
|
||||
return p->result;
|
||||
}
|
||||
|
||||
static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
|
||||
|
||||
Z7_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
|
||||
{
|
||||
/* ReleaseMFStream(); */
|
||||
p->finished = True;
|
||||
@@ -1652,61 +2198,140 @@ static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
|
||||
return CheckErrors(p);
|
||||
}
|
||||
|
||||
static void FillAlignPrices(CLzmaEnc *p)
|
||||
|
||||
Z7_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
|
||||
{
|
||||
UInt32 i;
|
||||
for (i = 0; i < kAlignTableSize; i++)
|
||||
p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices);
|
||||
p->alignPriceCount = 0;
|
||||
unsigned i;
|
||||
const CProbPrice *ProbPrices = p->ProbPrices;
|
||||
const CLzmaProb *probs = p->posAlignEncoder;
|
||||
// p->alignPriceCount = 0;
|
||||
for (i = 0; i < kAlignTableSize / 2; i++)
|
||||
{
|
||||
UInt32 price = 0;
|
||||
unsigned sym = i;
|
||||
unsigned m = 1;
|
||||
unsigned bit;
|
||||
UInt32 prob;
|
||||
bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
|
||||
bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
|
||||
bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
|
||||
prob = probs[m];
|
||||
p->alignPrices[i ] = price + GET_PRICEa_0(prob);
|
||||
p->alignPrices[i + 8] = price + GET_PRICEa_1(prob);
|
||||
// p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices);
|
||||
}
|
||||
}
|
||||
|
||||
static void FillDistancesPrices(CLzmaEnc *p)
|
||||
{
|
||||
UInt32 tempPrices[kNumFullDistances];
|
||||
UInt32 i, lenToPosState;
|
||||
for (i = kStartPosModelIndex; i < kNumFullDistances; i++)
|
||||
{
|
||||
UInt32 posSlot = GetPosSlot1(i);
|
||||
UInt32 footerBits = ((posSlot >> 1) - 1);
|
||||
UInt32 base = ((2 | (posSlot & 1)) << footerBits);
|
||||
tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, i - base, p->ProbPrices);
|
||||
}
|
||||
|
||||
for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++)
|
||||
Z7_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
|
||||
{
|
||||
// int y; for (y = 0; y < 100; y++) {
|
||||
|
||||
UInt32 tempPrices[kNumFullDistances];
|
||||
unsigned i, lps;
|
||||
|
||||
const CProbPrice *ProbPrices = p->ProbPrices;
|
||||
p->matchPriceCount = 0;
|
||||
|
||||
for (i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++)
|
||||
{
|
||||
UInt32 posSlot;
|
||||
const CLzmaProb *encoder = p->posSlotEncoder[lenToPosState];
|
||||
UInt32 *posSlotPrices = p->posSlotPrices[lenToPosState];
|
||||
for (posSlot = 0; posSlot < p->distTableSize; posSlot++)
|
||||
posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices);
|
||||
for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++)
|
||||
posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits);
|
||||
unsigned posSlot = GetPosSlot1(i);
|
||||
unsigned footerBits = (posSlot >> 1) - 1;
|
||||
unsigned base = ((2 | (posSlot & 1)) << footerBits);
|
||||
const CLzmaProb *probs = p->posEncoders + (size_t)base * 2;
|
||||
// tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base, footerBits, i - base, p->ProbPrices);
|
||||
UInt32 price = 0;
|
||||
unsigned m = 1;
|
||||
unsigned sym = i;
|
||||
unsigned offset = (unsigned)1 << footerBits;
|
||||
base += i;
|
||||
|
||||
if (footerBits)
|
||||
do
|
||||
{
|
||||
unsigned bit = sym & 1;
|
||||
sym >>= 1;
|
||||
price += GET_PRICEa(probs[m], bit);
|
||||
m = (m << 1) + bit;
|
||||
}
|
||||
while (--footerBits);
|
||||
|
||||
{
|
||||
UInt32 *distancesPrices = p->distancesPrices[lenToPosState];
|
||||
UInt32 i;
|
||||
for (i = 0; i < kStartPosModelIndex; i++)
|
||||
distancesPrices[i] = posSlotPrices[i];
|
||||
for (; i < kNumFullDistances; i++)
|
||||
distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i];
|
||||
unsigned prob = probs[m];
|
||||
tempPrices[base ] = price + GET_PRICEa_0(prob);
|
||||
tempPrices[base + offset] = price + GET_PRICEa_1(prob);
|
||||
}
|
||||
}
|
||||
p->matchPriceCount = 0;
|
||||
|
||||
for (lps = 0; lps < kNumLenToPosStates; lps++)
|
||||
{
|
||||
unsigned slot;
|
||||
unsigned distTableSize2 = (p->distTableSize + 1) >> 1;
|
||||
UInt32 *posSlotPrices = p->posSlotPrices[lps];
|
||||
const CLzmaProb *probs = p->posSlotEncoder[lps];
|
||||
|
||||
for (slot = 0; slot < distTableSize2; slot++)
|
||||
{
|
||||
// posSlotPrices[slot] = RcTree_GetPrice(encoder, kNumPosSlotBits, slot, p->ProbPrices);
|
||||
UInt32 price;
|
||||
unsigned bit;
|
||||
unsigned sym = slot + (1 << (kNumPosSlotBits - 1));
|
||||
unsigned prob;
|
||||
bit = sym & 1; sym >>= 1; price = GET_PRICEa(probs[sym], bit);
|
||||
bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
|
||||
bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
|
||||
bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
|
||||
bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
|
||||
prob = probs[(size_t)slot + (1 << (kNumPosSlotBits - 1))];
|
||||
posSlotPrices[(size_t)slot * 2 ] = price + GET_PRICEa_0(prob);
|
||||
posSlotPrices[(size_t)slot * 2 + 1] = price + GET_PRICEa_1(prob);
|
||||
}
|
||||
|
||||
{
|
||||
UInt32 delta = ((UInt32)((kEndPosModelIndex / 2 - 1) - kNumAlignBits) << kNumBitPriceShiftBits);
|
||||
for (slot = kEndPosModelIndex / 2; slot < distTableSize2; slot++)
|
||||
{
|
||||
posSlotPrices[(size_t)slot * 2 ] += delta;
|
||||
posSlotPrices[(size_t)slot * 2 + 1] += delta;
|
||||
delta += ((UInt32)1 << kNumBitPriceShiftBits);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
UInt32 *dp = p->distancesPrices[lps];
|
||||
|
||||
dp[0] = posSlotPrices[0];
|
||||
dp[1] = posSlotPrices[1];
|
||||
dp[2] = posSlotPrices[2];
|
||||
dp[3] = posSlotPrices[3];
|
||||
|
||||
for (i = 4; i < kNumFullDistances; i += 2)
|
||||
{
|
||||
UInt32 slotPrice = posSlotPrices[GetPosSlot1(i)];
|
||||
dp[i ] = slotPrice + tempPrices[i];
|
||||
dp[i + 1] = slotPrice + tempPrices[i + 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
// }
|
||||
}
|
||||
|
||||
void LzmaEnc_Construct(CLzmaEnc *p)
|
||||
|
||||
|
||||
static void LzmaEnc_Construct(CLzmaEnc *p)
|
||||
{
|
||||
RangeEnc_Construct(&p->rc);
|
||||
MatchFinder_Construct(&p->matchFinderBase);
|
||||
#ifdef COMPRESS_MF_MT
|
||||
MatchFinder_Construct(&MFB);
|
||||
|
||||
#ifndef Z7_ST
|
||||
p->matchFinderMt.MatchFinder = &MFB;
|
||||
MatchFinderMt_Construct(&p->matchFinderMt);
|
||||
p->matchFinderMt.MatchFinder = &p->matchFinderBase;
|
||||
#endif
|
||||
|
||||
{
|
||||
CLzmaEncProps props;
|
||||
LzmaEncProps_Init(&props);
|
||||
LzmaEnc_SetProps(p, &props);
|
||||
LzmaEnc_SetProps((CLzmaEncHandle)(void *)p, &props);
|
||||
}
|
||||
|
||||
#ifndef LZMA_LOG_BSR
|
||||
@@ -1714,226 +2339,370 @@ void LzmaEnc_Construct(CLzmaEnc *p)
|
||||
#endif
|
||||
|
||||
LzmaEnc_InitPriceTables(p->ProbPrices);
|
||||
p->litProbs = 0;
|
||||
p->saveState.litProbs = 0;
|
||||
p->litProbs = NULL;
|
||||
p->saveState.litProbs = NULL;
|
||||
}
|
||||
|
||||
CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc)
|
||||
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
|
||||
{
|
||||
void *p;
|
||||
p = alloc->Alloc(alloc, sizeof(CLzmaEnc));
|
||||
if (p != 0)
|
||||
p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc));
|
||||
if (p)
|
||||
LzmaEnc_Construct((CLzmaEnc *)p);
|
||||
return p;
|
||||
}
|
||||
|
||||
void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAlloc *alloc)
|
||||
static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
|
||||
{
|
||||
alloc->Free(alloc, p->litProbs);
|
||||
alloc->Free(alloc, p->saveState.litProbs);
|
||||
p->litProbs = 0;
|
||||
p->saveState.litProbs = 0;
|
||||
ISzAlloc_Free(alloc, p->litProbs);
|
||||
ISzAlloc_Free(alloc, p->saveState.litProbs);
|
||||
p->litProbs = NULL;
|
||||
p->saveState.litProbs = NULL;
|
||||
}
|
||||
|
||||
void LzmaEnc_Destruct(CLzmaEnc *p, ISzAlloc *alloc, ISzAlloc *allocBig)
|
||||
static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
|
||||
{
|
||||
#ifdef COMPRESS_MF_MT
|
||||
#ifndef Z7_ST
|
||||
MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
|
||||
#endif
|
||||
MatchFinder_Free(&p->matchFinderBase, allocBig);
|
||||
|
||||
MatchFinder_Free(&MFB, allocBig);
|
||||
LzmaEnc_FreeLits(p, alloc);
|
||||
RangeEnc_Free(&p->rc, alloc);
|
||||
}
|
||||
|
||||
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig)
|
||||
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
|
||||
{
|
||||
LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig);
|
||||
alloc->Free(alloc, p);
|
||||
// GET_CLzmaEnc_p
|
||||
LzmaEnc_Destruct(p, alloc, allocBig);
|
||||
ISzAlloc_Free(alloc, p);
|
||||
}
|
||||
|
||||
static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize, UInt32 maxUnpackSize)
|
||||
|
||||
Z7_NO_INLINE
|
||||
static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize)
|
||||
{
|
||||
UInt32 nowPos32, startPos32;
|
||||
if (p->inStream != 0)
|
||||
if (p->needInit)
|
||||
{
|
||||
p->matchFinderBase.stream = p->inStream;
|
||||
#ifndef Z7_ST
|
||||
if (p->mtMode)
|
||||
{
|
||||
RINOK(MatchFinderMt_InitMt(&p->matchFinderMt))
|
||||
}
|
||||
#endif
|
||||
p->matchFinder.Init(p->matchFinderObj);
|
||||
p->inStream = 0;
|
||||
p->needInit = 0;
|
||||
}
|
||||
|
||||
if (p->finished)
|
||||
return p->result;
|
||||
RINOK(CheckErrors(p));
|
||||
RINOK(CheckErrors(p))
|
||||
|
||||
nowPos32 = (UInt32)p->nowPos64;
|
||||
startPos32 = nowPos32;
|
||||
|
||||
if (p->nowPos64 == 0)
|
||||
{
|
||||
UInt32 numPairs;
|
||||
unsigned numPairs;
|
||||
Byte curByte;
|
||||
if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
|
||||
return Flush(p, nowPos32);
|
||||
ReadMatchDistances(p, &numPairs);
|
||||
RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0);
|
||||
p->state = kLiteralNextStates[p->state];
|
||||
curByte = p->matchFinder.GetIndexByte(p->matchFinderObj, 0 - p->additionalOffset);
|
||||
RangeEnc_EncodeBit_0(&p->rc, &p->isMatch[kState_Start][0]);
|
||||
// p->state = kLiteralNextStates[p->state];
|
||||
curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset);
|
||||
LitEnc_Encode(&p->rc, p->litProbs, curByte);
|
||||
p->additionalOffset--;
|
||||
nowPos32++;
|
||||
}
|
||||
|
||||
if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0)
|
||||
|
||||
for (;;)
|
||||
{
|
||||
UInt32 pos, len, posState;
|
||||
|
||||
UInt32 dist;
|
||||
unsigned len, posState;
|
||||
UInt32 range, ttt, newBound;
|
||||
CLzmaProb *probs;
|
||||
|
||||
if (p->fastMode)
|
||||
len = GetOptimumFast(p, &pos);
|
||||
len = GetOptimumFast(p);
|
||||
else
|
||||
len = GetOptimum(p, nowPos32, &pos);
|
||||
{
|
||||
unsigned oci = p->optCur;
|
||||
if (p->optEnd == oci)
|
||||
len = GetOptimum(p, nowPos32);
|
||||
else
|
||||
{
|
||||
const COptimal *opt = &p->opt[oci];
|
||||
len = opt->len;
|
||||
p->backRes = opt->dist;
|
||||
p->optCur = oci + 1;
|
||||
}
|
||||
}
|
||||
|
||||
posState = (unsigned)nowPos32 & p->pbMask;
|
||||
range = p->rc.range;
|
||||
probs = &p->isMatch[p->state][posState];
|
||||
|
||||
RC_BIT_PRE(&p->rc, probs)
|
||||
|
||||
dist = p->backRes;
|
||||
|
||||
#ifdef SHOW_STAT2
|
||||
printf("\n pos = %4X, len = %d pos = %d", nowPos32, len, pos);
|
||||
printf("\n pos = %6X, len = %3u pos = %6u", nowPos32, len, dist);
|
||||
#endif
|
||||
|
||||
posState = nowPos32 & p->pbMask;
|
||||
if (len == 1 && pos == (UInt32)-1)
|
||||
if (dist == MARK_LIT)
|
||||
{
|
||||
Byte curByte;
|
||||
CLzmaProb *probs;
|
||||
const Byte *data;
|
||||
unsigned state;
|
||||
|
||||
RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0);
|
||||
RC_BIT_0(&p->rc, probs)
|
||||
p->rc.range = range;
|
||||
data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
|
||||
curByte = *data;
|
||||
probs = LIT_PROBS(nowPos32, *(data - 1));
|
||||
if (IsCharState(p->state))
|
||||
curByte = *data;
|
||||
state = p->state;
|
||||
p->state = kLiteralNextStates[state];
|
||||
if (IsLitState(state))
|
||||
LitEnc_Encode(&p->rc, probs, curByte);
|
||||
else
|
||||
LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1));
|
||||
p->state = kLiteralNextStates[p->state];
|
||||
LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0]));
|
||||
}
|
||||
else
|
||||
{
|
||||
RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1);
|
||||
if (pos < LZMA_NUM_REPS)
|
||||
RC_BIT_1(&p->rc, probs)
|
||||
probs = &p->isRep[p->state];
|
||||
RC_BIT_PRE(&p->rc, probs)
|
||||
|
||||
if (dist < LZMA_NUM_REPS)
|
||||
{
|
||||
RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1);
|
||||
if (pos == 0)
|
||||
RC_BIT_1(&p->rc, probs)
|
||||
probs = &p->isRepG0[p->state];
|
||||
RC_BIT_PRE(&p->rc, probs)
|
||||
if (dist == 0)
|
||||
{
|
||||
RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0);
|
||||
RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1));
|
||||
RC_BIT_0(&p->rc, probs)
|
||||
probs = &p->isRep0Long[p->state][posState];
|
||||
RC_BIT_PRE(&p->rc, probs)
|
||||
if (len != 1)
|
||||
{
|
||||
RC_BIT_1_BASE(&p->rc, probs)
|
||||
}
|
||||
else
|
||||
{
|
||||
RC_BIT_0_BASE(&p->rc, probs)
|
||||
p->state = kShortRepNextStates[p->state];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
UInt32 distance = p->reps[pos];
|
||||
RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1);
|
||||
if (pos == 1)
|
||||
RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0);
|
||||
RC_BIT_1(&p->rc, probs)
|
||||
probs = &p->isRepG1[p->state];
|
||||
RC_BIT_PRE(&p->rc, probs)
|
||||
if (dist == 1)
|
||||
{
|
||||
RC_BIT_0_BASE(&p->rc, probs)
|
||||
dist = p->reps[1];
|
||||
}
|
||||
else
|
||||
{
|
||||
RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1);
|
||||
RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2);
|
||||
if (pos == 3)
|
||||
RC_BIT_1(&p->rc, probs)
|
||||
probs = &p->isRepG2[p->state];
|
||||
RC_BIT_PRE(&p->rc, probs)
|
||||
if (dist == 2)
|
||||
{
|
||||
RC_BIT_0_BASE(&p->rc, probs)
|
||||
dist = p->reps[2];
|
||||
}
|
||||
else
|
||||
{
|
||||
RC_BIT_1_BASE(&p->rc, probs)
|
||||
dist = p->reps[3];
|
||||
p->reps[3] = p->reps[2];
|
||||
}
|
||||
p->reps[2] = p->reps[1];
|
||||
}
|
||||
p->reps[1] = p->reps[0];
|
||||
p->reps[0] = distance;
|
||||
p->reps[0] = dist;
|
||||
}
|
||||
if (len == 1)
|
||||
p->state = kShortRepNextStates[p->state];
|
||||
else
|
||||
|
||||
RC_NORM(&p->rc)
|
||||
|
||||
p->rc.range = range;
|
||||
|
||||
if (len != 1)
|
||||
{
|
||||
LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
|
||||
LenEnc_Encode(&p->repLenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState);
|
||||
--p->repLenEncCounter;
|
||||
p->state = kRepNextStates[p->state];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
UInt32 posSlot;
|
||||
RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0);
|
||||
unsigned posSlot;
|
||||
RC_BIT_0(&p->rc, probs)
|
||||
p->rc.range = range;
|
||||
p->state = kMatchNextStates[p->state];
|
||||
LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
|
||||
pos -= LZMA_NUM_REPS;
|
||||
GetPosSlot(pos, posSlot);
|
||||
RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot);
|
||||
|
||||
if (posSlot >= kStartPosModelIndex)
|
||||
{
|
||||
UInt32 footerBits = ((posSlot >> 1) - 1);
|
||||
UInt32 base = ((2 | (posSlot & 1)) << footerBits);
|
||||
UInt32 posReduced = pos - base;
|
||||
|
||||
if (posSlot < kEndPosModelIndex)
|
||||
RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced);
|
||||
else
|
||||
{
|
||||
RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits);
|
||||
RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask);
|
||||
p->alignPriceCount++;
|
||||
}
|
||||
}
|
||||
LenEnc_Encode(&p->lenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState);
|
||||
// --p->lenEnc.counter;
|
||||
|
||||
dist -= LZMA_NUM_REPS;
|
||||
p->reps[3] = p->reps[2];
|
||||
p->reps[2] = p->reps[1];
|
||||
p->reps[1] = p->reps[0];
|
||||
p->reps[0] = pos;
|
||||
p->reps[0] = dist + 1;
|
||||
|
||||
p->matchPriceCount++;
|
||||
GetPosSlot(dist, posSlot)
|
||||
// RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot);
|
||||
{
|
||||
UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits);
|
||||
range = p->rc.range;
|
||||
probs = p->posSlotEncoder[GetLenToPosState(len)];
|
||||
do
|
||||
{
|
||||
CLzmaProb *prob = probs + (sym >> kNumPosSlotBits);
|
||||
UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1;
|
||||
sym <<= 1;
|
||||
RC_BIT(&p->rc, prob, bit)
|
||||
}
|
||||
while (sym < (1 << kNumPosSlotBits * 2));
|
||||
p->rc.range = range;
|
||||
}
|
||||
|
||||
if (dist >= kStartPosModelIndex)
|
||||
{
|
||||
unsigned footerBits = ((posSlot >> 1) - 1);
|
||||
|
||||
if (dist < kNumFullDistances)
|
||||
{
|
||||
unsigned base = ((2 | (posSlot & 1)) << footerBits);
|
||||
RcTree_ReverseEncode(&p->rc, p->posEncoders + base, footerBits, (unsigned)(dist /* - base */));
|
||||
}
|
||||
else
|
||||
{
|
||||
UInt32 pos2 = (dist | 0xF) << (32 - footerBits);
|
||||
range = p->rc.range;
|
||||
// RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits);
|
||||
/*
|
||||
do
|
||||
{
|
||||
range >>= 1;
|
||||
p->rc.low += range & (0 - ((dist >> --footerBits) & 1));
|
||||
RC_NORM(&p->rc)
|
||||
}
|
||||
while (footerBits > kNumAlignBits);
|
||||
*/
|
||||
do
|
||||
{
|
||||
range >>= 1;
|
||||
p->rc.low += range & (0 - (pos2 >> 31));
|
||||
pos2 += pos2;
|
||||
RC_NORM(&p->rc)
|
||||
}
|
||||
while (pos2 != 0xF0000000);
|
||||
|
||||
|
||||
// RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask);
|
||||
|
||||
{
|
||||
unsigned m = 1;
|
||||
unsigned bit;
|
||||
bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
|
||||
bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
|
||||
bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
|
||||
bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit)
|
||||
p->rc.range = range;
|
||||
// p->alignPriceCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nowPos32 += (UInt32)len;
|
||||
p->additionalOffset -= len;
|
||||
nowPos32 += len;
|
||||
|
||||
if (p->additionalOffset == 0)
|
||||
{
|
||||
UInt32 processed;
|
||||
|
||||
if (!p->fastMode)
|
||||
{
|
||||
if (p->matchPriceCount >= (1 << 7))
|
||||
FillDistancesPrices(p);
|
||||
if (p->alignPriceCount >= kAlignTableSize)
|
||||
/*
|
||||
if (p->alignPriceCount >= 16) // kAlignTableSize
|
||||
FillAlignPrices(p);
|
||||
if (p->matchPriceCount >= 128)
|
||||
FillDistancesPrices(p);
|
||||
if (p->lenEnc.counter <= 0)
|
||||
LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
|
||||
*/
|
||||
if (p->matchPriceCount >= 64)
|
||||
{
|
||||
FillAlignPrices(p);
|
||||
// { int y; for (y = 0; y < 100; y++) {
|
||||
FillDistancesPrices(p);
|
||||
// }}
|
||||
LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
|
||||
}
|
||||
if (p->repLenEncCounter <= 0)
|
||||
{
|
||||
p->repLenEncCounter = REP_LEN_COUNT;
|
||||
LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
|
||||
}
|
||||
}
|
||||
|
||||
if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
|
||||
break;
|
||||
processed = nowPos32 - startPos32;
|
||||
if (useLimits)
|
||||
|
||||
if (maxPackSize)
|
||||
{
|
||||
if (processed + kNumOpts + 300 >= maxUnpackSize ||
|
||||
RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize)
|
||||
if (processed + kNumOpts + 300 >= maxUnpackSize
|
||||
|| RangeEnc_GetProcessed_sizet(&p->rc) + kPackReserve >= maxPackSize)
|
||||
break;
|
||||
}
|
||||
else if (processed >= (1 << 15))
|
||||
else if (processed >= (1 << 17))
|
||||
{
|
||||
p->nowPos64 += nowPos32 - startPos32;
|
||||
return CheckErrors(p);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
p->nowPos64 += nowPos32 - startPos32;
|
||||
return Flush(p, nowPos32);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define kBigHashDicLimit ((UInt32)1 << 24)
|
||||
|
||||
static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
|
||||
static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
|
||||
{
|
||||
UInt32 beforeSize = kNumOpts;
|
||||
UInt32 dictSize;
|
||||
|
||||
if (!RangeEnc_Alloc(&p->rc, alloc))
|
||||
return SZ_ERROR_MEM;
|
||||
#ifdef COMPRESS_MF_MT
|
||||
Bool btMode = (p->matchFinderBase.btMode != 0);;
|
||||
p->mtMode = (p->multiThread && !p->fastMode && btMode);
|
||||
|
||||
#ifndef Z7_ST
|
||||
p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0));
|
||||
#endif
|
||||
|
||||
{
|
||||
unsigned lclp = p->lc + p->lp;
|
||||
if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp)
|
||||
if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp)
|
||||
{
|
||||
LzmaEnc_FreeLits(p, alloc);
|
||||
p->litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
|
||||
p->saveState.litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
|
||||
if (p->litProbs == 0 || p->saveState.litProbs == 0)
|
||||
p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
|
||||
p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
|
||||
if (!p->litProbs || !p->saveState.litProbs)
|
||||
{
|
||||
LzmaEnc_FreeLits(p, alloc);
|
||||
return SZ_ERROR_MEM;
|
||||
@@ -1942,42 +2711,71 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, I
|
||||
}
|
||||
}
|
||||
|
||||
p->matchFinderBase.bigHash = (p->dictSize > kBigHashDicLimit);
|
||||
MFB.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
|
||||
|
||||
if (beforeSize + p->dictSize < keepWindowSize)
|
||||
beforeSize = keepWindowSize - p->dictSize;
|
||||
|
||||
#ifdef COMPRESS_MF_MT
|
||||
dictSize = p->dictSize;
|
||||
if (dictSize == ((UInt32)2 << 30) ||
|
||||
dictSize == ((UInt32)3 << 30))
|
||||
{
|
||||
/* 21.03 : here we reduce the dictionary for 2 reasons:
|
||||
1) we don't want 32-bit back_distance matches in decoder for 2 GB dictionary.
|
||||
2) we want to elimate useless last MatchFinder_Normalize3() for corner cases,
|
||||
where data size is aligned for 1 GB: 5/6/8 GB.
|
||||
That reducing must be >= 1 for such corner cases. */
|
||||
dictSize -= 1;
|
||||
}
|
||||
|
||||
if (beforeSize + dictSize < keepWindowSize)
|
||||
beforeSize = keepWindowSize - dictSize;
|
||||
|
||||
/* in worst case we can look ahead for
|
||||
max(LZMA_MATCH_LEN_MAX, numFastBytes + 1 + numFastBytes) bytes.
|
||||
we send larger value for (keepAfter) to MantchFinder_Create():
|
||||
(numFastBytes + LZMA_MATCH_LEN_MAX + 1)
|
||||
*/
|
||||
|
||||
#ifndef Z7_ST
|
||||
if (p->mtMode)
|
||||
{
|
||||
RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig));
|
||||
RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize,
|
||||
p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */
|
||||
, allocBig))
|
||||
p->matchFinderObj = &p->matchFinderMt;
|
||||
MFB.bigHash = (Byte)(MFB.hashMask >= 0xFFFFFF ? 1 : 0);
|
||||
MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
|
||||
if (!MatchFinder_Create(&MFB, dictSize, beforeSize,
|
||||
p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 21.03 */
|
||||
, allocBig))
|
||||
return SZ_ERROR_MEM;
|
||||
p->matchFinderObj = &p->matchFinderBase;
|
||||
MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
|
||||
p->matchFinderObj = &MFB;
|
||||
MatchFinder_CreateVTable(&MFB, &p->matchFinder);
|
||||
}
|
||||
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
void LzmaEnc_Init(CLzmaEnc *p)
|
||||
static void LzmaEnc_Init(CLzmaEnc *p)
|
||||
{
|
||||
UInt32 i;
|
||||
unsigned i;
|
||||
p->state = 0;
|
||||
for (i = 0 ; i < LZMA_NUM_REPS; i++)
|
||||
p->reps[i] = 0;
|
||||
p->reps[0] =
|
||||
p->reps[1] =
|
||||
p->reps[2] =
|
||||
p->reps[3] = 1;
|
||||
|
||||
RangeEnc_Init(&p->rc);
|
||||
|
||||
for (i = 0; i < (1 << kNumAlignBits); i++)
|
||||
p->posAlignEncoder[i] = kProbInitValue;
|
||||
|
||||
for (i = 0; i < kNumStates; i++)
|
||||
{
|
||||
UInt32 j;
|
||||
unsigned j;
|
||||
for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++)
|
||||
{
|
||||
p->isMatch[i][j] = kProbInitValue;
|
||||
@@ -1989,41 +2787,50 @@ void LzmaEnc_Init(CLzmaEnc *p)
|
||||
p->isRepG2[i] = kProbInitValue;
|
||||
}
|
||||
|
||||
{
|
||||
UInt32 num = 0x300 << (p->lp + p->lc);
|
||||
for (i = 0; i < num; i++)
|
||||
p->litProbs[i] = kProbInitValue;
|
||||
}
|
||||
|
||||
{
|
||||
for (i = 0; i < kNumLenToPosStates; i++)
|
||||
{
|
||||
CLzmaProb *probs = p->posSlotEncoder[i];
|
||||
UInt32 j;
|
||||
unsigned j;
|
||||
for (j = 0; j < (1 << kNumPosSlotBits); j++)
|
||||
probs[j] = kProbInitValue;
|
||||
}
|
||||
}
|
||||
{
|
||||
for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++)
|
||||
for (i = 0; i < kNumFullDistances; i++)
|
||||
p->posEncoders[i] = kProbInitValue;
|
||||
}
|
||||
|
||||
LenEnc_Init(&p->lenEnc.p);
|
||||
LenEnc_Init(&p->repLenEnc.p);
|
||||
{
|
||||
UInt32 num = (UInt32)0x300 << (p->lp + p->lc);
|
||||
UInt32 k;
|
||||
CLzmaProb *probs = p->litProbs;
|
||||
for (k = 0; k < num; k++)
|
||||
probs[k] = kProbInitValue;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1 << kNumAlignBits); i++)
|
||||
p->posAlignEncoder[i] = kProbInitValue;
|
||||
|
||||
p->optimumEndIndex = 0;
|
||||
p->optimumCurrentIndex = 0;
|
||||
LenEnc_Init(&p->lenProbs);
|
||||
LenEnc_Init(&p->repLenProbs);
|
||||
|
||||
p->optEnd = 0;
|
||||
p->optCur = 0;
|
||||
|
||||
{
|
||||
for (i = 0; i < kNumOpts; i++)
|
||||
p->opt[i].price = kInfinityPrice;
|
||||
}
|
||||
|
||||
p->additionalOffset = 0;
|
||||
|
||||
p->pbMask = (1 << p->pb) - 1;
|
||||
p->lpMask = (1 << p->lp) - 1;
|
||||
p->pbMask = ((unsigned)1 << p->pb) - 1;
|
||||
p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);
|
||||
|
||||
// p->mf_Failure = False;
|
||||
}
|
||||
|
||||
void LzmaEnc_InitPrices(CLzmaEnc *p)
|
||||
|
||||
static void LzmaEnc_InitPrices(CLzmaEnc *p)
|
||||
{
|
||||
if (!p->fastMode)
|
||||
{
|
||||
@@ -2034,116 +2841,125 @@ void LzmaEnc_InitPrices(CLzmaEnc *p)
|
||||
p->lenEnc.tableSize =
|
||||
p->repLenEnc.tableSize =
|
||||
p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN;
|
||||
LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices);
|
||||
LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices);
|
||||
|
||||
p->repLenEncCounter = REP_LEN_COUNT;
|
||||
|
||||
LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
|
||||
LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
|
||||
}
|
||||
|
||||
static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
|
||||
static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
|
||||
{
|
||||
UInt32 i;
|
||||
for (i = 0; i < (UInt32)kDicLogSizeMaxCompress; i++)
|
||||
unsigned i;
|
||||
for (i = kEndPosModelIndex / 2; i < kDicLogSizeMax; i++)
|
||||
if (p->dictSize <= ((UInt32)1 << i))
|
||||
break;
|
||||
p->distTableSize = i * 2;
|
||||
|
||||
p->finished = False;
|
||||
p->result = SZ_OK;
|
||||
RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig));
|
||||
p->nowPos64 = 0;
|
||||
p->needInit = 1;
|
||||
RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig))
|
||||
LzmaEnc_Init(p);
|
||||
LzmaEnc_InitPrices(p);
|
||||
p->nowPos64 = 0;
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqInStream *inStream, ISeqOutStream *outStream,
|
||||
ISzAlloc *alloc, ISzAlloc *allocBig)
|
||||
static SRes LzmaEnc_Prepare(CLzmaEncHandle p,
|
||||
ISeqOutStreamPtr outStream,
|
||||
ISeqInStreamPtr inStream,
|
||||
ISzAllocPtr alloc, ISzAllocPtr allocBig)
|
||||
{
|
||||
CLzmaEnc *p = (CLzmaEnc *)pp;
|
||||
p->inStream = inStream;
|
||||
// GET_CLzmaEnc_p
|
||||
MatchFinder_SET_STREAM(&MFB, inStream)
|
||||
p->rc.outStream = outStream;
|
||||
return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
|
||||
}
|
||||
|
||||
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,
|
||||
ISeqInStream *inStream, UInt32 keepWindowSize,
|
||||
ISzAlloc *alloc, ISzAlloc *allocBig)
|
||||
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p,
|
||||
ISeqInStreamPtr inStream, UInt32 keepWindowSize,
|
||||
ISzAllocPtr alloc, ISzAllocPtr allocBig)
|
||||
{
|
||||
CLzmaEnc *p = (CLzmaEnc *)pp;
|
||||
p->inStream = inStream;
|
||||
// GET_CLzmaEnc_p
|
||||
MatchFinder_SET_STREAM(&MFB, inStream)
|
||||
return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
|
||||
}
|
||||
|
||||
static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
|
||||
SRes LzmaEnc_MemPrepare(CLzmaEncHandle p,
|
||||
const Byte *src, SizeT srcLen,
|
||||
UInt32 keepWindowSize,
|
||||
ISzAllocPtr alloc, ISzAllocPtr allocBig)
|
||||
{
|
||||
p->seqBufInStream.funcTable.Read = MyRead;
|
||||
p->seqBufInStream.data = src;
|
||||
p->seqBufInStream.rem = srcLen;
|
||||
}
|
||||
|
||||
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
|
||||
UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
|
||||
{
|
||||
CLzmaEnc *p = (CLzmaEnc *)pp;
|
||||
LzmaEnc_SetInputBuf(p, src, srcLen);
|
||||
p->inStream = &p->seqBufInStream.funcTable;
|
||||
// GET_CLzmaEnc_p
|
||||
MatchFinder_SET_DIRECT_INPUT_BUF(&MFB, src, srcLen)
|
||||
LzmaEnc_SetDataSize(p, srcLen);
|
||||
return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
|
||||
}
|
||||
|
||||
void LzmaEnc_Finish(CLzmaEncHandle pp)
|
||||
void LzmaEnc_Finish(CLzmaEncHandle p)
|
||||
{
|
||||
#ifdef COMPRESS_MF_MT
|
||||
CLzmaEnc *p = (CLzmaEnc *)pp;
|
||||
#ifndef Z7_ST
|
||||
// GET_CLzmaEnc_p
|
||||
if (p->mtMode)
|
||||
MatchFinderMt_ReleaseStream(&p->matchFinderMt);
|
||||
#else
|
||||
pp = pp;
|
||||
UNUSED_VAR(p)
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef struct _CSeqOutStreamBuf
|
||||
{
|
||||
ISeqOutStream funcTable;
|
||||
Byte *data;
|
||||
SizeT rem;
|
||||
Bool overflow;
|
||||
} CSeqOutStreamBuf;
|
||||
|
||||
static size_t MyWrite(void *pp, const void *data, size_t size)
|
||||
typedef struct
|
||||
{
|
||||
CSeqOutStreamBuf *p = (CSeqOutStreamBuf *)pp;
|
||||
ISeqOutStream vt;
|
||||
Byte *data;
|
||||
size_t rem;
|
||||
BoolInt overflow;
|
||||
} CLzmaEnc_SeqOutStreamBuf;
|
||||
|
||||
static size_t SeqOutStreamBuf_Write(ISeqOutStreamPtr pp, const void *data, size_t size)
|
||||
{
|
||||
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLzmaEnc_SeqOutStreamBuf)
|
||||
if (p->rem < size)
|
||||
{
|
||||
size = p->rem;
|
||||
p->overflow = True;
|
||||
}
|
||||
memcpy(p->data, data, size);
|
||||
p->rem -= size;
|
||||
p->data += size;
|
||||
if (size != 0)
|
||||
{
|
||||
memcpy(p->data, data, size);
|
||||
p->rem -= size;
|
||||
p->data += size;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
|
||||
/*
|
||||
UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p)
|
||||
{
|
||||
const CLzmaEnc *p = (CLzmaEnc *)pp;
|
||||
GET_const_CLzmaEnc_p
|
||||
return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
|
||||
}
|
||||
*/
|
||||
|
||||
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
|
||||
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p)
|
||||
{
|
||||
const CLzmaEnc *p = (CLzmaEnc *)pp;
|
||||
// GET_const_CLzmaEnc_p
|
||||
return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
|
||||
}
|
||||
|
||||
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit,
|
||||
|
||||
// (desiredPackSize == 0) is not allowed
|
||||
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
|
||||
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
|
||||
{
|
||||
CLzmaEnc *p = (CLzmaEnc *)pp;
|
||||
// GET_CLzmaEnc_p
|
||||
UInt64 nowPos64;
|
||||
SRes res;
|
||||
CSeqOutStreamBuf outStream;
|
||||
CLzmaEnc_SeqOutStreamBuf outStream;
|
||||
|
||||
outStream.funcTable.Write = MyWrite;
|
||||
outStream.vt.Write = SeqOutStreamBuf_Write;
|
||||
outStream.data = dest;
|
||||
outStream.rem = *destLen;
|
||||
outStream.overflow = False;
|
||||
@@ -2155,11 +2971,11 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit,
|
||||
if (reInit)
|
||||
LzmaEnc_Init(p);
|
||||
LzmaEnc_InitPrices(p);
|
||||
nowPos64 = p->nowPos64;
|
||||
RangeEnc_Init(&p->rc);
|
||||
p->rc.outStream = &outStream.funcTable;
|
||||
|
||||
res = LzmaEnc_CodeOneBlock(p, True, desiredPackSize, *unpackSize);
|
||||
p->rc.outStream = &outStream.vt;
|
||||
nowPos64 = p->nowPos64;
|
||||
|
||||
res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize);
|
||||
|
||||
*unpackSize = (UInt32)(p->nowPos64 - nowPos64);
|
||||
*destLen -= outStream.rem;
|
||||
@@ -2169,29 +2985,26 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit,
|
||||
return res;
|
||||
}
|
||||
|
||||
SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress,
|
||||
ISzAlloc *alloc, ISzAlloc *allocBig)
|
||||
|
||||
Z7_NO_INLINE
|
||||
static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgressPtr progress)
|
||||
{
|
||||
CLzmaEnc *p = (CLzmaEnc *)pp;
|
||||
SRes res = SZ_OK;
|
||||
|
||||
#ifdef COMPRESS_MF_MT
|
||||
#ifndef Z7_ST
|
||||
Byte allocaDummy[0x300];
|
||||
int i = 0;
|
||||
for (i = 0; i < 16; i++)
|
||||
allocaDummy[i] = (Byte)i;
|
||||
allocaDummy[0] = 0;
|
||||
allocaDummy[1] = allocaDummy[0];
|
||||
#endif
|
||||
|
||||
RINOK(LzmaEnc_Prepare(pp, inStream, outStream, alloc, allocBig));
|
||||
|
||||
for (;;)
|
||||
{
|
||||
res = LzmaEnc_CodeOneBlock(p, False, 0, 0);
|
||||
if (res != SZ_OK || p->finished != 0)
|
||||
res = LzmaEnc_CodeOneBlock(p, 0, 0);
|
||||
if (res != SZ_OK || p->finished)
|
||||
break;
|
||||
if (progress != 0)
|
||||
if (progress)
|
||||
{
|
||||
res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc));
|
||||
res = ICompressProgress_Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc));
|
||||
if (res != SZ_OK)
|
||||
{
|
||||
res = SZ_ERROR_PROGRESS;
|
||||
@@ -2199,71 +3012,110 @@ SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *i
|
||||
}
|
||||
}
|
||||
}
|
||||
LzmaEnc_Finish(pp);
|
||||
|
||||
LzmaEnc_Finish((CLzmaEncHandle)(void *)p);
|
||||
|
||||
/*
|
||||
if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB))
|
||||
res = SZ_ERROR_FAIL;
|
||||
}
|
||||
*/
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
|
||||
|
||||
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress,
|
||||
ISzAllocPtr alloc, ISzAllocPtr allocBig)
|
||||
{
|
||||
// GET_CLzmaEnc_p
|
||||
RINOK(LzmaEnc_Prepare(p, outStream, inStream, alloc, allocBig))
|
||||
return LzmaEnc_Encode2(p, progress);
|
||||
}
|
||||
|
||||
|
||||
SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *props, SizeT *size)
|
||||
{
|
||||
CLzmaEnc *p = (CLzmaEnc *)pp;
|
||||
int i;
|
||||
UInt32 dictSize = p->dictSize;
|
||||
if (*size < LZMA_PROPS_SIZE)
|
||||
return SZ_ERROR_PARAM;
|
||||
*size = LZMA_PROPS_SIZE;
|
||||
props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
|
||||
|
||||
for (i = 11; i <= 30; i++)
|
||||
{
|
||||
if (dictSize <= ((UInt32)2 << i))
|
||||
// GET_CLzmaEnc_p
|
||||
const UInt32 dictSize = p->dictSize;
|
||||
UInt32 v;
|
||||
props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
|
||||
|
||||
// we write aligned dictionary value to properties for lzma decoder
|
||||
if (dictSize >= ((UInt32)1 << 21))
|
||||
{
|
||||
dictSize = (2 << i);
|
||||
break;
|
||||
const UInt32 kDictMask = ((UInt32)1 << 20) - 1;
|
||||
v = (dictSize + kDictMask) & ~kDictMask;
|
||||
if (v < dictSize)
|
||||
v = dictSize;
|
||||
}
|
||||
if (dictSize <= ((UInt32)3 << i))
|
||||
else
|
||||
{
|
||||
dictSize = (3 << i);
|
||||
break;
|
||||
unsigned i = 11 * 2;
|
||||
do
|
||||
{
|
||||
v = (UInt32)(2 + (i & 1)) << (i >> 1);
|
||||
i++;
|
||||
}
|
||||
while (v < dictSize);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
props[1 + i] = (Byte)(dictSize >> (8 * i));
|
||||
return SZ_OK;
|
||||
SetUi32(props + 1, v)
|
||||
return SZ_OK;
|
||||
}
|
||||
}
|
||||
|
||||
SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
|
||||
int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig)
|
||||
|
||||
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p)
|
||||
{
|
||||
// GET_CLzmaEnc_p
|
||||
return (unsigned)p->writeEndMark;
|
||||
}
|
||||
|
||||
|
||||
SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
|
||||
int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
|
||||
{
|
||||
SRes res;
|
||||
CLzmaEnc *p = (CLzmaEnc *)pp;
|
||||
// GET_CLzmaEnc_p
|
||||
|
||||
CSeqOutStreamBuf outStream;
|
||||
CLzmaEnc_SeqOutStreamBuf outStream;
|
||||
|
||||
LzmaEnc_SetInputBuf(p, src, srcLen);
|
||||
|
||||
outStream.funcTable.Write = MyWrite;
|
||||
outStream.vt.Write = SeqOutStreamBuf_Write;
|
||||
outStream.data = dest;
|
||||
outStream.rem = *destLen;
|
||||
outStream.overflow = False;
|
||||
|
||||
p->writeEndMark = writeEndMark;
|
||||
res = LzmaEnc_Encode(pp, &outStream.funcTable, &p->seqBufInStream.funcTable,
|
||||
progress, alloc, allocBig);
|
||||
p->rc.outStream = &outStream.vt;
|
||||
|
||||
*destLen -= outStream.rem;
|
||||
res = LzmaEnc_MemPrepare(p, src, srcLen, 0, alloc, allocBig);
|
||||
|
||||
if (res == SZ_OK)
|
||||
{
|
||||
res = LzmaEnc_Encode2(p, progress);
|
||||
if (res == SZ_OK && p->nowPos64 != srcLen)
|
||||
res = SZ_ERROR_FAIL;
|
||||
}
|
||||
|
||||
*destLen -= (SizeT)outStream.rem;
|
||||
if (outStream.overflow)
|
||||
return SZ_ERROR_OUTPUT_EOF;
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
|
||||
const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
|
||||
ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig)
|
||||
ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
|
||||
{
|
||||
CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc);
|
||||
CLzmaEncHandle p = LzmaEnc_Create(alloc);
|
||||
SRes res;
|
||||
if (p == 0)
|
||||
if (!p)
|
||||
return SZ_ERROR_MEM;
|
||||
|
||||
res = LzmaEnc_SetProps(p, props);
|
||||
@@ -2278,3 +3130,15 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
|
||||
LzmaEnc_Destroy(p, alloc, allocBig);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
#ifndef Z7_ST
|
||||
void LzmaEnc_GetLzThreads(CLzmaEncHandle p, HANDLE lz_threads[2])
|
||||
{
|
||||
GET_const_CLzmaEnc_p
|
||||
lz_threads[0] = p->matchFinderMt.hashSync.thread;
|
||||
lz_threads[1] = p->matchFinderMt.btSync.thread;
|
||||
}
|
||||
#endif
|
||||
*/
|
||||
|
||||
69
extern/lzma/LzmaEnc.h
vendored
69
extern/lzma/LzmaEnc.h
vendored
@@ -1,19 +1,21 @@
|
||||
/* LzmaEnc.h -- LZMA Encoder
|
||||
2008-10-04 : Igor Pavlov : Public domain */
|
||||
2023-04-13 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZMAENC_H
|
||||
#define __LZMAENC_H
|
||||
#ifndef ZIP7_INC_LZMA_ENC_H
|
||||
#define ZIP7_INC_LZMA_ENC_H
|
||||
|
||||
#include "Types.h"
|
||||
#include "7zTypes.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
#define LZMA_PROPS_SIZE 5
|
||||
|
||||
typedef struct _CLzmaEncProps
|
||||
typedef struct
|
||||
{
|
||||
int level; /* 0 <= level <= 9 */
|
||||
int level; /* 0 <= level <= 9 */
|
||||
UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
|
||||
(1 << 12) <= dictSize <= (1 << 30) for 64-bit version
|
||||
default = (1 << 24) */
|
||||
(1 << 12) <= dictSize <= (3 << 29) for 64-bit version
|
||||
default = (1 << 24) */
|
||||
int lc; /* 0 <= lc <= 8, default = 3 */
|
||||
int lp; /* 0 <= lp <= 4, default = 0 */
|
||||
int pb; /* 0 <= pb <= 4, default = 2 */
|
||||
@@ -21,9 +23,17 @@ typedef struct _CLzmaEncProps
|
||||
int fb; /* 5 <= fb <= 273, default = 32 */
|
||||
int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
|
||||
int numHashBytes; /* 2, 3 or 4, default = 4 */
|
||||
UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
|
||||
unsigned numHashOutBits; /* default = ? */
|
||||
UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
|
||||
unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
|
||||
int numThreads; /* 1 or 2, default = 2 */
|
||||
|
||||
// int _pad;
|
||||
|
||||
UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
|
||||
Encoder uses this value to reduce dictionary size */
|
||||
|
||||
UInt64 affinity;
|
||||
} CLzmaEncProps;
|
||||
|
||||
void LzmaEncProps_Init(CLzmaEncProps *p);
|
||||
@@ -33,40 +43,41 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
|
||||
|
||||
/* ---------- CLzmaEncHandle Interface ---------- */
|
||||
|
||||
/* LzmaEnc_* functions can return the following exit codes:
|
||||
Returns:
|
||||
/* LzmaEnc* functions can return the following exit codes:
|
||||
SRes:
|
||||
SZ_OK - OK
|
||||
SZ_ERROR_MEM - Memory allocation error
|
||||
SZ_ERROR_PARAM - Incorrect paramater in props
|
||||
SZ_ERROR_WRITE - Write callback error.
|
||||
SZ_ERROR_WRITE - ISeqOutStream write callback error
|
||||
SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output
|
||||
SZ_ERROR_PROGRESS - some break from progress callback
|
||||
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
|
||||
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
|
||||
*/
|
||||
|
||||
typedef void * CLzmaEncHandle;
|
||||
typedef struct CLzmaEnc CLzmaEnc;
|
||||
typedef CLzmaEnc * CLzmaEncHandle;
|
||||
// Z7_DECLARE_HANDLE(CLzmaEncHandle)
|
||||
|
||||
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
|
||||
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
|
||||
CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc);
|
||||
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig);
|
||||
SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
|
||||
void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
|
||||
SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
|
||||
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
|
||||
ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
|
||||
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
|
||||
|
||||
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream,
|
||||
ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
|
||||
int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
|
||||
int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
|
||||
|
||||
/* ---------- One Call Interface ---------- */
|
||||
|
||||
/* LzmaEncode
|
||||
Return code:
|
||||
SZ_OK - OK
|
||||
SZ_ERROR_MEM - Memory allocation error
|
||||
SZ_ERROR_PARAM - Incorrect paramater
|
||||
SZ_ERROR_OUTPUT_EOF - output buffer overflow
|
||||
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
|
||||
*/
|
||||
|
||||
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
|
||||
const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
|
||||
ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
|
||||
ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
#endif
|
||||
|
||||
18
extern/lzma/LzmaLib.c
vendored
18
extern/lzma/LzmaLib.c
vendored
@@ -1,18 +1,14 @@
|
||||
/* LzmaLib.c -- LZMA library wrapper
|
||||
2008-08-05
|
||||
Igor Pavlov
|
||||
Public domain */
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include "LzmaEnc.h"
|
||||
#include "LzmaDec.h"
|
||||
#include "Alloc.h"
|
||||
#include "LzmaDec.h"
|
||||
#include "LzmaEnc.h"
|
||||
#include "LzmaLib.h"
|
||||
|
||||
static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); }
|
||||
static void SzFree(void *p, void *address) { p = p; MyFree(address); }
|
||||
static ISzAlloc g_Alloc = { SzAlloc, SzFree };
|
||||
|
||||
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
|
||||
Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
|
||||
unsigned char *outProps, size_t *outPropsSize,
|
||||
int level, /* 0 <= level <= 9, default = 5 */
|
||||
unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
|
||||
@@ -38,7 +34,7 @@ MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned cha
|
||||
}
|
||||
|
||||
|
||||
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
|
||||
Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
|
||||
const unsigned char *props, size_t propsSize)
|
||||
{
|
||||
ELzmaStatus status;
|
||||
|
||||
47
extern/lzma/LzmaLib.h
vendored
47
extern/lzma/LzmaLib.h
vendored
@@ -1,20 +1,14 @@
|
||||
/* LzmaLib.h -- LZMA library interface
|
||||
2008-08-05
|
||||
Igor Pavlov
|
||||
Public domain */
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZMALIB_H
|
||||
#define __LZMALIB_H
|
||||
#ifndef ZIP7_INC_LZMA_LIB_H
|
||||
#define ZIP7_INC_LZMA_LIB_H
|
||||
|
||||
#include "Types.h"
|
||||
#include "7zTypes.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
#define MY_EXTERN_C extern "C"
|
||||
#else
|
||||
#define MY_EXTERN_C extern
|
||||
#endif
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
#define MY_STDAPI MY_EXTERN_C int MY_STD_CALL
|
||||
#define Z7_STDAPI int Z7_STDCALL
|
||||
|
||||
#define LZMA_PROPS_SIZE 5
|
||||
|
||||
@@ -46,14 +40,16 @@ outPropsSize -
|
||||
level - compression level: 0 <= level <= 9;
|
||||
|
||||
level dictSize algo fb
|
||||
0: 16 KB 0 32
|
||||
1: 64 KB 0 32
|
||||
2: 256 KB 0 32
|
||||
3: 1 MB 0 32
|
||||
4: 4 MB 0 32
|
||||
0: 64 KB 0 32
|
||||
1: 256 KB 0 32
|
||||
2: 1 MB 0 32
|
||||
3: 4 MB 0 32
|
||||
4: 16 MB 0 32
|
||||
5: 16 MB 1 32
|
||||
6: 32 MB 1 32
|
||||
7+: 64 MB 1 64
|
||||
7: 32 MB 1 64
|
||||
8: 64 MB 1 64
|
||||
9: 64 MB 1 64
|
||||
|
||||
The default value for "level" is 5.
|
||||
|
||||
@@ -89,6 +85,11 @@ fb - Word size (the number of fast bytes).
|
||||
numThreads - The number of thereads. 1 or 2. The default value is 2.
|
||||
Fast mode (algo = 0) can use only 1 thread.
|
||||
|
||||
In:
|
||||
dest - output data buffer
|
||||
destLen - output data buffer size
|
||||
src - input data
|
||||
srcLen - input data size
|
||||
Out:
|
||||
destLen - processed output size
|
||||
Returns:
|
||||
@@ -99,7 +100,7 @@ Returns:
|
||||
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
|
||||
*/
|
||||
|
||||
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
|
||||
Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
|
||||
unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
|
||||
int level, /* 0 <= level <= 9, default = 5 */
|
||||
unsigned dictSize, /* default = (1 << 24) */
|
||||
@@ -114,8 +115,8 @@ MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char
|
||||
LzmaUncompress
|
||||
--------------
|
||||
In:
|
||||
dest - output data
|
||||
destLen - output data size
|
||||
dest - output data buffer
|
||||
destLen - output data buffer size
|
||||
src - input data
|
||||
srcLen - input data size
|
||||
Out:
|
||||
@@ -129,7 +130,9 @@ Returns:
|
||||
SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src)
|
||||
*/
|
||||
|
||||
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
|
||||
Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
|
||||
const unsigned char *props, size_t propsSize);
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
#endif
|
||||
|
||||
10
extern/lzma/Precomp.h
vendored
Normal file
10
extern/lzma/Precomp.h
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
/* Precomp.h -- StdAfx
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef ZIP7_INC_PRECOMP_H
|
||||
#define ZIP7_INC_PRECOMP_H
|
||||
|
||||
#include "Compiler.h"
|
||||
/* #include "7zTypes.h" */
|
||||
|
||||
#endif
|
||||
7
extern/lzma/README.blender
vendored
7
extern/lzma/README.blender
vendored
@@ -1,5 +1,8 @@
|
||||
Project: LZMA SDK
|
||||
URL: https://www.7-zip.org/sdk.html
|
||||
License: Public Domain
|
||||
Upstream version: 4.65
|
||||
Local modifications: None
|
||||
Upstream version: 23.01
|
||||
Local modifications: No code changes
|
||||
|
||||
- Took only files needed for Blender: C source for raw LZMA1 encoder/decoder.
|
||||
- CMakeLists.txt is made for Blender codebase
|
||||
|
||||
562
extern/lzma/Threads.c
vendored
Normal file
562
extern/lzma/Threads.c
vendored
Normal file
@@ -0,0 +1,562 @@
|
||||
/* Threads.c -- multithreading library
|
||||
2023-03-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#ifndef USE_THREADS_CreateThread
|
||||
#include <process.h>
|
||||
#endif
|
||||
|
||||
#include "Threads.h"
|
||||
|
||||
static WRes GetError(void)
|
||||
{
|
||||
const DWORD res = GetLastError();
|
||||
return res ? (WRes)res : 1;
|
||||
}
|
||||
|
||||
static WRes HandleToWRes(HANDLE h) { return (h != NULL) ? 0 : GetError(); }
|
||||
static WRes BOOLToWRes(BOOL v) { return v ? 0 : GetError(); }
|
||||
|
||||
WRes HandlePtr_Close(HANDLE *p)
|
||||
{
|
||||
if (*p != NULL)
|
||||
{
|
||||
if (!CloseHandle(*p))
|
||||
return GetError();
|
||||
*p = NULL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
WRes Handle_WaitObject(HANDLE h)
|
||||
{
|
||||
DWORD dw = WaitForSingleObject(h, INFINITE);
|
||||
/*
|
||||
(dw) result:
|
||||
WAIT_OBJECT_0 // 0
|
||||
WAIT_ABANDONED // 0x00000080 : is not compatible with Win32 Error space
|
||||
WAIT_TIMEOUT // 0x00000102 : is compatible with Win32 Error space
|
||||
WAIT_FAILED // 0xFFFFFFFF
|
||||
*/
|
||||
if (dw == WAIT_FAILED)
|
||||
{
|
||||
dw = GetLastError();
|
||||
if (dw == 0)
|
||||
return WAIT_FAILED;
|
||||
}
|
||||
return (WRes)dw;
|
||||
}
|
||||
|
||||
#define Thread_Wait(p) Handle_WaitObject(*(p))
|
||||
|
||||
WRes Thread_Wait_Close(CThread *p)
|
||||
{
|
||||
WRes res = Thread_Wait(p);
|
||||
WRes res2 = Thread_Close(p);
|
||||
return (res != 0 ? res : res2);
|
||||
}
|
||||
|
||||
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
|
||||
{
|
||||
/* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
|
||||
|
||||
#ifdef USE_THREADS_CreateThread
|
||||
|
||||
DWORD threadId;
|
||||
*p = CreateThread(NULL, 0, func, param, 0, &threadId);
|
||||
|
||||
#else
|
||||
|
||||
unsigned threadId;
|
||||
*p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId));
|
||||
|
||||
#endif
|
||||
|
||||
/* maybe we must use errno here, but probably GetLastError() is also OK. */
|
||||
return HandleToWRes(*p);
|
||||
}
|
||||
|
||||
|
||||
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
|
||||
{
|
||||
#ifdef USE_THREADS_CreateThread
|
||||
|
||||
UNUSED_VAR(affinity)
|
||||
return Thread_Create(p, func, param);
|
||||
|
||||
#else
|
||||
|
||||
/* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
|
||||
HANDLE h;
|
||||
WRes wres;
|
||||
unsigned threadId;
|
||||
h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId));
|
||||
*p = h;
|
||||
wres = HandleToWRes(h);
|
||||
if (h)
|
||||
{
|
||||
{
|
||||
// DWORD_PTR prevMask =
|
||||
SetThreadAffinityMask(h, (DWORD_PTR)affinity);
|
||||
/*
|
||||
if (prevMask == 0)
|
||||
{
|
||||
// affinity change is non-critical error, so we can ignore it
|
||||
// wres = GetError();
|
||||
}
|
||||
*/
|
||||
}
|
||||
{
|
||||
DWORD prevSuspendCount = ResumeThread(h);
|
||||
/* ResumeThread() returns:
|
||||
0 : was_not_suspended
|
||||
1 : was_resumed
|
||||
-1 : error
|
||||
*/
|
||||
if (prevSuspendCount == (DWORD)-1)
|
||||
wres = GetError();
|
||||
}
|
||||
}
|
||||
|
||||
/* maybe we must use errno here, but probably GetLastError() is also OK. */
|
||||
return wres;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled)
|
||||
{
|
||||
*p = CreateEvent(NULL, manualReset, (signaled ? TRUE : FALSE), NULL);
|
||||
return HandleToWRes(*p);
|
||||
}
|
||||
|
||||
WRes Event_Set(CEvent *p) { return BOOLToWRes(SetEvent(*p)); }
|
||||
WRes Event_Reset(CEvent *p) { return BOOLToWRes(ResetEvent(*p)); }
|
||||
|
||||
WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled) { return Event_Create(p, TRUE, signaled); }
|
||||
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled) { return Event_Create(p, FALSE, signaled); }
|
||||
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) { return ManualResetEvent_Create(p, 0); }
|
||||
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEvent_Create(p, 0); }
|
||||
|
||||
|
||||
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
|
||||
{
|
||||
// negative ((LONG)maxCount) is not supported in WIN32::CreateSemaphore()
|
||||
*p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL);
|
||||
return HandleToWRes(*p);
|
||||
}
|
||||
|
||||
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
|
||||
{
|
||||
// if (Semaphore_IsCreated(p))
|
||||
{
|
||||
WRes wres = Semaphore_Close(p);
|
||||
if (wres != 0)
|
||||
return wres;
|
||||
}
|
||||
return Semaphore_Create(p, initCount, maxCount);
|
||||
}
|
||||
|
||||
static WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount)
|
||||
{ return BOOLToWRes(ReleaseSemaphore(*p, releaseCount, previousCount)); }
|
||||
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num)
|
||||
{ return Semaphore_Release(p, (LONG)num, NULL); }
|
||||
WRes Semaphore_Release1(CSemaphore *p) { return Semaphore_ReleaseN(p, 1); }
|
||||
|
||||
WRes CriticalSection_Init(CCriticalSection *p)
|
||||
{
|
||||
/* InitializeCriticalSection() can raise exception:
|
||||
Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception
|
||||
Windows Vista+ : no exceptions */
|
||||
#ifdef _MSC_VER
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic ignored "-Wlanguage-extension-token"
|
||||
#endif
|
||||
__try
|
||||
#endif
|
||||
{
|
||||
InitializeCriticalSection(p);
|
||||
/* InitializeCriticalSectionAndSpinCount(p, 0); */
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
__except (EXCEPTION_EXECUTE_HANDLER) { return ERROR_NOT_ENOUGH_MEMORY; }
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#else // _WIN32
|
||||
|
||||
// ---------- POSIX ----------
|
||||
|
||||
#ifndef __APPLE__
|
||||
#ifndef Z7_AFFINITY_DISABLE
|
||||
// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET
|
||||
// clang < 3.6 : unknown warning group '-Wreserved-id-macro'
|
||||
// clang 3.6 - 12.01 : gives warning "macro name is a reserved identifier"
|
||||
// clang >= 13 : do not give warning
|
||||
#if !defined(_GNU_SOURCE)
|
||||
#if defined(__clang__) && (__clang_major__ >= 4) && (__clang_major__ <= 12)
|
||||
#pragma GCC diagnostic ignored "-Wreserved-id-macro"
|
||||
#endif
|
||||
#define _GNU_SOURCE
|
||||
#endif // !defined(_GNU_SOURCE)
|
||||
#endif // Z7_AFFINITY_DISABLE
|
||||
#endif // __APPLE__
|
||||
|
||||
#include "Threads.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#ifdef Z7_AFFINITY_SUPPORTED
|
||||
// #include <sched.h>
|
||||
#endif
|
||||
|
||||
|
||||
// #include <stdio.h>
|
||||
// #define PRF(p) p
|
||||
#define PRF(p)
|
||||
#define Print(s) PRF(printf("\n%s\n", s);)
|
||||
|
||||
WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet)
|
||||
{
|
||||
// new thread in Posix probably inherits affinity from parrent thread
|
||||
Print("Thread_Create_With_CpuSet")
|
||||
|
||||
pthread_attr_t attr;
|
||||
int ret;
|
||||
// int ret2;
|
||||
|
||||
p->_created = 0;
|
||||
|
||||
RINOK(pthread_attr_init(&attr))
|
||||
|
||||
ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
|
||||
|
||||
if (!ret)
|
||||
{
|
||||
if (cpuSet)
|
||||
{
|
||||
#ifdef Z7_AFFINITY_SUPPORTED
|
||||
|
||||
/*
|
||||
printf("\n affinity :");
|
||||
unsigned i;
|
||||
for (i = 0; i < sizeof(*cpuSet) && i < 8; i++)
|
||||
{
|
||||
Byte b = *((const Byte *)cpuSet + i);
|
||||
char temp[32];
|
||||
#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
|
||||
temp[0] = GET_HEX_CHAR((b & 0xF));
|
||||
temp[1] = GET_HEX_CHAR((b >> 4));
|
||||
// temp[0] = GET_HEX_CHAR((b >> 4)); // big-endian
|
||||
// temp[1] = GET_HEX_CHAR((b & 0xF)); // big-endian
|
||||
temp[2] = 0;
|
||||
printf("%s", temp);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
|
||||
// ret2 =
|
||||
pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet);
|
||||
// if (ret2) ret = ret2;
|
||||
#endif
|
||||
}
|
||||
|
||||
ret = pthread_create(&p->_tid, &attr, func, param);
|
||||
|
||||
if (!ret)
|
||||
{
|
||||
p->_created = 1;
|
||||
/*
|
||||
if (cpuSet)
|
||||
{
|
||||
// ret2 =
|
||||
pthread_setaffinity_np(p->_tid, sizeof(*cpuSet), cpuSet);
|
||||
// if (ret2) ret = ret2;
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
// ret2 =
|
||||
pthread_attr_destroy(&attr);
|
||||
// if (ret2 != 0) ret = ret2;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
|
||||
{
|
||||
return Thread_Create_With_CpuSet(p, func, param, NULL);
|
||||
}
|
||||
|
||||
|
||||
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
|
||||
{
|
||||
Print("Thread_Create_WithAffinity")
|
||||
CCpuSet cs;
|
||||
unsigned i;
|
||||
CpuSet_Zero(&cs);
|
||||
for (i = 0; i < sizeof(affinity) * 8; i++)
|
||||
{
|
||||
if (affinity == 0)
|
||||
break;
|
||||
if (affinity & 1)
|
||||
{
|
||||
CpuSet_Set(&cs, i);
|
||||
}
|
||||
affinity >>= 1;
|
||||
}
|
||||
return Thread_Create_With_CpuSet(p, func, param, &cs);
|
||||
}
|
||||
|
||||
|
||||
WRes Thread_Close(CThread *p)
|
||||
{
|
||||
// Print("Thread_Close")
|
||||
int ret;
|
||||
if (!p->_created)
|
||||
return 0;
|
||||
|
||||
ret = pthread_detach(p->_tid);
|
||||
p->_tid = 0;
|
||||
p->_created = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
WRes Thread_Wait_Close(CThread *p)
|
||||
{
|
||||
// Print("Thread_Wait_Close")
|
||||
void *thread_return;
|
||||
int ret;
|
||||
if (!p->_created)
|
||||
return EINVAL;
|
||||
|
||||
ret = pthread_join(p->_tid, &thread_return);
|
||||
// probably we can't use that (_tid) after pthread_join(), so we close thread here
|
||||
p->_created = 0;
|
||||
p->_tid = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static WRes Event_Create(CEvent *p, int manualReset, int signaled)
|
||||
{
|
||||
RINOK(pthread_mutex_init(&p->_mutex, NULL))
|
||||
RINOK(pthread_cond_init(&p->_cond, NULL))
|
||||
p->_manual_reset = manualReset;
|
||||
p->_state = (signaled ? True : False);
|
||||
p->_created = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled)
|
||||
{ return Event_Create(p, True, signaled); }
|
||||
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p)
|
||||
{ return ManualResetEvent_Create(p, 0); }
|
||||
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled)
|
||||
{ return Event_Create(p, False, signaled); }
|
||||
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p)
|
||||
{ return AutoResetEvent_Create(p, 0); }
|
||||
|
||||
|
||||
WRes Event_Set(CEvent *p)
|
||||
{
|
||||
RINOK(pthread_mutex_lock(&p->_mutex))
|
||||
p->_state = True;
|
||||
int res1 = pthread_cond_broadcast(&p->_cond);
|
||||
int res2 = pthread_mutex_unlock(&p->_mutex);
|
||||
return (res2 ? res2 : res1);
|
||||
}
|
||||
|
||||
WRes Event_Reset(CEvent *p)
|
||||
{
|
||||
RINOK(pthread_mutex_lock(&p->_mutex))
|
||||
p->_state = False;
|
||||
return pthread_mutex_unlock(&p->_mutex);
|
||||
}
|
||||
|
||||
WRes Event_Wait(CEvent *p)
|
||||
{
|
||||
RINOK(pthread_mutex_lock(&p->_mutex))
|
||||
while (p->_state == False)
|
||||
{
|
||||
// ETIMEDOUT
|
||||
// ret =
|
||||
pthread_cond_wait(&p->_cond, &p->_mutex);
|
||||
// if (ret != 0) break;
|
||||
}
|
||||
if (p->_manual_reset == False)
|
||||
{
|
||||
p->_state = False;
|
||||
}
|
||||
return pthread_mutex_unlock(&p->_mutex);
|
||||
}
|
||||
|
||||
WRes Event_Close(CEvent *p)
|
||||
{
|
||||
if (!p->_created)
|
||||
return 0;
|
||||
p->_created = 0;
|
||||
{
|
||||
int res1 = pthread_mutex_destroy(&p->_mutex);
|
||||
int res2 = pthread_cond_destroy(&p->_cond);
|
||||
return (res1 ? res1 : res2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
|
||||
{
|
||||
if (initCount > maxCount || maxCount < 1)
|
||||
return EINVAL;
|
||||
RINOK(pthread_mutex_init(&p->_mutex, NULL))
|
||||
RINOK(pthread_cond_init(&p->_cond, NULL))
|
||||
p->_count = initCount;
|
||||
p->_maxCount = maxCount;
|
||||
p->_created = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
|
||||
{
|
||||
if (Semaphore_IsCreated(p))
|
||||
{
|
||||
/*
|
||||
WRes wres = Semaphore_Close(p);
|
||||
if (wres != 0)
|
||||
return wres;
|
||||
*/
|
||||
if (initCount > maxCount || maxCount < 1)
|
||||
return EINVAL;
|
||||
// return EINVAL; // for debug
|
||||
p->_count = initCount;
|
||||
p->_maxCount = maxCount;
|
||||
return 0;
|
||||
}
|
||||
return Semaphore_Create(p, initCount, maxCount);
|
||||
}
|
||||
|
||||
|
||||
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount)
|
||||
{
|
||||
UInt32 newCount;
|
||||
int ret;
|
||||
|
||||
if (releaseCount < 1)
|
||||
return EINVAL;
|
||||
|
||||
RINOK(pthread_mutex_lock(&p->_mutex))
|
||||
|
||||
newCount = p->_count + releaseCount;
|
||||
if (newCount > p->_maxCount)
|
||||
ret = ERROR_TOO_MANY_POSTS; // EINVAL;
|
||||
else
|
||||
{
|
||||
p->_count = newCount;
|
||||
ret = pthread_cond_broadcast(&p->_cond);
|
||||
}
|
||||
RINOK(pthread_mutex_unlock(&p->_mutex))
|
||||
return ret;
|
||||
}
|
||||
|
||||
WRes Semaphore_Wait(CSemaphore *p)
|
||||
{
|
||||
RINOK(pthread_mutex_lock(&p->_mutex))
|
||||
while (p->_count < 1)
|
||||
{
|
||||
pthread_cond_wait(&p->_cond, &p->_mutex);
|
||||
}
|
||||
p->_count--;
|
||||
return pthread_mutex_unlock(&p->_mutex);
|
||||
}
|
||||
|
||||
WRes Semaphore_Close(CSemaphore *p)
|
||||
{
|
||||
if (!p->_created)
|
||||
return 0;
|
||||
p->_created = 0;
|
||||
{
|
||||
int res1 = pthread_mutex_destroy(&p->_mutex);
|
||||
int res2 = pthread_cond_destroy(&p->_cond);
|
||||
return (res1 ? res1 : res2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
WRes CriticalSection_Init(CCriticalSection *p)
|
||||
{
|
||||
// Print("CriticalSection_Init")
|
||||
if (!p)
|
||||
return EINTR;
|
||||
return pthread_mutex_init(&p->_mutex, NULL);
|
||||
}
|
||||
|
||||
void CriticalSection_Enter(CCriticalSection *p)
|
||||
{
|
||||
// Print("CriticalSection_Enter")
|
||||
if (p)
|
||||
{
|
||||
// int ret =
|
||||
pthread_mutex_lock(&p->_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
void CriticalSection_Leave(CCriticalSection *p)
|
||||
{
|
||||
// Print("CriticalSection_Leave")
|
||||
if (p)
|
||||
{
|
||||
// int ret =
|
||||
pthread_mutex_unlock(&p->_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
void CriticalSection_Delete(CCriticalSection *p)
|
||||
{
|
||||
// Print("CriticalSection_Delete")
|
||||
if (p)
|
||||
{
|
||||
// int ret =
|
||||
pthread_mutex_destroy(&p->_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
LONG InterlockedIncrement(LONG volatile *addend)
|
||||
{
|
||||
// Print("InterlockedIncrement")
|
||||
#ifdef USE_HACK_UNSAFE_ATOMIC
|
||||
LONG val = *addend + 1;
|
||||
*addend = val;
|
||||
return val;
|
||||
#else
|
||||
|
||||
#if defined(__clang__) && (__clang_major__ >= 8)
|
||||
#pragma GCC diagnostic ignored "-Watomic-implicit-seq-cst"
|
||||
#endif
|
||||
return __sync_add_and_fetch(addend, 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // _WIN32
|
||||
|
||||
WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p)
|
||||
{
|
||||
if (Event_IsCreated(p))
|
||||
return Event_Reset(p);
|
||||
return AutoResetEvent_CreateNotSignaled(p);
|
||||
}
|
||||
|
||||
#undef PRF
|
||||
#undef Print
|
||||
240
extern/lzma/Threads.h
vendored
Normal file
240
extern/lzma/Threads.h
vendored
Normal file
@@ -0,0 +1,240 @@
|
||||
/* Threads.h -- multithreading library
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef ZIP7_INC_THREADS_H
|
||||
#define ZIP7_INC_THREADS_H
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "7zWindows.h"
|
||||
|
||||
#else
|
||||
|
||||
#if defined(__linux__)
|
||||
#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)
|
||||
#ifndef Z7_AFFINITY_DISABLE
|
||||
#define Z7_AFFINITY_SUPPORTED
|
||||
// #pragma message(" ==== Z7_AFFINITY_SUPPORTED")
|
||||
// #define _GNU_SOURCE
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <pthread.h>
|
||||
|
||||
#endif
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
WRes HandlePtr_Close(HANDLE *h);
|
||||
WRes Handle_WaitObject(HANDLE h);
|
||||
|
||||
typedef HANDLE CThread;
|
||||
|
||||
#define Thread_CONSTRUCT(p) { *(p) = NULL; }
|
||||
#define Thread_WasCreated(p) (*(p) != NULL)
|
||||
#define Thread_Close(p) HandlePtr_Close(p)
|
||||
// #define Thread_Wait(p) Handle_WaitObject(*(p))
|
||||
|
||||
#ifdef UNDER_CE
|
||||
// if (USE_THREADS_CreateThread is defined), we use _beginthreadex()
|
||||
// if (USE_THREADS_CreateThread is not definned), we use CreateThread()
|
||||
#define USE_THREADS_CreateThread
|
||||
#endif
|
||||
|
||||
typedef
|
||||
#ifdef USE_THREADS_CreateThread
|
||||
DWORD
|
||||
#else
|
||||
unsigned
|
||||
#endif
|
||||
THREAD_FUNC_RET_TYPE;
|
||||
|
||||
#define THREAD_FUNC_RET_ZERO 0
|
||||
|
||||
typedef DWORD_PTR CAffinityMask;
|
||||
typedef DWORD_PTR CCpuSet;
|
||||
|
||||
#define CpuSet_Zero(p) *(p) = (0)
|
||||
#define CpuSet_Set(p, cpu) *(p) |= ((DWORD_PTR)1 << (cpu))
|
||||
|
||||
#else // _WIN32
|
||||
|
||||
typedef struct
|
||||
{
|
||||
pthread_t _tid;
|
||||
int _created;
|
||||
} CThread;
|
||||
|
||||
#define Thread_CONSTRUCT(p) { (p)->_tid = 0; (p)->_created = 0; }
|
||||
#define Thread_WasCreated(p) ((p)->_created != 0)
|
||||
WRes Thread_Close(CThread *p);
|
||||
// #define Thread_Wait Thread_Wait_Close
|
||||
|
||||
typedef void * THREAD_FUNC_RET_TYPE;
|
||||
#define THREAD_FUNC_RET_ZERO NULL
|
||||
|
||||
|
||||
typedef UInt64 CAffinityMask;
|
||||
|
||||
#ifdef Z7_AFFINITY_SUPPORTED
|
||||
|
||||
typedef cpu_set_t CCpuSet;
|
||||
#define CpuSet_Zero(p) CPU_ZERO(p)
|
||||
#define CpuSet_Set(p, cpu) CPU_SET(cpu, p)
|
||||
#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p)
|
||||
|
||||
#else
|
||||
|
||||
typedef UInt64 CCpuSet;
|
||||
#define CpuSet_Zero(p) *(p) = (0)
|
||||
#define CpuSet_Set(p, cpu) *(p) |= ((UInt64)1 << (cpu))
|
||||
#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif // _WIN32
|
||||
|
||||
|
||||
#define THREAD_FUNC_CALL_TYPE Z7_STDCALL
|
||||
|
||||
#if defined(_WIN32) && defined(__GNUC__)
|
||||
/* GCC compiler for x86 32-bit uses the rule:
|
||||
the stack is 16-byte aligned before CALL instruction for function calling.
|
||||
But only root function main() contains instructions that
|
||||
set 16-byte alignment for stack pointer. And another functions
|
||||
just keep alignment, if it was set in some parent function.
|
||||
|
||||
The problem:
|
||||
if we create new thread in MinGW (GCC) 32-bit x86 via _beginthreadex() or CreateThread(),
|
||||
the root function of thread doesn't set 16-byte alignment.
|
||||
And stack frames in all child functions also will be unaligned in that case.
|
||||
|
||||
Here we set (force_align_arg_pointer) attribute for root function of new thread.
|
||||
Do we need (force_align_arg_pointer) also for another systems? */
|
||||
|
||||
#define THREAD_FUNC_ATTRIB_ALIGN_ARG __attribute__((force_align_arg_pointer))
|
||||
// #define THREAD_FUNC_ATTRIB_ALIGN_ARG // for debug : bad alignment in SSE functions
|
||||
#else
|
||||
#define THREAD_FUNC_ATTRIB_ALIGN_ARG
|
||||
#endif
|
||||
|
||||
#define THREAD_FUNC_DECL THREAD_FUNC_ATTRIB_ALIGN_ARG THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE
|
||||
|
||||
typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *);
|
||||
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param);
|
||||
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity);
|
||||
WRes Thread_Wait_Close(CThread *p);
|
||||
|
||||
#ifdef _WIN32
|
||||
#define Thread_Create_With_CpuSet(p, func, param, cs) \
|
||||
Thread_Create_With_Affinity(p, func, param, *cs)
|
||||
#else
|
||||
WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
typedef HANDLE CEvent;
|
||||
typedef CEvent CAutoResetEvent;
|
||||
typedef CEvent CManualResetEvent;
|
||||
#define Event_Construct(p) *(p) = NULL
|
||||
#define Event_IsCreated(p) (*(p) != NULL)
|
||||
#define Event_Close(p) HandlePtr_Close(p)
|
||||
#define Event_Wait(p) Handle_WaitObject(*(p))
|
||||
WRes Event_Set(CEvent *p);
|
||||
WRes Event_Reset(CEvent *p);
|
||||
WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);
|
||||
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);
|
||||
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);
|
||||
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);
|
||||
|
||||
typedef HANDLE CSemaphore;
|
||||
#define Semaphore_Construct(p) *(p) = NULL
|
||||
#define Semaphore_IsCreated(p) (*(p) != NULL)
|
||||
#define Semaphore_Close(p) HandlePtr_Close(p)
|
||||
#define Semaphore_Wait(p) Handle_WaitObject(*(p))
|
||||
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
|
||||
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
|
||||
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
|
||||
WRes Semaphore_Release1(CSemaphore *p);
|
||||
|
||||
typedef CRITICAL_SECTION CCriticalSection;
|
||||
WRes CriticalSection_Init(CCriticalSection *p);
|
||||
#define CriticalSection_Delete(p) DeleteCriticalSection(p)
|
||||
#define CriticalSection_Enter(p) EnterCriticalSection(p)
|
||||
#define CriticalSection_Leave(p) LeaveCriticalSection(p)
|
||||
|
||||
|
||||
#else // _WIN32
|
||||
|
||||
typedef struct _CEvent
|
||||
{
|
||||
int _created;
|
||||
int _manual_reset;
|
||||
int _state;
|
||||
pthread_mutex_t _mutex;
|
||||
pthread_cond_t _cond;
|
||||
} CEvent;
|
||||
|
||||
typedef CEvent CAutoResetEvent;
|
||||
typedef CEvent CManualResetEvent;
|
||||
|
||||
#define Event_Construct(p) (p)->_created = 0
|
||||
#define Event_IsCreated(p) ((p)->_created)
|
||||
|
||||
WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);
|
||||
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);
|
||||
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);
|
||||
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);
|
||||
|
||||
WRes Event_Set(CEvent *p);
|
||||
WRes Event_Reset(CEvent *p);
|
||||
WRes Event_Wait(CEvent *p);
|
||||
WRes Event_Close(CEvent *p);
|
||||
|
||||
|
||||
typedef struct _CSemaphore
|
||||
{
|
||||
int _created;
|
||||
UInt32 _count;
|
||||
UInt32 _maxCount;
|
||||
pthread_mutex_t _mutex;
|
||||
pthread_cond_t _cond;
|
||||
} CSemaphore;
|
||||
|
||||
#define Semaphore_Construct(p) (p)->_created = 0
|
||||
#define Semaphore_IsCreated(p) ((p)->_created)
|
||||
|
||||
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
|
||||
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
|
||||
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
|
||||
#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1)
|
||||
WRes Semaphore_Wait(CSemaphore *p);
|
||||
WRes Semaphore_Close(CSemaphore *p);
|
||||
|
||||
|
||||
typedef struct _CCriticalSection
|
||||
{
|
||||
pthread_mutex_t _mutex;
|
||||
} CCriticalSection;
|
||||
|
||||
WRes CriticalSection_Init(CCriticalSection *p);
|
||||
void CriticalSection_Delete(CCriticalSection *cs);
|
||||
void CriticalSection_Enter(CCriticalSection *cs);
|
||||
void CriticalSection_Leave(CCriticalSection *cs);
|
||||
|
||||
LONG InterlockedIncrement(LONG volatile *addend);
|
||||
|
||||
#endif // _WIN32
|
||||
|
||||
WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p);
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
#endif
|
||||
208
extern/lzma/Types.h
vendored
208
extern/lzma/Types.h
vendored
@@ -1,208 +0,0 @@
|
||||
/* Types.h -- Basic types
|
||||
2008-11-23 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __7Z_TYPES_H
|
||||
#define __7Z_TYPES_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#define SZ_OK 0
|
||||
|
||||
#define SZ_ERROR_DATA 1
|
||||
#define SZ_ERROR_MEM 2
|
||||
#define SZ_ERROR_CRC 3
|
||||
#define SZ_ERROR_UNSUPPORTED 4
|
||||
#define SZ_ERROR_PARAM 5
|
||||
#define SZ_ERROR_INPUT_EOF 6
|
||||
#define SZ_ERROR_OUTPUT_EOF 7
|
||||
#define SZ_ERROR_READ 8
|
||||
#define SZ_ERROR_WRITE 9
|
||||
#define SZ_ERROR_PROGRESS 10
|
||||
#define SZ_ERROR_FAIL 11
|
||||
#define SZ_ERROR_THREAD 12
|
||||
|
||||
#define SZ_ERROR_ARCHIVE 16
|
||||
#define SZ_ERROR_NO_ARCHIVE 17
|
||||
|
||||
typedef int SRes;
|
||||
|
||||
#ifdef _WIN32
|
||||
typedef DWORD WRes;
|
||||
#else
|
||||
typedef int WRes;
|
||||
#endif
|
||||
|
||||
#ifndef RINOK
|
||||
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
|
||||
#endif
|
||||
|
||||
typedef unsigned char Byte;
|
||||
typedef short Int16;
|
||||
typedef unsigned short UInt16;
|
||||
|
||||
#ifdef _LZMA_UINT32_IS_ULONG
|
||||
typedef long Int32;
|
||||
typedef unsigned long UInt32;
|
||||
#else
|
||||
typedef int Int32;
|
||||
typedef unsigned int UInt32;
|
||||
#endif
|
||||
|
||||
#ifdef _SZ_NO_INT_64
|
||||
|
||||
/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
|
||||
NOTES: Some code will work incorrectly in that case! */
|
||||
|
||||
typedef long Int64;
|
||||
typedef unsigned long UInt64;
|
||||
|
||||
#else
|
||||
|
||||
#if defined(_MSC_VER) || defined(__BORLANDC__)
|
||||
typedef __int64 Int64;
|
||||
typedef unsigned __int64 UInt64;
|
||||
#else
|
||||
typedef long long int Int64;
|
||||
typedef unsigned long long int UInt64;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef _LZMA_NO_SYSTEM_SIZE_T
|
||||
typedef UInt32 SizeT;
|
||||
#else
|
||||
typedef size_t SizeT;
|
||||
#endif
|
||||
|
||||
typedef int Bool;
|
||||
#define True 1
|
||||
#define False 0
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
#if _MSC_VER >= 1300
|
||||
#define MY_NO_INLINE __declspec(noinline)
|
||||
#else
|
||||
#define MY_NO_INLINE
|
||||
#endif
|
||||
|
||||
#define MY_CDECL __cdecl
|
||||
#define MY_STD_CALL __stdcall
|
||||
#define MY_FAST_CALL MY_NO_INLINE __fastcall
|
||||
|
||||
#else
|
||||
|
||||
#define MY_CDECL
|
||||
#define MY_STD_CALL
|
||||
#define MY_FAST_CALL
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* The following interfaces use first parameter as pointer to structure */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
SRes (*Read)(void *p, void *buf, size_t *size);
|
||||
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
|
||||
(output(*size) < input(*size)) is allowed */
|
||||
} ISeqInStream;
|
||||
|
||||
/* it can return SZ_ERROR_INPUT_EOF */
|
||||
SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size);
|
||||
SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType);
|
||||
SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
size_t (*Write)(void *p, const void *buf, size_t size);
|
||||
/* Returns: result - the number of actually written bytes.
|
||||
(result < size) means error */
|
||||
} ISeqOutStream;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SZ_SEEK_SET = 0,
|
||||
SZ_SEEK_CUR = 1,
|
||||
SZ_SEEK_END = 2
|
||||
} ESzSeek;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
SRes (*Read)(void *p, void *buf, size_t *size); /* same as ISeqInStream::Read */
|
||||
SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
|
||||
} ISeekInStream;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
SRes (*Look)(void *p, void **buf, size_t *size);
|
||||
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
|
||||
(output(*size) > input(*size)) is not allowed
|
||||
(output(*size) < input(*size)) is allowed */
|
||||
SRes (*Skip)(void *p, size_t offset);
|
||||
/* offset must be <= output(*size) of Look */
|
||||
|
||||
SRes (*Read)(void *p, void *buf, size_t *size);
|
||||
/* reads directly (without buffer). It's same as ISeqInStream::Read */
|
||||
SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
|
||||
} ILookInStream;
|
||||
|
||||
SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size);
|
||||
SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset);
|
||||
|
||||
/* reads via ILookInStream::Read */
|
||||
SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType);
|
||||
SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size);
|
||||
|
||||
#define LookToRead_BUF_SIZE (1 << 14)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ILookInStream s;
|
||||
ISeekInStream *realStream;
|
||||
size_t pos;
|
||||
size_t size;
|
||||
Byte buf[LookToRead_BUF_SIZE];
|
||||
} CLookToRead;
|
||||
|
||||
void LookToRead_CreateVTable(CLookToRead *p, int lookahead);
|
||||
void LookToRead_Init(CLookToRead *p);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ISeqInStream s;
|
||||
ILookInStream *realStream;
|
||||
} CSecToLook;
|
||||
|
||||
void SecToLook_CreateVTable(CSecToLook *p);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ISeqInStream s;
|
||||
ILookInStream *realStream;
|
||||
} CSecToRead;
|
||||
|
||||
void SecToRead_CreateVTable(CSecToRead *p);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize);
|
||||
/* Returns: result. (result != SZ_OK) means break.
|
||||
Value (UInt64)(Int64)-1 for size means unknown value. */
|
||||
} ICompressProgress;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
void *(*Alloc)(void *p, size_t size);
|
||||
void (*Free)(void *p, void *address); /* address can be 0 */
|
||||
} ISzAlloc;
|
||||
|
||||
#define IAlloc_Alloc(p, size) (p)->Alloc((p), size)
|
||||
#define IAlloc_Free(p, a) (p)->Free((p), a)
|
||||
|
||||
#endif
|
||||
236
extern/lzma/history.txt
vendored
236
extern/lzma/history.txt
vendored
@@ -1,236 +0,0 @@
|
||||
HISTORY of the LZMA SDK
|
||||
-----------------------
|
||||
|
||||
4.65 2009-02-03
|
||||
-------------------------
|
||||
- Some minor fixes
|
||||
|
||||
|
||||
4.63 2008-12-31
|
||||
-------------------------
|
||||
- Some minor fixes
|
||||
|
||||
|
||||
4.61 beta 2008-11-23
|
||||
-------------------------
|
||||
- The bug in ANSI-C LZMA Decoder was fixed:
|
||||
If encoded stream was corrupted, decoder could access memory
|
||||
outside of allocated range.
|
||||
- Some changes in ANSI-C 7z Decoder interfaces.
|
||||
- LZMA SDK is placed in the public domain.
|
||||
|
||||
|
||||
4.60 beta 2008-08-19
|
||||
-------------------------
|
||||
- Some minor fixes.
|
||||
|
||||
|
||||
4.59 beta 2008-08-13
|
||||
-------------------------
|
||||
- The bug was fixed:
|
||||
LZMA Encoder in fast compression mode could access memory outside of
|
||||
allocated range in some rare cases.
|
||||
|
||||
|
||||
4.58 beta 2008-05-05
|
||||
-------------------------
|
||||
- ANSI-C LZMA Decoder was rewritten for speed optimizations.
|
||||
- ANSI-C LZMA Encoder was included to LZMA SDK.
|
||||
- C++ LZMA code now is just wrapper over ANSI-C code.
|
||||
|
||||
|
||||
4.57 2007-12-12
|
||||
-------------------------
|
||||
- Speed optimizations in <20>++ LZMA Decoder.
|
||||
- Small changes for more compatibility with some C/C++ compilers.
|
||||
|
||||
|
||||
4.49 beta 2007-07-05
|
||||
-------------------------
|
||||
- .7z ANSI-C Decoder:
|
||||
- now it supports BCJ and BCJ2 filters
|
||||
- now it supports files larger than 4 GB.
|
||||
- now it supports "Last Write Time" field for files.
|
||||
- C++ code for .7z archives compressing/decompressing from 7-zip
|
||||
was included to LZMA SDK.
|
||||
|
||||
|
||||
4.43 2006-06-04
|
||||
-------------------------
|
||||
- Small changes for more compatibility with some C/C++ compilers.
|
||||
|
||||
|
||||
4.42 2006-05-15
|
||||
-------------------------
|
||||
- Small changes in .h files in ANSI-C version.
|
||||
|
||||
|
||||
4.39 beta 2006-04-14
|
||||
-------------------------
|
||||
- The bug in versions 4.33b:4.38b was fixed:
|
||||
C++ version of LZMA encoder could not correctly compress
|
||||
files larger than 2 GB with HC4 match finder (-mfhc4).
|
||||
|
||||
|
||||
4.37 beta 2005-04-06
|
||||
-------------------------
|
||||
- Fixes in C++ code: code could no be compiled if _NO_EXCEPTIONS was defined.
|
||||
|
||||
|
||||
4.35 beta 2005-03-02
|
||||
-------------------------
|
||||
- The bug was fixed in C++ version of LZMA Decoder:
|
||||
If encoded stream was corrupted, decoder could access memory
|
||||
outside of allocated range.
|
||||
|
||||
|
||||
4.34 beta 2006-02-27
|
||||
-------------------------
|
||||
- Compressing speed and memory requirements for compressing were increased
|
||||
- LZMA now can use only these match finders: HC4, BT2, BT3, BT4
|
||||
|
||||
|
||||
4.32 2005-12-09
|
||||
-------------------------
|
||||
- Java version of LZMA SDK was included
|
||||
|
||||
|
||||
4.30 2005-11-20
|
||||
-------------------------
|
||||
- Compression ratio was improved in -a2 mode
|
||||
- Speed optimizations for compressing in -a2 mode
|
||||
- -fb switch now supports values up to 273
|
||||
- The bug in 7z_C (7zIn.c) was fixed:
|
||||
It used Alloc/Free functions from different memory pools.
|
||||
So if program used two memory pools, it worked incorrectly.
|
||||
- 7z_C: .7z format supporting was improved
|
||||
- LZMA# SDK (C#.NET version) was included
|
||||
|
||||
|
||||
4.27 (Updated) 2005-09-21
|
||||
-------------------------
|
||||
- Some GUIDs/interfaces in C++ were changed.
|
||||
IStream.h:
|
||||
ISequentialInStream::Read now works as old ReadPart
|
||||
ISequentialOutStream::Write now works as old WritePart
|
||||
|
||||
|
||||
4.27 2005-08-07
|
||||
-------------------------
|
||||
- The bug in LzmaDecodeSize.c was fixed:
|
||||
if _LZMA_IN_CB and _LZMA_OUT_READ were defined,
|
||||
decompressing worked incorrectly.
|
||||
|
||||
|
||||
4.26 2005-08-05
|
||||
-------------------------
|
||||
- Fixes in 7z_C code and LzmaTest.c:
|
||||
previous versions could work incorrectly,
|
||||
if malloc(0) returns 0
|
||||
|
||||
|
||||
4.23 2005-06-29
|
||||
-------------------------
|
||||
- Small fixes in C++ code
|
||||
|
||||
|
||||
4.22 2005-06-10
|
||||
-------------------------
|
||||
- Small fixes
|
||||
|
||||
|
||||
4.21 2005-06-08
|
||||
-------------------------
|
||||
- Interfaces for ANSI-C LZMA Decoder (LzmaDecode.c) were changed
|
||||
- New additional version of ANSI-C LZMA Decoder with zlib-like interface:
|
||||
- LzmaStateDecode.h
|
||||
- LzmaStateDecode.c
|
||||
- LzmaStateTest.c
|
||||
- ANSI-C LZMA Decoder now can decompress files larger than 4 GB
|
||||
|
||||
|
||||
4.17 2005-04-18
|
||||
-------------------------
|
||||
- New example for RAM->RAM compressing/decompressing:
|
||||
LZMA + BCJ (filter for x86 code):
|
||||
- LzmaRam.h
|
||||
- LzmaRam.cpp
|
||||
- LzmaRamDecode.h
|
||||
- LzmaRamDecode.c
|
||||
- -f86 switch for lzma.exe
|
||||
|
||||
|
||||
4.16 2005-03-29
|
||||
-------------------------
|
||||
- The bug was fixed in LzmaDecode.c (ANSI-C LZMA Decoder):
|
||||
If _LZMA_OUT_READ was defined, and if encoded stream was corrupted,
|
||||
decoder could access memory outside of allocated range.
|
||||
- Speed optimization of ANSI-C LZMA Decoder (now it's about 20% faster).
|
||||
Old version of LZMA Decoder now is in file LzmaDecodeSize.c.
|
||||
LzmaDecodeSize.c can provide slightly smaller code than LzmaDecode.c
|
||||
- Small speed optimization in LZMA C++ code
|
||||
- filter for SPARC's code was added
|
||||
- Simplified version of .7z ANSI-C Decoder was included
|
||||
|
||||
|
||||
4.06 2004-09-05
|
||||
-------------------------
|
||||
- The bug in v4.05 was fixed:
|
||||
LZMA-Encoder didn't release output stream in some cases.
|
||||
|
||||
|
||||
4.05 2004-08-25
|
||||
-------------------------
|
||||
- Source code of filters for x86, IA-64, ARM, ARM-Thumb
|
||||
and PowerPC code was included to SDK
|
||||
- Some internal minor changes
|
||||
|
||||
|
||||
4.04 2004-07-28
|
||||
-------------------------
|
||||
- More compatibility with some C++ compilers
|
||||
|
||||
|
||||
4.03 2004-06-18
|
||||
-------------------------
|
||||
- "Benchmark" command was added. It measures compressing
|
||||
and decompressing speed and shows rating values.
|
||||
Also it checks hardware errors.
|
||||
|
||||
|
||||
4.02 2004-06-10
|
||||
-------------------------
|
||||
- C++ LZMA Encoder/Decoder code now is more portable
|
||||
and it can be compiled by GCC on Linux.
|
||||
|
||||
|
||||
4.01 2004-02-15
|
||||
-------------------------
|
||||
- Some detection of data corruption was enabled.
|
||||
LzmaDecode.c / RangeDecoderReadByte
|
||||
.....
|
||||
{
|
||||
rd->ExtraBytes = 1;
|
||||
return 0xFF;
|
||||
}
|
||||
|
||||
|
||||
4.00 2004-02-13
|
||||
-------------------------
|
||||
- Original version of LZMA SDK
|
||||
|
||||
|
||||
|
||||
HISTORY of the LZMA
|
||||
-------------------
|
||||
2001-2008: Improvements to LZMA compressing/decompressing code,
|
||||
keeping compatibility with original LZMA format
|
||||
1996-2001: Development of LZMA compression format
|
||||
|
||||
Some milestones:
|
||||
|
||||
2001-08-30: LZMA compression was added to 7-Zip
|
||||
1999-01-02: First version of 7-Zip was released
|
||||
|
||||
|
||||
End of document
|
||||
594
extern/lzma/lzma.txt
vendored
594
extern/lzma/lzma.txt
vendored
@@ -1,594 +0,0 @@
|
||||
LZMA SDK 4.65
|
||||
-------------
|
||||
|
||||
LZMA SDK provides the documentation, samples, header files, libraries,
|
||||
and tools you need to develop applications that use LZMA compression.
|
||||
|
||||
LZMA is default and general compression method of 7z format
|
||||
in 7-Zip compression program (www.7-zip.org). LZMA provides high
|
||||
compression ratio and very fast decompression.
|
||||
|
||||
LZMA is an improved version of famous LZ77 compression algorithm.
|
||||
It was improved in way of maximum increasing of compression ratio,
|
||||
keeping high decompression speed and low memory requirements for
|
||||
decompressing.
|
||||
|
||||
|
||||
|
||||
LICENSE
|
||||
-------
|
||||
|
||||
LZMA SDK is written and placed in the public domain by Igor Pavlov.
|
||||
|
||||
|
||||
LZMA SDK Contents
|
||||
-----------------
|
||||
|
||||
LZMA SDK includes:
|
||||
|
||||
- ANSI-C/C++/C#/Java source code for LZMA compressing and decompressing
|
||||
- Compiled file->file LZMA compressing/decompressing program for Windows system
|
||||
|
||||
|
||||
UNIX/Linux version
|
||||
------------------
|
||||
To compile C++ version of file->file LZMA encoding, go to directory
|
||||
C++/7zip/Compress/LZMA_Alone
|
||||
and call make to recompile it:
|
||||
make -f makefile.gcc clean all
|
||||
|
||||
In some UNIX/Linux versions you must compile LZMA with static libraries.
|
||||
To compile with static libraries, you can use
|
||||
LIB = -lm -static
|
||||
|
||||
|
||||
Files
|
||||
---------------------
|
||||
lzma.txt - LZMA SDK description (this file)
|
||||
7zFormat.txt - 7z Format description
|
||||
7zC.txt - 7z ANSI-C Decoder description
|
||||
methods.txt - Compression method IDs for .7z
|
||||
lzma.exe - Compiled file->file LZMA encoder/decoder for Windows
|
||||
history.txt - history of the LZMA SDK
|
||||
|
||||
|
||||
Source code structure
|
||||
---------------------
|
||||
|
||||
C/ - C files
|
||||
7zCrc*.* - CRC code
|
||||
Alloc.* - Memory allocation functions
|
||||
Bra*.* - Filters for x86, IA-64, ARM, ARM-Thumb, PowerPC and SPARC code
|
||||
LzFind.* - Match finder for LZ (LZMA) encoders
|
||||
LzFindMt.* - Match finder for LZ (LZMA) encoders for multithreading encoding
|
||||
LzHash.h - Additional file for LZ match finder
|
||||
LzmaDec.* - LZMA decoding
|
||||
LzmaEnc.* - LZMA encoding
|
||||
LzmaLib.* - LZMA Library for DLL calling
|
||||
Types.h - Basic types for another .c files
|
||||
Threads.* - The code for multithreading.
|
||||
|
||||
LzmaLib - LZMA Library (.DLL for Windows)
|
||||
|
||||
LzmaUtil - LZMA Utility (file->file LZMA encoder/decoder).
|
||||
|
||||
Archive - files related to archiving
|
||||
7z - 7z ANSI-C Decoder
|
||||
|
||||
CPP/ -- CPP files
|
||||
|
||||
Common - common files for C++ projects
|
||||
Windows - common files for Windows related code
|
||||
|
||||
7zip - files related to 7-Zip Project
|
||||
|
||||
Common - common files for 7-Zip
|
||||
|
||||
Compress - files related to compression/decompression
|
||||
|
||||
Copy - Copy coder
|
||||
RangeCoder - Range Coder (special code of compression/decompression)
|
||||
LZMA - LZMA compression/decompression on C++
|
||||
LZMA_Alone - file->file LZMA compression/decompression
|
||||
Branch - Filters for x86, IA-64, ARM, ARM-Thumb, PowerPC and SPARC code
|
||||
|
||||
Archive - files related to archiving
|
||||
|
||||
Common - common files for archive handling
|
||||
7z - 7z C++ Encoder/Decoder
|
||||
|
||||
Bundles - Modules that are bundles of other modules
|
||||
|
||||
Alone7z - 7zr.exe: Standalone version of 7z.exe that supports only 7z/LZMA/BCJ/BCJ2
|
||||
Format7zR - 7zr.dll: Reduced version of 7za.dll: extracting/compressing to 7z/LZMA/BCJ/BCJ2
|
||||
Format7zExtractR - 7zxr.dll: Reduced version of 7zxa.dll: extracting from 7z/LZMA/BCJ/BCJ2.
|
||||
|
||||
UI - User Interface files
|
||||
|
||||
Client7z - Test application for 7za.dll, 7zr.dll, 7zxr.dll
|
||||
Common - Common UI files
|
||||
Console - Code for console archiver
|
||||
|
||||
|
||||
|
||||
CS/ - C# files
|
||||
7zip
|
||||
Common - some common files for 7-Zip
|
||||
Compress - files related to compression/decompression
|
||||
LZ - files related to LZ (Lempel-Ziv) compression algorithm
|
||||
LZMA - LZMA compression/decompression
|
||||
LzmaAlone - file->file LZMA compression/decompression
|
||||
RangeCoder - Range Coder (special code of compression/decompression)
|
||||
|
||||
Java/ - Java files
|
||||
SevenZip
|
||||
Compression - files related to compression/decompression
|
||||
LZ - files related to LZ (Lempel-Ziv) compression algorithm
|
||||
LZMA - LZMA compression/decompression
|
||||
RangeCoder - Range Coder (special code of compression/decompression)
|
||||
|
||||
|
||||
C/C++ source code of LZMA SDK is part of 7-Zip project.
|
||||
7-Zip source code can be downloaded from 7-Zip's SourceForge page:
|
||||
|
||||
http://sourceforge.net/projects/sevenzip/
|
||||
|
||||
|
||||
|
||||
LZMA features
|
||||
-------------
|
||||
- Variable dictionary size (up to 1 GB)
|
||||
- Estimated compressing speed: about 2 MB/s on 2 GHz CPU
|
||||
- Estimated decompressing speed:
|
||||
- 20-30 MB/s on 2 GHz Core 2 or AMD Athlon 64
|
||||
- 1-2 MB/s on 200 MHz ARM, MIPS, PowerPC or other simple RISC
|
||||
- Small memory requirements for decompressing (16 KB + DictionarySize)
|
||||
- Small code size for decompressing: 5-8 KB
|
||||
|
||||
LZMA decoder uses only integer operations and can be
|
||||
implemented in any modern 32-bit CPU (or on 16-bit CPU with some conditions).
|
||||
|
||||
Some critical operations that affect the speed of LZMA decompression:
|
||||
1) 32*16 bit integer multiply
|
||||
2) Misspredicted branches (penalty mostly depends from pipeline length)
|
||||
3) 32-bit shift and arithmetic operations
|
||||
|
||||
The speed of LZMA decompressing mostly depends from CPU speed.
|
||||
Memory speed has no big meaning. But if your CPU has small data cache,
|
||||
overall weight of memory speed will slightly increase.
|
||||
|
||||
|
||||
How To Use
|
||||
----------
|
||||
|
||||
Using LZMA encoder/decoder executable
|
||||
--------------------------------------
|
||||
|
||||
Usage: LZMA <e|d> inputFile outputFile [<switches>...]
|
||||
|
||||
e: encode file
|
||||
|
||||
d: decode file
|
||||
|
||||
b: Benchmark. There are two tests: compressing and decompressing
|
||||
with LZMA method. Benchmark shows rating in MIPS (million
|
||||
instructions per second). Rating value is calculated from
|
||||
measured speed and it is normalized with Intel's Core 2 results.
|
||||
Also Benchmark checks possible hardware errors (RAM
|
||||
errors in most cases). Benchmark uses these settings:
|
||||
(-a1, -d21, -fb32, -mfbt4). You can change only -d parameter.
|
||||
Also you can change the number of iterations. Example for 30 iterations:
|
||||
LZMA b 30
|
||||
Default number of iterations is 10.
|
||||
|
||||
<Switches>
|
||||
|
||||
|
||||
-a{N}: set compression mode 0 = fast, 1 = normal
|
||||
default: 1 (normal)
|
||||
|
||||
d{N}: Sets Dictionary size - [0, 30], default: 23 (8MB)
|
||||
The maximum value for dictionary size is 1 GB = 2^30 bytes.
|
||||
Dictionary size is calculated as DictionarySize = 2^N bytes.
|
||||
For decompressing file compressed by LZMA method with dictionary
|
||||
size D = 2^N you need about D bytes of memory (RAM).
|
||||
|
||||
-fb{N}: set number of fast bytes - [5, 273], default: 128
|
||||
Usually big number gives a little bit better compression ratio
|
||||
and slower compression process.
|
||||
|
||||
-lc{N}: set number of literal context bits - [0, 8], default: 3
|
||||
Sometimes lc=4 gives gain for big files.
|
||||
|
||||
-lp{N}: set number of literal pos bits - [0, 4], default: 0
|
||||
lp switch is intended for periodical data when period is
|
||||
equal 2^N. For example, for 32-bit (4 bytes)
|
||||
periodical data you can use lp=2. Often it's better to set lc0,
|
||||
if you change lp switch.
|
||||
|
||||
-pb{N}: set number of pos bits - [0, 4], default: 2
|
||||
pb switch is intended for periodical data
|
||||
when period is equal 2^N.
|
||||
|
||||
-mf{MF_ID}: set Match Finder. Default: bt4.
|
||||
Algorithms from hc* group doesn't provide good compression
|
||||
ratio, but they often works pretty fast in combination with
|
||||
fast mode (-a0).
|
||||
|
||||
Memory requirements depend from dictionary size
|
||||
(parameter "d" in table below).
|
||||
|
||||
MF_ID Memory Description
|
||||
|
||||
bt2 d * 9.5 + 4MB Binary Tree with 2 bytes hashing.
|
||||
bt3 d * 11.5 + 4MB Binary Tree with 3 bytes hashing.
|
||||
bt4 d * 11.5 + 4MB Binary Tree with 4 bytes hashing.
|
||||
hc4 d * 7.5 + 4MB Hash Chain with 4 bytes hashing.
|
||||
|
||||
-eos: write End Of Stream marker. By default LZMA doesn't write
|
||||
eos marker, since LZMA decoder knows uncompressed size
|
||||
stored in .lzma file header.
|
||||
|
||||
-si: Read data from stdin (it will write End Of Stream marker).
|
||||
-so: Write data to stdout
|
||||
|
||||
|
||||
Examples:
|
||||
|
||||
1) LZMA e file.bin file.lzma -d16 -lc0
|
||||
|
||||
compresses file.bin to file.lzma with 64 KB dictionary (2^16=64K)
|
||||
and 0 literal context bits. -lc0 allows to reduce memory requirements
|
||||
for decompression.
|
||||
|
||||
|
||||
2) LZMA e file.bin file.lzma -lc0 -lp2
|
||||
|
||||
compresses file.bin to file.lzma with settings suitable
|
||||
for 32-bit periodical data (for example, ARM or MIPS code).
|
||||
|
||||
3) LZMA d file.lzma file.bin
|
||||
|
||||
decompresses file.lzma to file.bin.
|
||||
|
||||
|
||||
Compression ratio hints
|
||||
-----------------------
|
||||
|
||||
Recommendations
|
||||
---------------
|
||||
|
||||
To increase the compression ratio for LZMA compressing it's desirable
|
||||
to have aligned data (if it's possible) and also it's desirable to locate
|
||||
data in such order, where code is grouped in one place and data is
|
||||
grouped in other place (it's better than such mixing: code, data, code,
|
||||
data, ...).
|
||||
|
||||
|
||||
Filters
|
||||
-------
|
||||
You can increase the compression ratio for some data types, using
|
||||
special filters before compressing. For example, it's possible to
|
||||
increase the compression ratio on 5-10% for code for those CPU ISAs:
|
||||
x86, IA-64, ARM, ARM-Thumb, PowerPC, SPARC.
|
||||
|
||||
You can find C source code of such filters in C/Bra*.* files
|
||||
|
||||
You can check the compression ratio gain of these filters with such
|
||||
7-Zip commands (example for ARM code):
|
||||
No filter:
|
||||
7z a a1.7z a.bin -m0=lzma
|
||||
|
||||
With filter for little-endian ARM code:
|
||||
7z a a2.7z a.bin -m0=arm -m1=lzma
|
||||
|
||||
It works in such manner:
|
||||
Compressing = Filter_encoding + LZMA_encoding
|
||||
Decompressing = LZMA_decoding + Filter_decoding
|
||||
|
||||
Compressing and decompressing speed of such filters is very high,
|
||||
so it will not increase decompressing time too much.
|
||||
Moreover, it reduces decompression time for LZMA_decoding,
|
||||
since compression ratio with filtering is higher.
|
||||
|
||||
These filters convert CALL (calling procedure) instructions
|
||||
from relative offsets to absolute addresses, so such data becomes more
|
||||
compressible.
|
||||
|
||||
For some ISAs (for example, for MIPS) it's impossible to get gain from such filter.
|
||||
|
||||
|
||||
LZMA compressed file format
|
||||
---------------------------
|
||||
Offset Size Description
|
||||
0 1 Special LZMA properties (lc,lp, pb in encoded form)
|
||||
1 4 Dictionary size (little endian)
|
||||
5 8 Uncompressed size (little endian). -1 means unknown size
|
||||
13 Compressed data
|
||||
|
||||
|
||||
ANSI-C LZMA Decoder
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Please note that interfaces for ANSI-C code were changed in LZMA SDK 4.58.
|
||||
If you want to use old interfaces you can download previous version of LZMA SDK
|
||||
from sourceforge.net site.
|
||||
|
||||
To use ANSI-C LZMA Decoder you need the following files:
|
||||
1) LzmaDec.h + LzmaDec.c + Types.h
|
||||
LzmaUtil/LzmaUtil.c is example application that uses these files.
|
||||
|
||||
|
||||
Memory requirements for LZMA decoding
|
||||
-------------------------------------
|
||||
|
||||
Stack usage of LZMA decoding function for local variables is not
|
||||
larger than 200-400 bytes.
|
||||
|
||||
LZMA Decoder uses dictionary buffer and internal state structure.
|
||||
Internal state structure consumes
|
||||
state_size = (4 + (1.5 << (lc + lp))) KB
|
||||
by default (lc=3, lp=0), state_size = 16 KB.
|
||||
|
||||
|
||||
How To decompress data
|
||||
----------------------
|
||||
|
||||
LZMA Decoder (ANSI-C version) now supports 2 interfaces:
|
||||
1) Single-call Decompressing
|
||||
2) Multi-call State Decompressing (zlib-like interface)
|
||||
|
||||
You must use external allocator:
|
||||
Example:
|
||||
void *SzAlloc(void *p, size_t size) { p = p; return malloc(size); }
|
||||
void SzFree(void *p, void *address) { p = p; free(address); }
|
||||
ISzAlloc alloc = { SzAlloc, SzFree };
|
||||
|
||||
You can use p = p; operator to disable compiler warnings.
|
||||
|
||||
|
||||
Single-call Decompressing
|
||||
-------------------------
|
||||
When to use: RAM->RAM decompressing
|
||||
Compile files: LzmaDec.h + LzmaDec.c + Types.h
|
||||
Compile defines: no defines
|
||||
Memory Requirements:
|
||||
- Input buffer: compressed size
|
||||
- Output buffer: uncompressed size
|
||||
- LZMA Internal Structures: state_size (16 KB for default settings)
|
||||
|
||||
Interface:
|
||||
int LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
|
||||
const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
|
||||
ELzmaStatus *status, ISzAlloc *alloc);
|
||||
In:
|
||||
dest - output data
|
||||
destLen - output data size
|
||||
src - input data
|
||||
srcLen - input data size
|
||||
propData - LZMA properties (5 bytes)
|
||||
propSize - size of propData buffer (5 bytes)
|
||||
finishMode - It has meaning only if the decoding reaches output limit (*destLen).
|
||||
LZMA_FINISH_ANY - Decode just destLen bytes.
|
||||
LZMA_FINISH_END - Stream must be finished after (*destLen).
|
||||
You can use LZMA_FINISH_END, when you know that
|
||||
current output buffer covers last bytes of stream.
|
||||
alloc - Memory allocator.
|
||||
|
||||
Out:
|
||||
destLen - processed output size
|
||||
srcLen - processed input size
|
||||
|
||||
Output:
|
||||
SZ_OK
|
||||
status:
|
||||
LZMA_STATUS_FINISHED_WITH_MARK
|
||||
LZMA_STATUS_NOT_FINISHED
|
||||
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
|
||||
SZ_ERROR_DATA - Data error
|
||||
SZ_ERROR_MEM - Memory allocation error
|
||||
SZ_ERROR_UNSUPPORTED - Unsupported properties
|
||||
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
|
||||
|
||||
If LZMA decoder sees end_marker before reaching output limit, it returns OK result,
|
||||
and output value of destLen will be less than output buffer size limit.
|
||||
|
||||
You can use multiple checks to test data integrity after full decompression:
|
||||
1) Check Result and "status" variable.
|
||||
2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
|
||||
3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
|
||||
You must use correct finish mode in that case. */
|
||||
|
||||
|
||||
Multi-call State Decompressing (zlib-like interface)
|
||||
----------------------------------------------------
|
||||
|
||||
When to use: file->file decompressing
|
||||
Compile files: LzmaDec.h + LzmaDec.c + Types.h
|
||||
|
||||
Memory Requirements:
|
||||
- Buffer for input stream: any size (for example, 16 KB)
|
||||
- Buffer for output stream: any size (for example, 16 KB)
|
||||
- LZMA Internal Structures: state_size (16 KB for default settings)
|
||||
- LZMA dictionary (dictionary size is encoded in LZMA properties header)
|
||||
|
||||
1) read LZMA properties (5 bytes) and uncompressed size (8 bytes, little-endian) to header:
|
||||
unsigned char header[LZMA_PROPS_SIZE + 8];
|
||||
ReadFile(inFile, header, sizeof(header)
|
||||
|
||||
2) Allocate CLzmaDec structures (state + dictionary) using LZMA properties
|
||||
|
||||
CLzmaDec state;
|
||||
LzmaDec_Constr(&state);
|
||||
res = LzmaDec_Allocate(&state, header, LZMA_PROPS_SIZE, &g_Alloc);
|
||||
if (res != SZ_OK)
|
||||
return res;
|
||||
|
||||
3) Init LzmaDec structure before any new LZMA stream. And call LzmaDec_DecodeToBuf in loop
|
||||
|
||||
LzmaDec_Init(&state);
|
||||
for (;;)
|
||||
{
|
||||
...
|
||||
int res = LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
|
||||
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode);
|
||||
...
|
||||
}
|
||||
|
||||
|
||||
4) Free all allocated structures
|
||||
LzmaDec_Free(&state, &g_Alloc);
|
||||
|
||||
For full code example, look at C/LzmaUtil/LzmaUtil.c code.
|
||||
|
||||
|
||||
How To compress data
|
||||
--------------------
|
||||
|
||||
Compile files: LzmaEnc.h + LzmaEnc.c + Types.h +
|
||||
LzFind.c + LzFind.h + LzFindMt.c + LzFindMt.h + LzHash.h
|
||||
|
||||
Memory Requirements:
|
||||
- (dictSize * 11.5 + 6 MB) + state_size
|
||||
|
||||
Lzma Encoder can use two memory allocators:
|
||||
1) alloc - for small arrays.
|
||||
2) allocBig - for big arrays.
|
||||
|
||||
For example, you can use Large RAM Pages (2 MB) in allocBig allocator for
|
||||
better compression speed. Note that Windows has bad implementation for
|
||||
Large RAM Pages.
|
||||
It's OK to use same allocator for alloc and allocBig.
|
||||
|
||||
|
||||
Single-call Compression with callbacks
|
||||
--------------------------------------
|
||||
|
||||
Check C/LzmaUtil/LzmaUtil.c as example,
|
||||
|
||||
When to use: file->file decompressing
|
||||
|
||||
1) you must implement callback structures for interfaces:
|
||||
ISeqInStream
|
||||
ISeqOutStream
|
||||
ICompressProgress
|
||||
ISzAlloc
|
||||
|
||||
static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); }
|
||||
static void SzFree(void *p, void *address) { p = p; MyFree(address); }
|
||||
static ISzAlloc g_Alloc = { SzAlloc, SzFree };
|
||||
|
||||
CFileSeqInStream inStream;
|
||||
CFileSeqOutStream outStream;
|
||||
|
||||
inStream.funcTable.Read = MyRead;
|
||||
inStream.file = inFile;
|
||||
outStream.funcTable.Write = MyWrite;
|
||||
outStream.file = outFile;
|
||||
|
||||
|
||||
2) Create CLzmaEncHandle object;
|
||||
|
||||
CLzmaEncHandle enc;
|
||||
|
||||
enc = LzmaEnc_Create(&g_Alloc);
|
||||
if (enc == 0)
|
||||
return SZ_ERROR_MEM;
|
||||
|
||||
|
||||
3) initialize CLzmaEncProps properties;
|
||||
|
||||
LzmaEncProps_Init(&props);
|
||||
|
||||
Then you can change some properties in that structure.
|
||||
|
||||
4) Send LZMA properties to LZMA Encoder
|
||||
|
||||
res = LzmaEnc_SetProps(enc, &props);
|
||||
|
||||
5) Write encoded properties to header
|
||||
|
||||
Byte header[LZMA_PROPS_SIZE + 8];
|
||||
size_t headerSize = LZMA_PROPS_SIZE;
|
||||
UInt64 fileSize;
|
||||
int i;
|
||||
|
||||
res = LzmaEnc_WriteProperties(enc, header, &headerSize);
|
||||
fileSize = MyGetFileLength(inFile);
|
||||
for (i = 0; i < 8; i++)
|
||||
header[headerSize++] = (Byte)(fileSize >> (8 * i));
|
||||
MyWriteFileAndCheck(outFile, header, headerSize)
|
||||
|
||||
6) Call encoding function:
|
||||
res = LzmaEnc_Encode(enc, &outStream.funcTable, &inStream.funcTable,
|
||||
NULL, &g_Alloc, &g_Alloc);
|
||||
|
||||
7) Destroy LZMA Encoder Object
|
||||
LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc);
|
||||
|
||||
|
||||
If callback function return some error code, LzmaEnc_Encode also returns that code.
|
||||
|
||||
|
||||
Single-call RAM->RAM Compression
|
||||
--------------------------------
|
||||
|
||||
Single-call RAM->RAM Compression is similar to Compression with callbacks,
|
||||
but you provide pointers to buffers instead of pointers to stream callbacks:
|
||||
|
||||
HRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
|
||||
CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
|
||||
ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
|
||||
|
||||
Return code:
|
||||
SZ_OK - OK
|
||||
SZ_ERROR_MEM - Memory allocation error
|
||||
SZ_ERROR_PARAM - Incorrect paramater
|
||||
SZ_ERROR_OUTPUT_EOF - output buffer overflow
|
||||
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
|
||||
|
||||
|
||||
|
||||
LZMA Defines
|
||||
------------
|
||||
|
||||
_LZMA_SIZE_OPT - Enable some optimizations in LZMA Decoder to get smaller executable code.
|
||||
|
||||
_LZMA_PROB32 - It can increase the speed on some 32-bit CPUs, but memory usage for
|
||||
some structures will be doubled in that case.
|
||||
|
||||
_LZMA_UINT32_IS_ULONG - Define it if int is 16-bit on your compiler and long is 32-bit.
|
||||
|
||||
_LZMA_NO_SYSTEM_SIZE_T - Define it if you don't want to use size_t type.
|
||||
|
||||
|
||||
C++ LZMA Encoder/Decoder
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
C++ LZMA code use COM-like interfaces. So if you want to use it,
|
||||
you can study basics of COM/OLE.
|
||||
C++ LZMA code is just wrapper over ANSI-C code.
|
||||
|
||||
|
||||
C++ Notes
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
If you use some C++ code folders in 7-Zip (for example, C++ code for .7z handling),
|
||||
you must check that you correctly work with "new" operator.
|
||||
7-Zip can be compiled with MSVC 6.0 that doesn't throw "exception" from "new" operator.
|
||||
So 7-Zip uses "CPP\Common\NewHandler.cpp" that redefines "new" operator:
|
||||
operator new(size_t size)
|
||||
{
|
||||
void *p = ::malloc(size);
|
||||
if (p == 0)
|
||||
throw CNewException();
|
||||
return p;
|
||||
}
|
||||
If you use MSCV that throws exception for "new" operator, you can compile without
|
||||
"NewHandler.cpp". So standard exception will be used. Actually some code of
|
||||
7-Zip catches any exception in internal code and converts it to HRESULT code.
|
||||
So you don't need to catch CNewException, if you call COM interfaces of 7-Zip.
|
||||
|
||||
---
|
||||
|
||||
http://www.7-zip.org
|
||||
http://www.7-zip.org/sdk.html
|
||||
http://www.7-zip.org/support.html
|
||||
Reference in New Issue
Block a user