Sync decompression code from wimlib
parent 3ddd227ee8
commit 5c337bc502

Makefile.am
@@ -7,15 +7,21 @@ plugindir = $(libdir)/ntfs-3g
 plugin_LTLIBRARIES = ntfs-plugin-80000017.la

 ntfs_plugin_80000017_la_SOURCES = \
+    src/aligned_malloc.c \
+    src/common_defs.h \
     src/decompress_common.c \
     src/decompress_common.h \
+    src/lzx_common.c \
+    src/lzx_common.h \
+    src/lzx_constants.h \
     src/lzx_decompress.c \
     src/plugin.c \
     src/system_compression.c \
     src/system_compression.h \
+    src/xpress_constants.h \
     src/xpress_decompress.c

 ntfs_plugin_80000017_la_LDFLAGS = -module -shared -avoid-version
 ntfs_plugin_80000017_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64
-ntfs_plugin_80000017_la_CFLAGS = $(LIBNTFS_3G_CFLAGS)
+ntfs_plugin_80000017_la_CFLAGS = $(LIBNTFS_3G_CFLAGS) -std=gnu99
 ntfs_plugin_80000017_la_LIBADD = $(LIBNTFS_3G_LIBS)

README.md
@@ -34,7 +34,22 @@ directory (`$libdir`). An example full path to the installed plugin is
 platforms. `make install` will create the plugin directory if it does not
 already exist.

-# License
+# Implementation note
+
+The XPRESS and LZX decompression formats used in system-compressed files are
+identical to the formats used in Windows Imaging (WIM) archives. Therefore, for
+the system compression plugin I borrowed the XPRESS and LZX decompressors I had
+already written for the wimlib project (https://wimlib.net/). I made some
+slight modifications for integration purposes. The code in wimlib is currently
+licensed LGPLv3+, but I have relicensed the version in this plugin to GPLv2+ for
+consistency with NTFS-3G's license. (Public domain portions remain public
+domain.)
+
+# Notices
+
+The NTFS-3G system compression plugin was written by Eric Biggers, with
+contributions from Jean-Pierre André. You can contact the author at
+ebiggers3@gmail.com.

 This software may be redistributed and/or modified under the terms of the GNU
 General Public License as published by the Free Software Foundation, either

configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([ntfs-3g-system-compression], [0.1], [ebiggers3@gmail.com])
+AC_INIT([ntfs-3g-system-compression], [0.2], [ebiggers3@gmail.com])

 AC_CONFIG_SRCDIR([src/plugin.c])
 AC_CONFIG_MACRO_DIR([m4])

src/aligned_malloc.c (new file)
@@ -0,0 +1,34 @@
+/*
+ * aligned_malloc.c - aligned memory allocation
+ *
+ * This file provides portable aligned memory allocation functions that only use
+ * malloc() and free(). This avoids portability problems with posix_memalign(),
+ * aligned_alloc(), etc.
+ */
+
+#include <stdlib.h>
+
+#include "common_defs.h"
+
+void *
+aligned_malloc(size_t size, size_t alignment)
+{
+    const uintptr_t mask = alignment - 1;
+    char *ptr = NULL;
+    char *raw_ptr;
+
+    raw_ptr = malloc(mask + sizeof(size_t) + size);
+    if (raw_ptr) {
+        ptr = (char *)raw_ptr + sizeof(size_t);
+        ptr = (void *)(((uintptr_t)ptr + mask) & ~mask);
+        *((size_t *)ptr - 1) = ptr - raw_ptr;
+    }
+    return ptr;
+}
+
+void
+aligned_free(void *ptr)
+{
+    if (ptr)
+        free((char *)ptr - *((size_t *)ptr - 1));
+}
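
Aside: the allocator above works by over-allocating by `alignment - 1 +
sizeof(size_t)` bytes, rounding the pointer up to the requested power-of-two
boundary, and stashing the offset back to the raw malloc() block just before
the returned pointer, where aligned_free() can find it. A minimal usage sketch
(illustrative only, not code from this commit; it assumes the declarations in
src/common_defs.h):

    #include <assert.h>
    #include <stdint.h>
    #include "common_defs.h"

    int main(void)
    {
        /* Request a 16-byte-aligned buffer, e.g. for a decode table. */
        void *p = aligned_malloc(1000, 16);
        assert(p != NULL && ((uintptr_t)p & 15) == 0);
        aligned_free(p);  /* recovers and frees the raw malloc() block */
        return 0;
    }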

src/common_defs.h (new file)
@@ -0,0 +1,290 @@
+#ifndef _COMMON_DEFS_H
+#define _COMMON_DEFS_H
+
+#include <ntfs-3g/endians.h>
+#include <ntfs-3g/types.h>
+
+/* ========================================================================== */
+/*                              Type definitions                              */
+/* ========================================================================== */
+
+/*
+ * Type of a machine word. 'unsigned long' would be logical, but that is only
+ * 32 bits on x86_64 Windows. The same applies to 'uint_fast32_t'. So the best
+ * we can do without a bunch of #ifdefs appears to be 'size_t'.
+ */
+typedef size_t machine_word_t;
+
+#define WORDBYTES sizeof(machine_word_t)
+#define WORDBITS  (8 * WORDBYTES)
+
+/* ========================================================================== */
+/*                        Compiler-specific definitions                       */
+/* ========================================================================== */
+
+#ifdef __GNUC__ /* GCC, or GCC-compatible compiler such as clang */
+# define forceinline            inline __attribute__((always_inline))
+# define likely(expr)           __builtin_expect(!!(expr), 1)
+# define unlikely(expr)         __builtin_expect(!!(expr), 0)
+# define _aligned_attribute(n)  __attribute__((aligned(n)))
+# define bsr32(n)               (31 - __builtin_clz(n))
+# define bsr64(n)               (63 - __builtin_clzll(n))
+# define bsf32(n)               __builtin_ctz(n)
+# define bsf64(n)               __builtin_ctzll(n)
+# ifndef min
+#  define min(a, b)  ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
+                        (_a < _b) ? _a : _b; })
+# endif
+# ifndef max
+#  define max(a, b)  ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
+                        (_a > _b) ? _a : _b; })
+# endif
+
+# define DEFINE_UNALIGNED_TYPE(type)                            \
+    struct type##_unaligned {                                   \
+        type v;                                                 \
+    } __attribute__((packed));                                  \
+                                                                \
+    static inline type                                          \
+    load_##type##_unaligned(const void *p)                      \
+    {                                                           \
+        return ((const struct type##_unaligned *)p)->v;         \
+    }                                                           \
+                                                                \
+    static inline void                                          \
+    store_##type##_unaligned(type val, void *p)                 \
+    {                                                           \
+        ((struct type##_unaligned *)p)->v = val;                \
+    }

+#endif /* __GNUC__ */
+
+/* Declare that the annotated function should always be inlined. This might be
+ * desirable in highly tuned code, e.g. compression codecs */
+#ifndef forceinline
+# define forceinline inline
+#endif
+
+/* Hint that the expression is usually true */
+#ifndef likely
+# define likely(expr) (expr)
+#endif
+
+/* Hint that the expression is usually false */
+#ifndef unlikely
+# define unlikely(expr) (expr)
+#endif
+
+/* Declare that the annotated variable, or variables of the annotated type, are
+ * to be aligned on n-byte boundaries */
+#ifndef _aligned_attribute
+# define _aligned_attribute(n)
+#endif
+
+/* min() and max() macros */
+#ifndef min
+# define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+#ifndef max
+# define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+/* STATIC_ASSERT() - verify the truth of an expression at compilation time */
+#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
+
+/* STATIC_ASSERT_ZERO() - verify the truth of an expression at compilation time
+ * and also produce a result of value '0' to be used in constant expressions */
+#define STATIC_ASSERT_ZERO(expr) ((int)sizeof(char[-!(expr)]))
+
+/* UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses
+ * can be performed efficiently on the target platform. */
+#if defined(__x86_64__) || defined(__i386__) || defined(__ARM_FEATURE_UNALIGNED)
+# define UNALIGNED_ACCESS_IS_FAST 1
+#else
+# define UNALIGNED_ACCESS_IS_FAST 0
+#endif
+
+/*
+ * DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type',
+ * defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions
+ * which load and store variables of type 'type' from/to unaligned memory
+ * addresses.
+ */
+#ifndef DEFINE_UNALIGNED_TYPE
+
+#include <string.h>
+/*
+ * Although memcpy() may seem inefficient, it *usually* gets optimized
+ * appropriately by modern compilers. It's portable and may be the best we can
+ * do for a fallback...
+ */
+#define DEFINE_UNALIGNED_TYPE(type)                             \
+                                                                \
+    static forceinline type                                     \
+    load_##type##_unaligned(const void *p)                      \
+    {                                                           \
+        type v;                                                 \
+        memcpy(&v, p, sizeof(v));                               \
+        return v;                                               \
+    }                                                           \
+                                                                \
+    static forceinline void                                     \
+    store_##type##_unaligned(type v, void *p)                   \
+    {                                                           \
+        memcpy(p, &v, sizeof(v));                               \
+    }
+
+#endif /* !DEFINE_UNALIGNED_TYPE */
+
+/* ========================================================================== */
+/*                          Unaligned memory accesses                         */
+/* ========================================================================== */
+
+DEFINE_UNALIGNED_TYPE(le16);
+DEFINE_UNALIGNED_TYPE(le32);
+DEFINE_UNALIGNED_TYPE(machine_word_t);
+
+#define load_word_unaligned  load_machine_word_t_unaligned
+#define store_word_unaligned store_machine_word_t_unaligned
+
+static inline u16
+get_unaligned_le16(const u8 *p)
+{
+    if (UNALIGNED_ACCESS_IS_FAST)
+        return le16_to_cpu(load_le16_unaligned(p));
+    else
+        return ((u16)p[1] << 8) | p[0];
+}
+
+static inline u32
+get_unaligned_le32(const u8 *p)
+{
+    if (UNALIGNED_ACCESS_IS_FAST)
+        return le32_to_cpu(load_le32_unaligned(p));
+    else
+        return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
+               ((u32)p[1] << 8) | p[0];
+}
+
+static inline void
+put_unaligned_le16(u16 v, u8 *p)
+{
+    if (UNALIGNED_ACCESS_IS_FAST) {
+        store_le16_unaligned(cpu_to_le16(v), p);
+    } else {
+        p[0] = (u8)(v >> 0);
+        p[1] = (u8)(v >> 8);
+    }
+}
+
+static inline void
+put_unaligned_le32(u32 v, u8 *p)
+{
+    if (UNALIGNED_ACCESS_IS_FAST) {
+        store_le32_unaligned(cpu_to_le32(v), p);
+    } else {
+        p[0] = (u8)(v >> 0);
+        p[1] = (u8)(v >> 8);
+        p[2] = (u8)(v >> 16);
+        p[3] = (u8)(v >> 24);
+    }
+}
+
+/* ========================================================================== */
+/*                             Bit scan functions                             */
+/* ========================================================================== */
+
+/*
+ * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
+ * significant end) of the *most* significant 1 bit in the input value. The
+ * input value must be nonzero!
+ */
+
+#ifndef bsr32
+static forceinline unsigned
+bsr32(u32 v)
+{
+    unsigned bit = 0;
+    while ((v >>= 1) != 0)
+        bit++;
+    return bit;
+}
+#endif
+
+#ifndef bsr64
+static forceinline unsigned
+bsr64(u64 v)
+{
+    unsigned bit = 0;
+    while ((v >>= 1) != 0)
+        bit++;
+    return bit;
+}
+#endif
+
+static forceinline unsigned
+bsrw(machine_word_t v)
+{
+    STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+    if (WORDBITS == 32)
+        return bsr32(v);
+    else
+        return bsr64(v);
+}
+
+/*
+ * Bit Scan Forward (BSF) - find the 0-based index (relative to the least
+ * significant end) of the *least* significant 1 bit in the input value. The
+ * input value must be nonzero!
+ */
+
+#ifndef bsf32
+static forceinline unsigned
+bsf32(u32 v)
+{
+    unsigned bit;
+    for (bit = 0; !(v & 1); bit++, v >>= 1)
+        ;
+    return bit;
+}
+#endif
+
+#ifndef bsf64
+static forceinline unsigned
+bsf64(u64 v)
+{
+    unsigned bit;
+    for (bit = 0; !(v & 1); bit++, v >>= 1)
+        ;
+    return bit;
+}
+#endif
+
+static forceinline unsigned
+bsfw(machine_word_t v)
+{
+    STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+    if (WORDBITS == 32)
+        return bsf32(v);
+    else
+        return bsf64(v);
+}
+
+/* Return the log base 2 of 'n', rounded up to the nearest integer. */
+static forceinline unsigned
+ilog2_ceil(size_t n)
+{
+    if (n <= 1)
+        return 0;
+    return 1 + bsrw(n - 1);
+}
+
+/* ========================================================================== */
+/*                          Aligned memory allocation                         */
+/* ========================================================================== */
+
+extern void *aligned_malloc(size_t size, size_t alignment);
+extern void aligned_free(void *ptr);
+
+#endif /* _COMMON_DEFS_H */
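
Aside: to make the bit-scan helpers concrete (an illustrative sketch, not code
from this commit): bsr32() returns the index of the highest set bit, bsf32()
the index of the lowest, and ilog2_ceil() rounds the base-2 logarithm up, which
is the right operation for sizing a table to the next power of two:

    #include <assert.h>
    #include "common_defs.h"

    int main(void)
    {
        assert(bsr32(0x90) == 7);    /* highest 1 bit of 1001 0000 */
        assert(bsf32(0x90) == 4);    /* lowest 1 bit */
        assert(ilog2_ceil(4) == 2);  /* exact power of two */
        assert(ilog2_ceil(5) == 3);  /* rounded up: 2^3 = 8 >= 5 */
        return 0;
    }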

src/decompress_common.c
@@ -1,325 +1,335 @@
 /*
- * decompress_common.c - Code shared by the XPRESS and LZX decompressors
+ * decompress_common.c
  *
- * Copyright (C) 2015 Eric Biggers
+ * Code for decompression shared among multiple compression formats.
  *
- * This program is free software: you can redistribute it and/or modify it under
- * the terms of the GNU General Public License as published by the Free Software
- * Foundation, either version 2 of the License, or (at your option) any later
- * version.
+ * The following copying information applies to this specific source code file:
  *
- * This program is distributed in the hope that it will be useful, but WITHOUT
+ * Written in 2012-2016 by Eric Biggers <ebiggers3@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
+ * Dedication (the "CC0").
+ *
+ * This software is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
+ * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
+ * You should have received a copy of the CC0 along with this software; if not
+ * see <http://creativecommons.org/publicdomain/zero/1.0/>.
  */

 #ifdef HAVE_CONFIG_H
-#include "config.h"
+# include "config.h"
 #endif

 #include <string.h>

+#ifdef __SSE2__
+# include <emmintrin.h>
+#endif
+
 #include "decompress_common.h"

 /*
  * make_huffman_decode_table() -
  *
- * Build a decoding table for a canonical prefix code, or "Huffman code".
+ * Given an alphabet of symbols and the length of each symbol's codeword in a
+ * canonical prefix code, build a table for quickly decoding symbols that were
+ * encoded with that code.
  *
- * This is an internal function, not part of the library API!
+ * A _prefix code_ is an assignment of bitstrings called _codewords_ to symbols
+ * such that no whole codeword is a prefix of any other. A prefix code might be
+ * a _Huffman code_, which means that it is an optimum prefix code for a given
+ * list of symbol frequencies and was generated by the Huffman algorithm.
+ * Although the prefix codes processed here will ordinarily be "Huffman codes",
+ * strictly speaking the decoder cannot know whether a given code was actually
+ * generated by the Huffman algorithm or not.
  *
- * This takes as input the length of the codeword for each symbol in the
- * alphabet and produces as output a table that can be used for fast
- * decoding of prefix-encoded symbols using read_huffsym().
+ * A prefix code is _canonical_ if and only if a longer codeword never
+ * lexicographically precedes a shorter codeword, and the lexicographic ordering
+ * of codewords of equal length is the same as the lexicographic ordering of the
+ * corresponding symbols. The advantage of using a canonical prefix code is
+ * that the codewords can be reconstructed from only the symbol => codeword
+ * length mapping. This eliminates the need to transmit the codewords
+ * explicitly. Instead, they can be enumerated in lexicographic order after
+ * sorting the symbols primarily by increasing codeword length and secondarily
+ * by increasing symbol value.
  *
- * Strictly speaking, a canonical prefix code might not be a Huffman
- * code. But this algorithm will work either way; and in fact, since
- * Huffman codes are defined in terms of symbol frequencies, there is no
- * way for the decompressor to know whether the code is a true Huffman
- * code or not until all symbols have been decoded.
+ * However, the decoder's real goal is to decode symbols with the code, not just
+ * generate the list of codewords. Consequently, this function directly builds
+ * a table for efficiently decoding symbols using the code. The basic idea is
+ * that given the next 'max_codeword_len' bits of input, the decoder can look up
+ * the next decoded symbol by indexing a table containing '2^max_codeword_len'
+ * entries. A codeword with length 'max_codeword_len' will have exactly one
+ * entry in this table, whereas a codeword shorter than 'max_codeword_len' will
+ * have multiple entries in this table. Precisely, a codeword of length 'n'
+ * will have '2^(max_codeword_len - n)' entries. The index of each such entry,
+ * considered as a bitstring of length 'max_codeword_len', will contain the
+ * corresponding codeword as a prefix.
  *
- * Because the prefix code is assumed to be "canonical", it can be
- * reconstructed directly from the codeword lengths. A prefix code is
- * canonical if and only if a longer codeword never lexicographically
- * precedes a shorter codeword, and the lexicographic ordering of
- * codewords of the same length is the same as the lexicographic ordering
- * of the corresponding symbols. Consequently, we can sort the symbols
- * primarily by codeword length and secondarily by symbol value, then
- * reconstruct the prefix code by generating codewords lexicographically
- * in that order.
+ * That's the basic idea, but we extend it in two ways:
  *
- * This function does not, however, generate the prefix code explicitly.
- * Instead, it directly builds a table for decoding symbols using the
- * code. The basic idea is this: given the next 'max_codeword_len' bits
- * in the input, we can look up the decoded symbol by indexing a table
- * containing 2**max_codeword_len entries. A codeword with length
- * 'max_codeword_len' will have exactly one entry in this table, whereas
- * a codeword shorter than 'max_codeword_len' will have multiple entries
- * in this table. Precisely, a codeword of length n will be represented
- * by 2**(max_codeword_len - n) entries in this table. The 0-based index
- * of each such entry will contain the corresponding codeword as a prefix
- * when zero-padded on the left to 'max_codeword_len' binary digits.
+ * - Often the maximum codeword length is too long for it to be efficient to
+ *   build the full decode table whenever a new code is used. Instead, we build
+ *   a "root" table using only '2^table_bits' entries, where 'table_bits <=
+ *   max_codeword_len'. Then, a lookup of 'table_bits' bits produces either a
+ *   symbol directly (for codewords not longer than 'table_bits'), or the index
+ *   of a subtable which must be indexed with additional bits of input to fully
+ *   decode the symbol (for codewords longer than 'table_bits').
  *
- * That's the basic idea, but we implement two optimizations regarding
- * the format of the decode table itself:
+ * - Whenever the decoder decodes a symbol, it needs to know the codeword length
+ *   so that it can remove the appropriate number of input bits. The obvious
+ *   solution would be to simply retain the codeword lengths array and use the
+ *   decoded symbol as an index into it. However, that would require two array
+ *   accesses when decoding each symbol. Our strategy is to instead store the
+ *   codeword length directly in the decode table entry along with the symbol.
  *
- * - For many compression formats, the maximum codeword length is too
- *   long for it to be efficient to build the full decoding table
- *   whenever a new prefix code is used. Instead, we can build the table
- *   using only 2**table_bits entries, where 'table_bits' is some number
- *   less than or equal to 'max_codeword_len'. Then, only codewords of
- *   length 'table_bits' and shorter can be directly looked up. For
- *   longer codewords, the direct lookup instead produces the root of a
- *   binary tree. Using this tree, the decoder can do traditional
- *   bit-by-bit decoding of the remainder of the codeword. Child nodes
- *   are allocated in extra entries at the end of the table; leaf nodes
- *   contain symbols. Note that the long-codeword case is, in general,
- *   not performance critical, since in Huffman codes the most frequently
- *   used symbols are assigned the shortest codeword lengths.
- *
- * - When we decode a symbol using a direct lookup of the table, we still
- *   need to know its length so that the bitstream can be advanced by the
- *   appropriate number of bits. The simple solution is to simply retain
- *   the 'lens' array and use the decoded symbol as an index into it.
- *   However, this requires two separate array accesses in the fast path.
- *   The optimization is to store the length directly in the decode
- *   table. We use the bottom 11 bits for the symbol and the top 5 bits
- *   for the length. In addition, to combine this optimization with the
- *   previous one, we introduce a special case where the top 2 bits of
- *   the length are both set if the entry is actually the root of a
- *   binary tree.
+ * See MAKE_DECODE_TABLE_ENTRY() for full details on the format of decode table
+ * entries, and see read_huffsym() for full details on how symbols are decoded.
  *
  * @decode_table:
- *     The array in which to create the decoding table. This must have
- *     a length of at least ((2**table_bits) + 2 * num_syms) entries.
+ *     The array in which to build the decode table. This must have been
+ *     declared by the DECODE_TABLE() macro. This may alias @lens, since all
+ *     @lens are consumed before the decode table is written to.
  *
  * @num_syms:
- *     The number of symbols in the alphabet; also, the length of the
- *     'lens' array. Must be less than or equal to 2048.
+ *     The number of symbols in the alphabet.
  *
  * @table_bits:
- *     The order of the decode table size, as explained above. Must be
- *     less than or equal to 13.
+ *     The log base 2 of the number of entries in the root table.
  *
  * @lens:
- *     An array of length @num_syms, indexable by symbol, that gives the
- *     length of the codeword, in bits, for that symbol. The length can
- *     be 0, which means that the symbol does not have a codeword
- *     assigned.
+ *     An array of length @num_syms, indexed by symbol, that gives the length
+ *     of the codeword, in bits, for each symbol. The length can be 0, which
+ *     means that the symbol does not have a codeword assigned. In addition,
+ *     @lens may alias @decode_table, as noted above.
  *
  * @max_codeword_len:
- *     The longest codeword length allowed in the compression format.
- *     All entries in 'lens' must be less than or equal to this value.
- *     This must be less than or equal to 23.
+ *     The maximum codeword length permitted for this code. All entries in
+ *     'lens' must be less than or equal to this value.
  *
  * @working_space
- *     A temporary array of length '2 * (max_codeword_len + 1) +
- *     num_syms'.
+ *     A temporary array that was declared with DECODE_TABLE_WORKING_SPACE().
  *
- * Returns 0 on success, or -1 if the lengths do not form a valid prefix
- * code.
+ * Returns 0 on success, or -1 if the lengths do not form a valid prefix code.
  */
-int make_huffman_decode_table(u16 decode_table[], const unsigned num_syms,
-                              const unsigned table_bits, const u8 lens[],
-                              const unsigned max_codeword_len,
-                              u16 working_space[])
+int
+make_huffman_decode_table(u16 decode_table[], unsigned num_syms,
+                          unsigned table_bits, const u8 lens[],
+                          unsigned max_codeword_len, u16 working_space[])
 {
-    const unsigned table_num_entries = 1 << table_bits;
     u16 * const len_counts = &working_space[0];
     u16 * const offsets = &working_space[1 * (max_codeword_len + 1)];
     u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];
-    int left;
-    void *decode_table_ptr;
+    s32 remainder = 1;
+    void *entry_ptr = decode_table;
+    unsigned codeword_len = 1;
     unsigned sym_idx;
-    unsigned codeword_len;
-    unsigned stores_per_loop;
-    unsigned decode_table_pos;
-    unsigned len;
-    unsigned sym;
+    unsigned codeword;
+    unsigned subtable_pos;
+    unsigned subtable_bits;
+    unsigned subtable_prefix;

-    /* Count how many symbols have each possible codeword length.
-     * Note that a length of 0 indicates the corresponding symbol is not
-     * used in the code and therefore does not have a codeword. */
-    for (len = 0; len <= max_codeword_len; len++)
+    /* Count how many codewords have each length, including 0. */
+    for (unsigned len = 0; len <= max_codeword_len; len++)
         len_counts[len] = 0;
-    for (sym = 0; sym < num_syms; sym++)
+    for (unsigned sym = 0; sym < num_syms; sym++)
         len_counts[lens[sym]]++;

-    /* We can assume all lengths are <= max_codeword_len, but we
-     * cannot assume they form a valid prefix code. A codeword of
-     * length n should require a proportion of the codespace equaling
-     * (1/2)^n. The code is valid if and only if the codespace is
-     * exactly filled by the lengths, by this measure. */
-    left = 1;
-    for (len = 1; len <= max_codeword_len; len++) {
-        left <<= 1;
-        left -= len_counts[len];
-        if (left < 0) {
-            /* The lengths overflow the codespace; that is, the code
-             * is over-subscribed. */
+    /* It is already guaranteed that all lengths are <= max_codeword_len,
+     * but it cannot be assumed they form a complete prefix code. A
+     * codeword of length n should require a proportion of the codespace
+     * equaling (1/2)^n. The code is complete if and only if, by this
+     * measure, the codespace is exactly filled by the lengths. */
+    for (unsigned len = 1; len <= max_codeword_len; len++) {
+        remainder = (remainder << 1) - len_counts[len];
+        /* Do the lengths overflow the codespace? */
+        if (unlikely(remainder < 0))
             return -1;
-        }
     }

-    if (left != 0) {
+    if (remainder != 0) {
         /* The lengths do not fill the codespace; that is, they form an
-         * incomplete set. */
-        if (left == (1 << max_codeword_len)) {
-            /* The code is completely empty. This is arguably
-             * invalid, but in fact it is valid in LZX and XPRESS,
-             * so we must allow it. By definition, no symbols can
-             * be decoded with an empty code. Consequently, we
-             * technically don't even need to fill in the decode
-             * table. However, to avoid accessing uninitialized
-             * memory if the algorithm nevertheless attempts to
-             * decode symbols using such a code, we zero out the
-             * decode table. */
-            memset(decode_table, 0,
-                   table_num_entries * sizeof(decode_table[0]));
-            return 0;
-        }
-        return -1;
+         * incomplete code. This is permitted only if the code is empty
+         * (contains no symbols). */
+
+        if (unlikely(remainder != 1U << max_codeword_len))
+            return -1;
+
+        /* The code is empty. When processing a well-formed stream, the
+         * decode table need not be initialized in this case. However,
+         * we cannot assume the stream is well-formed, so we must
+         * initialize the decode table anyway. Setting all entries to 0
+         * makes the decode table always produce symbol '0' without
+         * consuming any bits, which is good enough. */
+        memset(decode_table, 0, sizeof(decode_table[0]) << table_bits);
+        return 0;
     }

-    /* Sort the symbols primarily by length and secondarily by symbol order.
-     */
+    /* Sort the symbols primarily by increasing codeword length and
+     * secondarily by increasing symbol value. */

-    /* Initialize 'offsets' so that offsets[len] for 1 <= len <=
-     * max_codeword_len is the number of codewords shorter than 'len' bits.
-     */
-    offsets[1] = 0;
-    for (len = 1; len < max_codeword_len; len++)
+    /* Initialize 'offsets' so that 'offsets[len]' is the number of
+     * codewords shorter than 'len' bits, including length 0. */
+    offsets[0] = 0;
+    for (unsigned len = 0; len < max_codeword_len; len++)
         offsets[len + 1] = offsets[len] + len_counts[len];

-    /* Use the 'offsets' array to sort the symbols. Note that we do not
-     * include symbols that are not used in the code. Consequently, fewer
-     * than 'num_syms' entries in 'sorted_syms' may be filled. */
-    for (sym = 0; sym < num_syms; sym++)
-        if (lens[sym] != 0)
-            sorted_syms[offsets[lens[sym]]++] = sym;
+    /* Use the 'offsets' array to sort the symbols. */
+    for (unsigned sym = 0; sym < num_syms; sym++)
+        sorted_syms[offsets[lens[sym]]++] = sym;

-    /* Fill entries for codewords with length <= table_bits
-     * --- that is, those short enough for a direct mapping.
+    /*
+     * Fill the root table entries for codewords no longer than table_bits.
      *
      * The table will start with entries for the shortest codeword(s), which
-     * have the most entries. From there, the number of entries per
-     * codeword will decrease. */
-    decode_table_ptr = decode_table;
-    sym_idx = 0;
-    codeword_len = 1;
-    stores_per_loop = (1 << (table_bits - codeword_len));
-    for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) {
+     * will have the most entries. From there, the number of entries per
+     * codeword will decrease. As an optimization, we may begin filling
+     * entries with SSE2 vector accesses (8 entries/store), then change to
+     * word accesses (2 or 4 entries/store), then change to 16-bit accesses
+     * (1 entry/store).
+     */
+    sym_idx = offsets[0];
+
+#ifdef __SSE2__
+    /* Fill entries one 128-bit vector (8 entries) at a time. */
+    for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) /
+                    (sizeof(__m128i) / sizeof(decode_table[0]));
+         stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
+    {
         unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
         for (; sym_idx < end_sym_idx; sym_idx++) {
-            u16 entry;
-            u16 *p;
-            unsigned n;
-
-            entry = ((u32)codeword_len << 11) | sorted_syms[sym_idx];
-            p = (u16*)decode_table_ptr;
-            n = stores_per_loop;
-
+            /* Note: unlike in the "word" version below, the __m128i
+             * type already has __attribute__((may_alias)), so using
+             * it to access an array of u16 will not violate strict
+             * aliasing. */
+            __m128i v = _mm_set1_epi16(
+                MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
+                                        codeword_len));
+            unsigned n = stores_per_loop;
             do {
-                *p++ = entry;
+                *(__m128i *)entry_ptr = v;
+                entry_ptr += sizeof(v);
             } while (--n);
-
-            decode_table_ptr = p;
         }
     }
+#endif /* __SSE2__ */
+
+#ifdef __GNUC__
+    /* Fill entries one word (2 or 4 entries) at a time. */
+    for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) /
+                    (WORDBYTES / sizeof(decode_table[0]));
+         stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
+    {
+        unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
+        for (; sym_idx < end_sym_idx; sym_idx++) {
+
+            /* Accessing the array of u16 as u32 or u64 would
+             * violate strict aliasing and would require compiling
+             * the code with -fno-strict-aliasing to guarantee
+             * correctness. To work around this problem, use the
+             * gcc 'may_alias' extension. */
+            typedef machine_word_t
+                __attribute__((may_alias)) aliased_word_t;
+            aliased_word_t v = repeat_u16(
+                MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
+                                        codeword_len));
+            unsigned n = stores_per_loop;
+            do {
+                *(aliased_word_t *)entry_ptr = v;
+                entry_ptr += sizeof(v);
+            } while (--n);
+        }
+    }
+#endif /* __GNUC__ */
+
+    /* Fill entries one at a time. */
+    for (unsigned stores_per_loop = (1U << (table_bits - codeword_len));
+         stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
+    {
+        unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
+        for (; sym_idx < end_sym_idx; sym_idx++) {
+            u16 v = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
+                                            codeword_len);
+            unsigned n = stores_per_loop;
+            do {
+                *(u16 *)entry_ptr = v;
+                entry_ptr += sizeof(v);
+            } while (--n);
+        }
+    }

-    /* If we've filled in the entire table, we are done. Otherwise,
-     * there are codewords longer than table_bits for which we must
-     * generate binary trees. */
+    /* If all symbols were processed, then no subtables are required. */
+    if (sym_idx == num_syms)
+        return 0;

-    decode_table_pos = (u16*)decode_table_ptr - decode_table;
-    if (decode_table_pos != table_num_entries) {
-        unsigned j;
-        unsigned next_free_tree_slot;
-        unsigned cur_codeword;
-
-        /* First, zero out the remaining entries. This is
-         * necessary so that these entries appear as
-         * "unallocated" in the next part. Each of these entries
-         * will eventually be filled with the representation of
-         * the root node of a binary tree. */
-        j = decode_table_pos;
-        do {
-            decode_table[j] = 0;
-        } while (++j != table_num_entries);
-
-        /* We allocate child nodes starting at the end of the
-         * direct lookup table. Note that there should be
-         * 2*num_syms extra entries for this purpose, although
-         * fewer than this may actually be needed. */
-        next_free_tree_slot = table_num_entries;
-
-        /* Iterate through each codeword with length greater than
-         * 'table_bits', primarily in order of codeword length
-         * and secondarily in order of symbol. */
-        for (cur_codeword = decode_table_pos << 1;
-             codeword_len <= max_codeword_len;
-             codeword_len++, cur_codeword <<= 1)
-        {
-            unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
-            for (; sym_idx < end_sym_idx; sym_idx++, cur_codeword++)
-            {
-                /* 'sorted_sym' is the symbol represented by the
-                 * codeword. */
-                unsigned sorted_sym = sorted_syms[sym_idx];
-
-                unsigned extra_bits = codeword_len - table_bits;
-
-                unsigned node_idx = cur_codeword >> extra_bits;
-
-                /* Go through each bit of the current codeword
-                 * beyond the prefix of length @table_bits and
-                 * walk the appropriate binary tree, allocating
-                 * any slots that have not yet been allocated.
-                 *
-                 * Note that the 'pointer' entry to the binary
-                 * tree, which is stored in the direct lookup
-                 * portion of the table, is represented
-                 * identically to other internal (non-leaf)
-                 * nodes of the binary tree; it can be thought
-                 * of as simply the root of the tree. The
-                 * representation of these internal nodes is
-                 * simply the index of the left child combined
-                 * with the special bits 0xC000 to distingush
-                 * the entry from direct mapping and leaf node
-                 * entries. */
-                do {
-
-                    /* At least one bit remains in the
-                     * codeword, but the current node is an
-                     * unallocated leaf. Change it to an
-                     * internal node. */
-                    if (decode_table[node_idx] == 0) {
-                        decode_table[node_idx] =
-                            next_free_tree_slot | 0xC000;
-                        decode_table[next_free_tree_slot++] = 0;
-                        decode_table[next_free_tree_slot++] = 0;
-                    }
-
-                    /* Go to the left child if the next bit
-                     * in the codeword is 0; otherwise go to
-                     * the right child. */
-                    node_idx = decode_table[node_idx] & 0x3FFF;
-                    --extra_bits;
-                    node_idx += (cur_codeword >> extra_bits) & 1;
-                } while (extra_bits != 0);
-
-                /* We've traversed the tree using the entire
-                 * codeword, and we're now at the entry where
-                 * the actual symbol will be stored. This is
-                 * distinguished from internal nodes by not
-                 * having its high two bits set. */
-                decode_table[node_idx] = sorted_sym;
-            }
-        }
-    }
+    /* At least one subtable is required. Process the remaining symbols. */
+    codeword = ((u16 *)entry_ptr - decode_table) << 1;
+    subtable_pos = 1U << table_bits;
+    subtable_bits = table_bits;
+    subtable_prefix = -1;
+    do {
+        while (len_counts[codeword_len] == 0) {
+            codeword_len++;
+            codeword <<= 1;
+        }
+
+        unsigned prefix = codeword >> (codeword_len - table_bits);
+
+        /* Start a new subtable if the first 'table_bits' bits of the
+         * codeword don't match the prefix for the previous subtable, or
+         * if this will be the first subtable. */
+        if (prefix != subtable_prefix) {
+
+            subtable_prefix = prefix;
+
+            /*
+             * Calculate the subtable length. If the codeword
+             * length exceeds 'table_bits' by n, then the subtable
+             * needs at least 2^n entries. But it may need more; if
+             * there are fewer than 2^n codewords of length
+             * 'table_bits + n' remaining, then n will need to be
+             * incremented to bring in longer codewords until the
+             * subtable can be filled completely. Note that it
+             * always will, eventually, be possible to fill the
+             * subtable, since it was previously verified that the
+             * code is complete.
+             */
+            subtable_bits = codeword_len - table_bits;
+            remainder = (s32)1 << subtable_bits;
+            for (;;) {
+                remainder -= len_counts[table_bits +
+                                        subtable_bits];
+                if (remainder <= 0)
+                    break;
+                subtable_bits++;
+                remainder <<= 1;
+            }
+
+            /* Create the entry that points from the root table to
+             * the subtable. This entry contains the index of the
+             * start of the subtable and the number of bits with
+             * which the subtable is indexed (the log base 2 of the
+             * number of entries it contains). */
+            decode_table[subtable_prefix] =
+                MAKE_DECODE_TABLE_ENTRY(subtable_pos,
+                                        subtable_bits);
+        }
+
+        /* Fill the subtable entries for this symbol. */
+        u16 entry = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
+                                            codeword_len - table_bits);
+        unsigned n = 1U << (subtable_bits - (codeword_len -
+                                             table_bits));
+        do {
+            decode_table[subtable_pos++] = entry;
+        } while (--n);
+
+        len_counts[codeword_len]--;
+        codeword++;
+    } while (++sym_idx < num_syms);

     return 0;
 }
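
Aside: a worked example of the canonical-code property this function relies on
(illustrative only, not code from this commit). For lengths A=2, B=1, C=3, D=3
bits, counting the codewords of each length and handing them out in
lexicographic order yields B=0, A=10, C=110, D=111. The sketch below uses the
classic next_code recurrence (as in DEFLATE) rather than this function's
sort-based bookkeeping, but it recovers the same codewords:

    #include <assert.h>

    int main(void)
    {
        static const unsigned char lens[4] = { 2, 1, 3, 3 }; /* A,B,C,D */
        unsigned len_counts[4] = { 0 }, next_code[4], code = 0;
        unsigned codewords[4];

        for (int sym = 0; sym < 4; sym++)
            len_counts[lens[sym]]++;
        for (unsigned len = 1; len <= 3; len++) {
            code = (code + len_counts[len - 1]) << 1;
            next_code[len] = code;
        }
        for (int sym = 0; sym < 4; sym++)
            codewords[sym] = next_code[lens[sym]]++;

        assert(codewords[1] == 0x0);  /* B = 0   */
        assert(codewords[0] == 0x2);  /* A = 10  */
        assert(codewords[2] == 0x6);  /* C = 110 */
        assert(codewords[3] == 0x7);  /* D = 111 */
        return 0;
    }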

src/decompress_common.h
@@ -1,99 +1,36 @@
 /*
- * decompress_common.h - Code shared by the XPRESS and LZX decompressors
+ * decompress_common.h
  *
- * Copyright (C) 2015 Eric Biggers
+ * Header for decompression code shared by multiple compression formats.
  *
- * This program is free software: you can redistribute it and/or modify it under
- * the terms of the GNU General Public License as published by the Free Software
- * Foundation, either version 2 of the License, or (at your option) any later
- * version.
+ * The following copying information applies to this specific source code file:
  *
- * This program is distributed in the hope that it will be useful, but WITHOUT
+ * Written in 2012-2016 by Eric Biggers <ebiggers3@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
+ * Dedication (the "CC0").
+ *
+ * This software is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
+ * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
+ * You should have received a copy of the CC0 along with this software; if not
+ * see <http://creativecommons.org/publicdomain/zero/1.0/>.
  */

-#include <stddef.h>
+#ifndef _DECOMPRESS_COMMON_H
+#define _DECOMPRESS_COMMON_H
+
+#include <errno.h>
 #include <string.h>

-#include <ntfs-3g/endians.h>
-#include <ntfs-3g/types.h>
+#include "common_defs.h"

-/* "Force inline" macro (not required, but helpful for performance) */
-#ifdef __GNUC__
-# define forceinline inline __attribute__((always_inline))
-#else
-# define forceinline inline
-#endif
-
-/* Enable whole-word match copying on selected architectures */
-#if defined(__i386__) || defined(__x86_64__) || defined(__ARM_FEATURE_UNALIGNED)
-# define FAST_UNALIGNED_ACCESS
-#endif
-
-/* Size of a machine word */
-#define WORDBYTES (sizeof(size_t))
-
-/* Inline functions to read and write unaligned data.
- * We use just memcpy() for this. It is standard and modern compilers will
- * usually replace it with load/store instructions. */
-
-static forceinline u16 get_unaligned_le16(const u8 *p)
-{
-    le16 v_le;
-    memcpy(&v_le, p, 2);
-    return le16_to_cpu(v_le);
-}
-
-static forceinline u32 get_unaligned_le32(const u8 *p)
-{
-    le32 v_le;
-    memcpy(&v_le, p, 4);
-    return le32_to_cpu(v_le);
-}
-
-static forceinline void put_unaligned_le32(u32 v, u8 *p)
-{
-    le32 v_le = cpu_to_le32(v);
-    memcpy(p, &v_le, 4);
-}
-
-/* Load a "word" with platform-dependent size and endianness. */
-static forceinline size_t get_unaligned_word(const u8 *p)
-{
-    size_t v;
-    memcpy(&v, p, WORDBYTES);
-    return v;
-}
-
-/* Store a "word" with platform-dependent size and endianness. */
-static forceinline void put_unaligned_word(size_t v, u8 *p)
-{
-    memcpy(p, &v, WORDBYTES);
-}
-
-/* Copy a "word" with platform-dependent size. */
-static forceinline void copy_unaligned_word(const u8 *src, u8 *dst)
-{
-    put_unaligned_word(get_unaligned_word(src), dst);
-}
-
-/* Generate a "word" with platform-dependent size whose bytes all contain the
- * value 'b'. */
-static forceinline size_t repeat_byte(u8 b)
-{
-    size_t v;
-
-    v = b;
-    v |= v << 8;
-    v |= v << 16;
-    v |= v << ((WORDBYTES == 8) ? 32 : 0);
-    return v;
-}
+/******************************************************************************/
+/*                     Input bitstream for XPRESS and LZX                     */
+/*----------------------------------------------------------------------------*/

 /* Structure that encapsulates a block of in-memory data being interpreted as a
  * stream of bits, optionally with interwoven literal bytes. Bits are assumed
@@ -106,18 +43,18 @@ struct input_bitstream {
     u32 bitbuf;

     /* Number of bits currently held in @bitbuf. */
-    unsigned bitsleft;
+    u32 bitsleft;

     /* Pointer to the next byte to be retrieved from the input buffer. */
     const u8 *next;

-    /* Pointer to just past the end of the input buffer. */
+    /* Pointer past the end of the input buffer. */
     const u8 *end;
 };

 /* Initialize a bitstream to read from the specified input buffer. */
-static forceinline void init_input_bitstream(struct input_bitstream *is,
-                                             const void *buffer, u32 size)
+static forceinline void
+init_input_bitstream(struct input_bitstream *is, const void *buffer, u32 size)
 {
     is->bitbuf = 0;
     is->bitsleft = 0;
@@ -125,39 +62,60 @@ static forceinline void init_input_bitstream(struct input_bitstream *is,
     is->end = is->next + size;
 }

 /* Note: for performance reasons, the following methods don't return error codes
  * to the caller if the input buffer is overrun. Instead, they just assume that
  * all overrun data is zeroes. This has no effect on well-formed compressed
  * data. The only disadvantage is that bad compressed data may go undetected,
  * but even this is irrelevant if higher level code checksums the uncompressed
  * data anyway. */

 /* Ensure the bit buffer variable for the bitstream contains at least @num_bits
  * bits. Following this, bitstream_peek_bits() and/or bitstream_remove_bits()
- * may be called on the bitstream to peek or remove up to @num_bits bits. Note
- * that @num_bits must be <= 16. */
-static forceinline void bitstream_ensure_bits(struct input_bitstream *is,
-                                              unsigned num_bits)
+ * may be called on the bitstream to peek or remove up to @num_bits bits. */
+static forceinline void
+bitstream_ensure_bits(struct input_bitstream *is, const unsigned num_bits)
 {
-    if (is->bitsleft < num_bits) {
-        if (is->end - is->next >= 2) {
-            is->bitbuf |= (u32)get_unaligned_le16(is->next)
-                          << (16 - is->bitsleft);
-            is->next += 2;
-        }
-        is->bitsleft += 16;
+    /* This currently works for at most 17 bits. */
+
+    if (is->bitsleft >= num_bits)
+        return;
+
+    if (unlikely(is->end - is->next < 2))
+        goto overflow;
+
+    is->bitbuf |= (u32)get_unaligned_le16(is->next) << (16 - is->bitsleft);
+    is->next += 2;
+    is->bitsleft += 16;
+
+    if (unlikely(num_bits == 17 && is->bitsleft == 16)) {
+        if (unlikely(is->end - is->next < 2))
+            goto overflow;
+
+        is->bitbuf |= (u32)get_unaligned_le16(is->next);
+        is->next += 2;
+        is->bitsleft = 32;
     }
+
+    return;
+
+overflow:
+    is->bitsleft = 32;
 }

 /* Return the next @num_bits bits from the bitstream, without removing them.
  * There must be at least @num_bits remaining in the buffer variable, from a
  * previous call to bitstream_ensure_bits(). */
-static forceinline u32 bitstream_peek_bits(const struct input_bitstream *is,
-                                           unsigned num_bits)
+static forceinline u32
+bitstream_peek_bits(const struct input_bitstream *is, const unsigned num_bits)
 {
-    if (num_bits == 0)
-        return 0;
-    return is->bitbuf >> (32 - num_bits);
+    return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1);
 }

 /* Remove @num_bits from the bitstream. There must be at least @num_bits
  * remaining in the buffer variable, from a previous call to
  * bitstream_ensure_bits(). */
-static forceinline void bitstream_remove_bits(struct input_bitstream *is,
-                                              unsigned num_bits)
+static forceinline void
+bitstream_remove_bits(struct input_bitstream *is, unsigned num_bits)
 {
     is->bitbuf <<= num_bits;
     is->bitsleft -= num_bits;
@@ -166,8 +124,8 @@ static forceinline void bitstream_remove_bits(struct input_bitstream *is,
 /* Remove and return @num_bits bits from the bitstream. There must be at least
  * @num_bits remaining in the buffer variable, from a previous call to
  * bitstream_ensure_bits(). */
-static forceinline u32 bitstream_pop_bits(struct input_bitstream *is,
-                                          unsigned num_bits)
+static forceinline u32
+bitstream_pop_bits(struct input_bitstream *is, unsigned num_bits)
 {
     u32 bits = bitstream_peek_bits(is, num_bits);
     bitstream_remove_bits(is, num_bits);
@@ -175,27 +133,29 @@ static forceinline u32 bitstream_pop_bits(struct input_bitstream *is,
 }

 /* Read and return the next @num_bits bits from the bitstream. */
-static forceinline u32 bitstream_read_bits(struct input_bitstream *is,
-                                           unsigned num_bits)
+static forceinline u32
+bitstream_read_bits(struct input_bitstream *is, unsigned num_bits)
 {
     bitstream_ensure_bits(is, num_bits);
     return bitstream_pop_bits(is, num_bits);
 }

 /* Read and return the next literal byte embedded in the bitstream. */
-static forceinline u8 bitstream_read_byte(struct input_bitstream *is)
+static forceinline u8
+bitstream_read_byte(struct input_bitstream *is)
 {
-    if (is->end == is->next)
+    if (unlikely(is->end == is->next))
         return 0;
     return *is->next++;
 }

 /* Read and return the next 16-bit integer embedded in the bitstream. */
-static forceinline u16 bitstream_read_u16(struct input_bitstream *is)
+static forceinline u16
+bitstream_read_u16(struct input_bitstream *is)
 {
     u16 v;

-    if (is->end - is->next < 2)
+    if (unlikely(is->end - is->next < 2))
         return 0;
     v = get_unaligned_le16(is->next);
     is->next += 2;
@@ -203,11 +163,12 @@ static forceinline u16 bitstream_read_u16(struct input_bitstream *is)
 }

 /* Read and return the next 32-bit integer embedded in the bitstream. */
-static forceinline u32 bitstream_read_u32(struct input_bitstream *is)
+static forceinline u32
+bitstream_read_u32(struct input_bitstream *is)
 {
     u32 v;

-    if (is->end - is->next < 4)
+    if (unlikely(is->end - is->next < 4))
         return 0;
     v = get_unaligned_le32(is->next);
     is->next += 4;
@@ -215,161 +176,370 @@ static forceinline u32 bitstream_read_u32(struct input_bitstream *is)
}
|
||||
|
||||
/* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
|
||||
* Return either a pointer to the byte past the last written, or NULL if the
|
||||
* read overflows the input buffer. */
|
||||
static forceinline void *bitstream_read_bytes(struct input_bitstream *is,
|
||||
void *dst_buffer, size_t count)
|
||||
* Return 0 if there were enough bytes remaining in the input, otherwise -1. */
|
||||
static forceinline int
|
||||
bitstream_read_bytes(struct input_bitstream *is, void *dst_buffer, size_t count)
|
||||
{
|
||||
if ((size_t)(is->end - is->next) < count)
|
||||
return NULL;
|
||||
if (unlikely(is->end - is->next < count))
|
||||
return -1;
|
||||
memcpy(dst_buffer, is->next, count);
|
||||
is->next += count;
|
||||
return (u8 *)dst_buffer + count;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Align the input bitstream on a coding-unit boundary. */
|
||||
static forceinline void bitstream_align(struct input_bitstream *is)
|
||||
static forceinline void
|
||||
bitstream_align(struct input_bitstream *is)
|
||||
{
|
||||
is->bitsleft = 0;
|
||||
is->bitbuf = 0;
|
||||
}
|
||||
|
||||
extern int make_huffman_decode_table(u16 decode_table[], const unsigned num_syms,
|
||||
const unsigned num_bits, const u8 lens[],
|
||||
const unsigned max_codeword_len,
|
||||
u16 working_space[]);
|
||||
/******************************************************************************/
|
||||
/* Huffman decoding */
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
* Required alignment for the Huffman decode tables. We require this alignment
|
||||
* so that we can fill the entries with vector or word instructions and not have
|
||||
* to deal with misaligned buffers.
|
||||
*/
|
||||
#define DECODE_TABLE_ALIGNMENT 16
|
||||
|
||||
/* Reads and returns the next Huffman-encoded symbol from a bitstream. If the
|
||||
* input data is exhausted, the Huffman symbol is decoded as if the missing bits
|
||||
* are all zeroes. */
|
||||
static forceinline unsigned read_huffsym(struct input_bitstream *istream,
|
||||
const u16 decode_table[],
|
||||
unsigned table_bits,
|
||||
unsigned max_codeword_len)
|
||||
/*
|
||||
* Each decode table entry is 16 bits divided into two fields: 'symbol' (high 12
|
||||
* bits) and 'length' (low 4 bits). The precise meaning of these fields depends
|
||||
* on the type of entry:
|
||||
*
|
||||
* Root table entries which are *not* subtable pointers:
|
||||
* symbol: symbol to decode
|
||||
* length: codeword length in bits
|
||||
*
|
||||
* Root table entries which are subtable pointers:
|
||||
* symbol: index of start of subtable
|
||||
* length: number of bits with which the subtable is indexed
|
||||
*
|
||||
* Subtable entries:
|
||||
* symbol: symbol to decode
|
||||
* length: codeword length in bits, minus the number of bits with which the
|
||||
* root table is indexed
|
||||
*/
|
||||
#define DECODE_TABLE_SYMBOL_SHIFT 4
|
||||
#define DECODE_TABLE_MAX_SYMBOL ((1 << (16 - DECODE_TABLE_SYMBOL_SHIFT)) - 1)
|
||||
#define DECODE_TABLE_MAX_LENGTH ((1 << DECODE_TABLE_SYMBOL_SHIFT) - 1)
|
||||
#define DECODE_TABLE_LENGTH_MASK DECODE_TABLE_MAX_LENGTH
|
||||
#define MAKE_DECODE_TABLE_ENTRY(symbol, length) \
|
||||
(((symbol) << DECODE_TABLE_SYMBOL_SHIFT) | (length))

/*
 * Read and return the next Huffman-encoded symbol from the given bitstream
 * using the given decode table.
 *
 * If the input data is exhausted, then the Huffman symbol will be decoded as if
 * the missing bits were all zeroes.
 *
 * XXX: This is mostly duplicated in lzms_decode_huffman_symbol() in
 * lzms_decompress.c; keep them in sync!
 */
static forceinline unsigned
read_huffsym(struct input_bitstream *is, const u16 decode_table[],
	     unsigned table_bits, unsigned max_codeword_len)
{
	unsigned entry;
	unsigned key_bits;
	unsigned symbol;
	unsigned length;

	bitstream_ensure_bits(istream, max_codeword_len);
	/* Preload the bitbuffer with 'max_codeword_len' bits so that we're
	 * guaranteed to be able to fully decode a codeword. */
	bitstream_ensure_bits(is, max_codeword_len);

	/* Index the decode table by the next table_bits bits of the input. */
	key_bits = bitstream_peek_bits(istream, table_bits);
	entry = decode_table[key_bits];
	if (entry < 0xC000) {
		/* Fast case: The decode table directly provided the
		 * symbol and codeword length.  The low 11 bits are the
		 * symbol, and the high 5 bits are the codeword length. */
		bitstream_remove_bits(istream, entry >> 11);
		return entry & 0x7FF;
	} else {
		/* Slow case: The codeword for the symbol is longer than
		 * table_bits, so the symbol does not have an entry
		 * directly in the first (1 << table_bits) entries of the
		 * decode table.  Traverse the appropriate binary tree
		 * bit-by-bit to decode the symbol. */
		bitstream_remove_bits(istream, table_bits);
		do {
			key_bits = (entry & 0x3FFF) + bitstream_pop_bits(istream, 1);
		} while ((entry = decode_table[key_bits]) >= 0xC000);
		return entry;
	/* Index the root table by the next 'table_bits' bits of input. */
	entry = decode_table[bitstream_peek_bits(is, table_bits)];

	/* Extract the "symbol" and "length" from the entry. */
	symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT;
	length = entry & DECODE_TABLE_LENGTH_MASK;

	/* If the root table is indexed by the full 'max_codeword_len' bits,
	 * then there cannot be any subtables, and this will be known at compile
	 * time.  Otherwise, we must check whether the decoded symbol is really
	 * a subtable pointer.  If so, we must discard the bits with which the
	 * root table was indexed, then index the subtable by the next 'length'
	 * bits of input to get the real entry. */
	if (max_codeword_len > table_bits &&
	    entry >= (1U << (table_bits + DECODE_TABLE_SYMBOL_SHIFT)))
	{
		/* Subtable required */
		bitstream_remove_bits(is, table_bits);
		entry = decode_table[symbol + bitstream_peek_bits(is, length)];
		symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT;
		length = entry & DECODE_TABLE_LENGTH_MASK;
	}

	/* Discard the bits (or the remaining bits, if a subtable was required)
	 * of the codeword. */
	bitstream_remove_bits(is, length);

	/* Return the decoded symbol. */
	return symbol;
}

/*
 * Copy an LZ77 match at (dst - offset) to dst.
 * The DECODE_TABLE_ENOUGH() macro evaluates to the maximum number of decode
 * table entries, including all subtable entries, that may be required for
 * decoding a given Huffman code.  This depends on three parameters:
 *
 * The length and offset must be already validated --- that is, (dst - offset)
 * can't underrun the output buffer, and (dst + length) can't overrun the output
 * buffer.  Also, the length cannot be 0.
 *	num_syms: the maximum number of symbols in the code
 *	table_bits: the number of bits with which the root table will be indexed
 *	max_codeword_len: the maximum allowed codeword length in the code
 *
 * @bufend points to the byte past the end of the output buffer.  This function
 * won't write any data beyond this position.
 *
 * Returns dst + length.
 * Given these parameters, the utility program 'enough' from zlib, when passed
 * the three arguments 'num_syms', 'table_bits', and 'max_codeword_len', will
 * compute the maximum number of entries required.  This has already been done
 * for the combinations we need and incorporated into the macro below so that
 * the mapping can be done at compilation time.  If an unknown combination is
 * used, then a compilation error will result.  To fix this, use 'enough' to
 * find the missing value and add it below.  If that still doesn't fix the
 * compilation error, then most likely a constraint would be violated by the
 * requested parameters, so they cannot be used, at least without other changes
 * to the decode table --- see DECODE_TABLE_SIZE().
 */
static forceinline u8 *lz_copy(u8 *dst, u32 length, u32 offset, const u8 *bufend,
			       u32 min_length)
#define DECODE_TABLE_ENOUGH(num_syms, table_bits, max_codeword_len) ( \
	((num_syms) == 8 && (table_bits) == 7 && (max_codeword_len) == 15) ? 128 : \
	((num_syms) == 8 && (table_bits) == 5 && (max_codeword_len) == 7) ? 36 : \
	((num_syms) == 8 && (table_bits) == 6 && (max_codeword_len) == 7) ? 66 : \
	((num_syms) == 8 && (table_bits) == 7 && (max_codeword_len) == 7) ? 128 : \
	((num_syms) == 20 && (table_bits) == 5 && (max_codeword_len) == 15) ? 1062 : \
	((num_syms) == 20 && (table_bits) == 6 && (max_codeword_len) == 15) ? 582 : \
	((num_syms) == 20 && (table_bits) == 7 && (max_codeword_len) == 15) ? 390 : \
	((num_syms) == 54 && (table_bits) == 9 && (max_codeword_len) == 15) ? 618 : \
	((num_syms) == 54 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1098 : \
	((num_syms) == 249 && (table_bits) == 9 && (max_codeword_len) == 16) ? 878 : \
	((num_syms) == 249 && (table_bits) == 10 && (max_codeword_len) == 16) ? 1326 : \
	((num_syms) == 249 && (table_bits) == 11 && (max_codeword_len) == 16) ? 2318 : \
	((num_syms) == 256 && (table_bits) == 9 && (max_codeword_len) == 15) ? 822 : \
	((num_syms) == 256 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1302 : \
	((num_syms) == 256 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2310 : \
	((num_syms) == 512 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1558 : \
	((num_syms) == 512 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2566 : \
	((num_syms) == 512 && (table_bits) == 12 && (max_codeword_len) == 15) ? 4606 : \
	((num_syms) == 656 && (table_bits) == 10 && (max_codeword_len) == 16) ? 1734 : \
	((num_syms) == 656 && (table_bits) == 11 && (max_codeword_len) == 16) ? 2726 : \
	((num_syms) == 656 && (table_bits) == 12 && (max_codeword_len) == 16) ? 4758 : \
	((num_syms) == 799 && (table_bits) == 9 && (max_codeword_len) == 15) ? 1366 : \
	((num_syms) == 799 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1846 : \
	((num_syms) == 799 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2854 : \
	-1)

/* Wrapper around DECODE_TABLE_ENOUGH() that does additional compile-time
 * validation. */
#define DECODE_TABLE_SIZE(num_syms, table_bits, max_codeword_len) ( \
									\
	/* All values must be positive. */				\
	STATIC_ASSERT_ZERO((num_syms) > 0) +				\
	STATIC_ASSERT_ZERO((table_bits) > 0) +				\
	STATIC_ASSERT_ZERO((max_codeword_len) > 0) +			\
									\
	/* There cannot be more symbols than possible codewords. */	\
	STATIC_ASSERT_ZERO((num_syms) <= 1U << (max_codeword_len)) +	\
									\
	/* There is no reason for the root table to be indexed with	\
	 * more bits than the maximum codeword length. */		\
	STATIC_ASSERT_ZERO((table_bits) <= (max_codeword_len)) +	\
									\
	/* The maximum symbol value must fit in the 'symbol' field. */	\
	STATIC_ASSERT_ZERO((num_syms) - 1 <= DECODE_TABLE_MAX_SYMBOL) +	\
									\
	/* The maximum codeword length in the root table must fit in	\
	 * the 'length' field. */					\
	STATIC_ASSERT_ZERO((table_bits) <= DECODE_TABLE_MAX_LENGTH) +	\
									\
	/* The maximum codeword length in a subtable must fit in the	\
	 * 'length' field. */						\
	STATIC_ASSERT_ZERO((max_codeword_len) - (table_bits) <=	\
			   DECODE_TABLE_MAX_LENGTH) +			\
									\
	/* The minimum subtable index must be greater than the maximum	\
	 * symbol value.  If this were not the case, then there would	\
	 * be no way to tell whether a given root table entry is a	\
	 * "subtable pointer" or not.  (An alternate solution would be	\
	 * to reserve a flag bit specifically for this purpose.) */	\
	STATIC_ASSERT_ZERO((1U << table_bits) > (num_syms) - 1) +	\
									\
	/* The needed 'enough' value must have been defined. */	\
	STATIC_ASSERT_ZERO(DECODE_TABLE_ENOUGH(				\
				(num_syms), (table_bits),		\
				(max_codeword_len)) > 0) +		\
									\
	/* The maximum subtable index must fit in the 'symbol' field. */\
	STATIC_ASSERT_ZERO(DECODE_TABLE_ENOUGH(				\
				(num_syms), (table_bits),		\
				(max_codeword_len)) - 1 <=		\
					DECODE_TABLE_MAX_SYMBOL) +	\
									\
	/* Finally, make the macro evaluate to the needed maximum	\
	 * number of decode table entries. */				\
	DECODE_TABLE_ENOUGH((num_syms), (table_bits),			\
			    (max_codeword_len))				\
)

/*
 * Declare the decode table for a Huffman code, given several compile-time
 * constants that describe the code.  See DECODE_TABLE_ENOUGH() for details.
 *
 * Decode tables must be aligned to a DECODE_TABLE_ALIGNMENT-byte boundary.
 * This implies that if a decode table is nested inside a dynamically allocated
 * structure, then the outer structure must be allocated on a
 * DECODE_TABLE_ALIGNMENT-byte aligned boundary as well.
 */
#define DECODE_TABLE(name, num_syms, table_bits, max_codeword_len) \
	u16 name[DECODE_TABLE_SIZE((num_syms), (table_bits), \
				   (max_codeword_len))] \
		_aligned_attribute(DECODE_TABLE_ALIGNMENT)

/*
 * Declare the temporary "working_space" array needed for building the decode
 * table for a Huffman code.
 */
#define DECODE_TABLE_WORKING_SPACE(name, num_syms, max_codeword_len) \
	u16 name[2 * ((max_codeword_len) + 1) + (num_syms)];

extern int
make_huffman_decode_table(u16 decode_table[], unsigned num_syms,
			  unsigned table_bits, const u8 lens[],
			  unsigned max_codeword_len, u16 working_space[]);
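To see how the pieces fit together, here is a hypothetical caller (a sketch of mine: the names example_table, example_space, example_decode, and the inputs lens and is are assumptions, not part of this code): declare the table and working space with the macros, build the table once per code, then decode symbols with read_huffsym().

	static DECODE_TABLE(example_table, 256, 11, 15);
	static DECODE_TABLE_WORKING_SPACE(example_space, 256, 15);

	static int
	example_decode(struct input_bitstream *is, const u8 lens[256])
	{
		/* Build the table from the 256 codeword lengths in 'lens'. */
		if (make_huffman_decode_table(example_table, 256, 11,
					      lens, 15, example_space))
			return -1;	/* 'lens' did not describe a valid code */

		/* Decode and return one symbol. */
		return read_huffsym(is, example_table, 11, 15);
	}

Note that (256, 11, 15) is one of the combinations already covered by DECODE_TABLE_ENOUGH(), so the table size (2310 entries) is computed entirely at compile time.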

/******************************************************************************/
/* LZ match copying */
/*----------------------------------------------------------------------------*/

static forceinline void
copy_word_unaligned(const void *src, void *dst)
{
	const u8 *src = dst - offset;
	store_word_unaligned(load_word_unaligned(src), dst);
}

static forceinline machine_word_t
repeat_u16(u16 b)
{
	machine_word_t v = b;

	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
	v |= v << 16;
	v |= v << ((WORDBITS == 64) ? 32 : 0);
	return v;
}

static forceinline machine_word_t
repeat_byte(u8 b)
{
	return repeat_u16(((u16)b << 8) | b);
}
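For example (values in the comments assume a 64-bit machine word; this snippet is illustrative only):

	machine_word_t v = repeat_byte(0xAB);	/* 0xABABABABABABABAB */
	machine_word_t w = repeat_u16(0x1234);	/* 0x1234123412341234 */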

/*
 * Copy an LZ77 match of 'length' bytes from the match source at 'out_next -
 * offset' to the match destination at 'out_next'.  The source and destination
 * may overlap.
 *
 * This handles validating the length and offset.  It is validated that the
 * beginning of the match source is '>= out_begin' and that the end of the match
 * destination is '<= out_end'.  The return value is 0 if the match was valid
 * (and was copied), otherwise -1.
 *
 * 'min_length' is a hint which specifies the minimum possible match length.
 * This should be a compile-time constant.
 */
static forceinline int
lz_copy(u32 length, u32 offset, u8 *out_begin, u8 *out_next, u8 *out_end,
	u32 min_length)
{
	const u8 *src;
	u8 *end;

	/* Validate the offset. */
	if (unlikely(offset > out_next - out_begin))
		return -1;

	/*
	 * Try to copy one machine word at a time.  On i386 and x86_64 this is
	 * faster than copying one byte at a time, unless the data is
	 * near-random and all the matches have very short lengths.  Note that
	 * since this requires unaligned memory accesses, it won't necessarily
	 * be faster on every architecture.
	 * Fast path: copy a match which is no longer than a few words, is not
	 * overlapped such that copying a word at a time would produce incorrect
	 * results, and is not too close to the end of the buffer.  Note that
	 * this might copy more than the length of the match, but that's okay in
	 * this scenario.
	 */
	src = out_next - offset;
	if (UNALIGNED_ACCESS_IS_FAST && length <= 3 * WORDBYTES &&
	    offset >= WORDBYTES && out_end - out_next >= 3 * WORDBYTES)
	{
		copy_word_unaligned(src + WORDBYTES*0, out_next + WORDBYTES*0);
		copy_word_unaligned(src + WORDBYTES*1, out_next + WORDBYTES*1);
		copy_word_unaligned(src + WORDBYTES*2, out_next + WORDBYTES*2);
		return 0;
	}

	/* Validate the length.  This isn't needed in the fast path above, due
	 * to the additional conditions tested, but we do need it here. */
	if (unlikely(length > out_end - out_next))
		return -1;
	end = out_next + length;

	/*
	 * Try to copy one word at a time.  On i386 and x86_64 this is faster
	 * than copying one byte at a time, unless the data is near-random and
	 * all the matches have very short lengths.  Note that since this
	 * requires unaligned memory accesses, it won't necessarily be faster on
	 * every architecture.
	 *
	 * Also note that we might copy more than the length of the match.  For
	 * example, if a word is 8 bytes and the match is of length 5, then
	 * we'll simply copy 8 bytes.  This is okay as long as we don't write
	 * beyond the end of the output buffer, hence the check for (bufend -
	 * beyond the end of the output buffer, hence the check for (out_end -
	 * end >= WORDBYTES - 1).
	 */
#ifdef FAST_UNALIGNED_ACCESS
	u8 * const end = dst + length;
	if (bufend - end >= (ptrdiff_t)(WORDBYTES - 1)) {

	if (UNALIGNED_ACCESS_IS_FAST && likely(out_end - end >= WORDBYTES - 1))
	{
		if (offset >= WORDBYTES) {
			/* The source and destination words don't overlap. */

			/* To improve branch prediction, one iteration of this
			 * loop is unrolled.  Most matches are short and will
			 * fail the first check.  But if that check passes, then
			 * it becomes increasingly likely that the match is long
			 * and we'll need to continue copying. */

			copy_unaligned_word(src, dst);
			src += WORDBYTES;
			dst += WORDBYTES;

			if (dst < end) {
				do {
					copy_unaligned_word(src, dst);
					src += WORDBYTES;
					dst += WORDBYTES;
				} while (dst < end);
			}
			return end;
			/* The source and destination words don't overlap. */
			do {
				copy_word_unaligned(src, out_next);
				src += WORDBYTES;
				out_next += WORDBYTES;
			} while (out_next < end);
			return 0;
		} else if (offset == 1) {

			/* Offset 1 matches are equivalent to run-length
			 * encoding of the previous byte.  This case is common
			 * if the data contains many repeated bytes. */

			size_t v = repeat_byte(*(dst - 1));
			machine_word_t v = repeat_byte(*(out_next - 1));
			do {
				put_unaligned_word(v, dst);
				store_word_unaligned(v, out_next);
				src += WORDBYTES;
				dst += WORDBYTES;
			} while (dst < end);
			return end;
				out_next += WORDBYTES;
			} while (out_next < end);
			return 0;
		}
		/*
		 * We don't bother with special cases for other 'offset <
		 * WORDBYTES', which are usually rarer than 'offset == 1'.
		 * Extra checks will just slow things down.  Actually, it's
		 * possible to handle all the 'offset < WORDBYTES' cases using
		 * the same code, but it still becomes more complicated and
		 * doesn't seem any faster overall; it definitely slows down
		 * the more common 'offset == 1' case.
		 */
	}
#endif /* FAST_UNALIGNED_ACCESS */

	/* Fall back to a bytewise copy. */

	if (min_length >= 2) {
		*dst++ = *src++;
		length--;
	}
	if (min_length >= 3) {
		*dst++ = *src++;
		length--;
	}
	if (min_length >= 2)
		*out_next++ = *src++;
	if (min_length >= 3)
		*out_next++ = *src++;
	if (min_length >= 4)
		*out_next++ = *src++;
	do {
		*dst++ = *src++;
	} while (--length);

	return dst;
		*out_next++ = *src++;
	} while (out_next != end);
	return 0;
}
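A typical call site would look something like this sketch, where out, out_next, and out_size are hypothetical names for the decoder's output buffer state:

	if (lz_copy(length, offset, out, out_next, out + out_size,
		    LZX_MIN_MATCH_LEN))
		return -1;	/* match source or destination out of bounds */
	out_next += length;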

#endif /* _DECOMPRESS_COMMON_H */

@@ -0,0 +1,324 @@
/*
 * lzx_common.c - Common code for LZX compression and decompression.
 */

/*
 * Copyright (C) 2012-2016 Eric Biggers
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 2 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#include <string.h>

#ifdef __SSE2__
# include <emmintrin.h>
#endif

#ifdef __AVX2__
# include <immintrin.h>
#endif

#include "common_defs.h"
#include "lzx_common.h"

/* Mapping: offset slot => first match offset that uses that offset slot.
 * The offset slots for repeat offsets map to "fake" offsets < 1. */
const s32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1] = {
	-2      , -1      , 0       , 1       , 2       ,	/* 0  --- 4  */
	4       , 6       , 10      , 14      , 22      ,	/* 5  --- 9  */
	30      , 46      , 62      , 94      , 126     ,	/* 10 --- 14 */
	190     , 254     , 382     , 510     , 766     ,	/* 15 --- 19 */
	1022    , 1534    , 2046    , 3070    , 4094    ,	/* 20 --- 24 */
	6142    , 8190    , 12286   , 16382   , 24574   ,	/* 25 --- 29 */
	32766   , 49150   , 65534   , 98302   , 131070  ,	/* 30 --- 34 */
	196606  , 262142  , 393214  , 524286  , 655358  ,	/* 35 --- 39 */
	786430  , 917502  , 1048574 , 1179646 , 1310718 ,	/* 40 --- 44 */
	1441790 , 1572862 , 1703934 , 1835006 , 1966078 ,	/* 45 --- 49 */
	2097150						/* extra */
};

/* Mapping: offset slot => how many extra bits must be read and added to the
 * corresponding offset slot base to decode the match offset. */
const u8 lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS] = {
	0 , 0 , 0 , 0 , 1 ,
	1 , 2 , 2 , 3 , 3 ,
	4 , 4 , 5 , 5 , 6 ,
	6 , 7 , 7 , 8 , 8 ,
	9 , 9 , 10, 10, 11,
	11, 12, 12, 13, 13,
	14, 14, 15, 15, 16,
	16, 17, 17, 17, 17,
	17, 17, 17, 17, 17,
	17, 17, 17, 17, 17,
};
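Read together, the two tables give a simple decoding recipe. A worked example of mine: offset slot 10 has base 30 and 4 extra bits, so it covers the 16 offsets 30 through 45, and slot 11 (base 46) picks up exactly where it leaves off. In sketch form (the names slot and is are assumed):

	/* Decode an explicit match offset from its offset slot (sketch). */
	u32 offset = (u32)lzx_offset_slot_base[slot] +
		     bitstream_read_bits(is, lzx_extra_offset_bits[slot]);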

/* Round the specified buffer size up to the next valid LZX window size, and
 * return its order (log2).  Or, if the buffer size is 0 or greater than the
 * largest valid LZX window size, return 0. */
unsigned
lzx_get_window_order(size_t max_bufsize)
{
	if (max_bufsize == 0 || max_bufsize > LZX_MAX_WINDOW_SIZE)
		return 0;

	return max(ilog2_ceil(max_bufsize), LZX_MIN_WINDOW_ORDER);
}

/* Given a valid LZX window order, return the number of symbols that will exist
 * in the main Huffman code. */
unsigned
lzx_get_num_main_syms(unsigned window_order)
{
	/* Note: one would expect that the maximum match offset would be
	 * 'window_size - LZX_MIN_MATCH_LEN', which would occur if the first two
	 * bytes were to match the last two bytes.  However, the format
	 * disallows this case.  This reduces the number of needed offset slots
	 * by 1. */
	u32 window_size = (u32)1 << window_order;
	u32 max_offset = window_size - LZX_MIN_MATCH_LEN - 1;
	unsigned num_offset_slots = 30;
	while (max_offset >= lzx_offset_slot_base[num_offset_slots])
		num_offset_slots++;

	return LZX_NUM_CHARS + (num_offset_slots * LZX_NUM_LEN_HEADERS);
}

static void
do_translate_target(void *target, s32 input_pos)
{
	s32 abs_offset, rel_offset;

	rel_offset = get_unaligned_le32(target);
	if (rel_offset >= -input_pos && rel_offset < LZX_WIM_MAGIC_FILESIZE) {
		if (rel_offset < LZX_WIM_MAGIC_FILESIZE - input_pos) {
			/* "good translation" */
			abs_offset = rel_offset + input_pos;
		} else {
			/* "compensating translation" */
			abs_offset = rel_offset - LZX_WIM_MAGIC_FILESIZE;
		}
		put_unaligned_le32(abs_offset, target);
	}
}

static void
undo_translate_target(void *target, s32 input_pos)
{
	s32 abs_offset, rel_offset;

	abs_offset = get_unaligned_le32(target);
	if (abs_offset >= 0) {
		if (abs_offset < LZX_WIM_MAGIC_FILESIZE) {
			/* "good translation" */
			rel_offset = abs_offset - input_pos;
			put_unaligned_le32(rel_offset, target);
		}
	} else {
		if (abs_offset >= -input_pos) {
			/* "compensating translation" */
			rel_offset = abs_offset + LZX_WIM_MAGIC_FILESIZE;
			put_unaligned_le32(rel_offset, target);
		}
	}
}
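The two routines are exact inverses of each other. A round-trip sketch of mine (the buffer contents and position are only illustrative):

	u8 target[4];

	put_unaligned_le32(1000, target);	/* relative call target */
	do_translate_target(target, 100);	/* now absolute: 1100 */
	undo_translate_target(target, 100);	/* relative again: 1000 */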

/*
 * Do or undo the 'E8' preprocessing used in LZX.  Before compression, the
 * uncompressed data is preprocessed by changing the targets of x86 CALL
 * instructions from relative offsets to absolute offsets.  After decompression,
 * the translation is undone by changing the targets of x86 CALL instructions
 * from absolute offsets to relative offsets.
 *
 * Note that despite its intent, E8 preprocessing can be done on any data even
 * if it is not actually x86 machine code.  In fact, E8 preprocessing appears to
 * always be used in LZX-compressed resources in WIM files; there is no bit to
 * indicate whether it is used or not, unlike in the LZX compressed format as
 * used in cabinet files, where a bit is reserved for that purpose.
 *
 * E8 preprocessing is disabled in the last 6 bytes of the uncompressed data,
 * which really means the 5-byte call instruction cannot start in the last 10
 * bytes of the uncompressed data.  This is one of the errors in the LZX
 * documentation.
 *
 * E8 preprocessing does not appear to be disabled after the 32768th chunk of a
 * WIM resource, which apparently is another difference from the LZX compression
 * used in cabinet files.
 *
 * E8 processing is supposed to take the file size as a parameter, as it is used
 * in calculating the translated jump targets.  But in WIM files, this file size
 * is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000).
 */
static void
lzx_e8_filter(u8 *data, u32 size, void (*process_target)(void *, s32))
{

#if !defined(__SSE2__) && !defined(__AVX2__)
	/*
	 * A worthwhile optimization is to push the end-of-buffer check into the
	 * relatively rare E8 case.  This is possible if we replace the last six
	 * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
	 * before reaching end-of-buffer.  In addition, this scheme guarantees
	 * that no translation can begin following an E8 byte in the last 10
	 * bytes because a 4-byte offset containing E8 as its high byte is a
	 * large negative number that is not valid for translation.  That is
	 * exactly what we need.
	 */
	u8 *tail;
	u8 saved_bytes[6];
	u8 *p;

	if (size <= 10)
		return;

	tail = &data[size - 6];
	memcpy(saved_bytes, tail, 6);
	memset(tail, 0xE8, 6);
	p = data;
	for (;;) {
		while (*p != 0xE8)
			p++;
		if (p >= tail)
			break;
		(*process_target)(p + 1, p - data);
		p += 5;
	}
	memcpy(tail, saved_bytes, 6);
#else
	/* SSE2 or AVX2 optimized version for x86_64 */

	u8 *p = data;
	u64 valid_mask = ~0;

	if (size <= 10)
		return;
#ifdef __AVX2__
#  define ALIGNMENT_REQUIRED 32
#else
#  define ALIGNMENT_REQUIRED 16
#endif

	/* Process one byte at a time until the pointer is properly aligned. */
	while ((uintptr_t)p % ALIGNMENT_REQUIRED != 0) {
		if (p >= data + size - 10)
			return;
		if (*p == 0xE8 && (valid_mask & 1)) {
			(*process_target)(p + 1, p - data);
			valid_mask &= ~0x1F;
		}
		p++;
		valid_mask >>= 1;
		valid_mask |= (u64)1 << 63;
	}

	if (data + size - p >= 64) {

		/* Vectorized processing */

		/* Note: we use a "trap" E8 byte to eliminate the need to check
		 * for end-of-buffer in the inner loop.  This byte is carefully
		 * positioned so that it will never be changed by a previous
		 * translation before it is detected. */

		u8 *trap = p + ((data + size - p) & ~31) - 32 + 4;
		u8 saved_byte = *trap;
		*trap = 0xE8;

		for (;;) {
			u32 e8_mask;
			u8 *orig_p = p;
#ifdef __AVX2__
			const __m256i e8_bytes = _mm256_set1_epi8(0xE8);
			for (;;) {
				__m256i bytes = *(const __m256i *)p;
				__m256i cmpresult = _mm256_cmpeq_epi8(bytes, e8_bytes);
				e8_mask = _mm256_movemask_epi8(cmpresult);
				if (e8_mask)
					break;
				p += 32;
			}
#else
			const __m128i e8_bytes = _mm_set1_epi8(0xE8);
			for (;;) {
				/* Read the next 32 bytes of data and test them
				 * for E8 bytes. */
				__m128i bytes1 = *(const __m128i *)p;
				__m128i bytes2 = *(const __m128i *)(p + 16);
				__m128i cmpresult1 = _mm_cmpeq_epi8(bytes1, e8_bytes);
				__m128i cmpresult2 = _mm_cmpeq_epi8(bytes2, e8_bytes);
				u32 mask1 = _mm_movemask_epi8(cmpresult1);
				u32 mask2 = _mm_movemask_epi8(cmpresult2);
				/* The masks have a bit set for each E8 byte.
				 * We stay in this fast inner loop as long as
				 * there are no E8 bytes. */
				if (mask1 | mask2) {
					e8_mask = mask1 | (mask2 << 16);
					break;
				}
				p += 32;
			}
#endif

			/* Did we pass over data with no E8 bytes? */
			if (p != orig_p)
				valid_mask = ~0;

			/* Are we nearing end-of-buffer? */
			if (p == trap - 4)
				break;

			/* Process the E8 bytes.  However, the AND with
			 * 'valid_mask' ensures we never process an E8 byte that
			 * was itself part of a translation target. */
			while ((e8_mask &= valid_mask)) {
				unsigned bit = bsf32(e8_mask);
				(*process_target)(p + bit + 1, p + bit - data);
				valid_mask &= ~((u64)0x1F << bit);
			}

			valid_mask >>= 32;
			valid_mask |= 0xFFFFFFFF00000000;
			p += 32;
		}

		*trap = saved_byte;
	}

	/* Approaching the end of the buffer; process one byte at a time. */
	while (p < data + size - 10) {
		if (*p == 0xE8 && (valid_mask & 1)) {
			(*process_target)(p + 1, p - data);
			valid_mask &= ~0x1F;
		}
		p++;
		valid_mask >>= 1;
		valid_mask |= (u64)1 << 63;
	}
#endif /* __SSE2__ || __AVX2__ */
}

void
lzx_preprocess(u8 *data, u32 size)
{
	lzx_e8_filter(data, size, do_translate_target);
}

void
lzx_postprocess(u8 *data, u32 size)
{
	lzx_e8_filter(data, size, undo_translate_target);
}
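So a caller decompressing one chunk would, as a sketch (out and out_size are names assumed for the example), run the match/literal decoder into its output buffer and then simply finish with:

	lzx_postprocess(out, out_size);	/* undo E8 translation in place */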

@@ -0,0 +1,29 @@
/*
 * lzx_common.h
 *
 * Declarations shared between LZX compression and decompression.
 */

#ifndef _LZX_COMMON_H
#define _LZX_COMMON_H

#include "lzx_constants.h"
#include "common_defs.h"

extern const s32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1];

extern const u8 lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS];

extern unsigned
lzx_get_window_order(size_t max_bufsize);

extern unsigned
lzx_get_num_main_syms(unsigned window_order);

extern void
lzx_preprocess(u8 *data, u32 size);

extern void
lzx_postprocess(u8 *data, u32 size);

#endif /* _LZX_COMMON_H */

@@ -0,0 +1,103 @@
/*
 * lzx_constants.h
 *
 * Constants for the LZX compression format.
 */

#ifndef _LZX_CONSTANTS_H
#define _LZX_CONSTANTS_H

/* Number of literal byte values. */
#define LZX_NUM_CHARS	256

/* The smallest and largest allowed match lengths. */
#define LZX_MIN_MATCH_LEN	2
#define LZX_MAX_MATCH_LEN	257

/* Number of distinct match lengths that can be represented. */
#define LZX_NUM_LENS		(LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)

/* Number of match lengths for which no length symbol is required. */
#define LZX_NUM_PRIMARY_LENS	7
#define LZX_NUM_LEN_HEADERS	(LZX_NUM_PRIMARY_LENS + 1)

/* Valid values of the 3-bit block type field. */
#define LZX_BLOCKTYPE_VERBATIM		1
#define LZX_BLOCKTYPE_ALIGNED		2
#define LZX_BLOCKTYPE_UNCOMPRESSED	3

/* 'LZX_MIN_WINDOW_SIZE' and 'LZX_MAX_WINDOW_SIZE' are the minimum and maximum
 * sizes of the sliding window. */
#define LZX_MIN_WINDOW_ORDER	15
#define LZX_MAX_WINDOW_ORDER	21
#define LZX_MIN_WINDOW_SIZE	(1UL << LZX_MIN_WINDOW_ORDER)	/* 32768 */
#define LZX_MAX_WINDOW_SIZE	(1UL << LZX_MAX_WINDOW_ORDER)	/* 2097152 */

/* Maximum number of offset slots.  (The actual number of offset slots depends
 * on the window size.) */
#define LZX_MAX_OFFSET_SLOTS	50

/* Maximum number of symbols in the main code.  (The actual number of symbols in
 * the main code depends on the window size.) */
#define LZX_MAINCODE_MAX_NUM_SYMBOLS \
	(LZX_NUM_CHARS + (LZX_MAX_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS))

/* Number of symbols in the length code. */
#define LZX_LENCODE_NUM_SYMBOLS	(LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS)

/* Number of symbols in the pre-code. */
#define LZX_PRECODE_NUM_SYMBOLS	20

/* Number of bits in which each pre-code codeword length is represented. */
#define LZX_PRECODE_ELEMENT_SIZE	4

/* Number of low-order bits of each match offset that are entropy-encoded in
 * aligned offset blocks. */
#define LZX_NUM_ALIGNED_OFFSET_BITS	3

/* Number of symbols in the aligned offset code. */
#define LZX_ALIGNEDCODE_NUM_SYMBOLS	(1 << LZX_NUM_ALIGNED_OFFSET_BITS)

/* Mask for the match offset bits that are entropy-encoded in aligned offset
 * blocks. */
#define LZX_ALIGNED_OFFSET_BITMASK	((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1)

/* Number of bits in which each aligned offset codeword length is represented. */
#define LZX_ALIGNEDCODE_ELEMENT_SIZE	3

/* The first offset slot which requires an aligned offset symbol in aligned
 * offset blocks. */
#define LZX_MIN_ALIGNED_OFFSET_SLOT	8

/* The offset slot base for LZX_MIN_ALIGNED_OFFSET_SLOT. */
#define LZX_MIN_ALIGNED_OFFSET	14

/* The maximum number of extra offset bits in verbatim blocks.  (One would need
 * to subtract LZX_NUM_ALIGNED_OFFSET_BITS to get the number of extra offset
 * bits in *aligned* blocks.) */
#define LZX_MAX_NUM_EXTRA_BITS	17

/* Maximum lengths (in bits) for length-limited Huffman code construction. */
#define LZX_MAX_MAIN_CODEWORD_LEN	16
#define LZX_MAX_LEN_CODEWORD_LEN	16
#define LZX_MAX_PRE_CODEWORD_LEN	((1 << LZX_PRECODE_ELEMENT_SIZE) - 1)
#define LZX_MAX_ALIGNED_CODEWORD_LEN	((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1)

/* For LZX-compressed blocks in WIM resources, this value is always used as the
 * filesize parameter for the call instruction (0xe8 byte) preprocessing, even
 * though the blocks themselves are not this size, and the size of the actual
 * file resource in the WIM file is very likely to be something entirely
 * different as well. */
#define LZX_WIM_MAGIC_FILESIZE	12000000

/* Assumed LZX block size when the encoded block size begins with a 0 bit.
 * This is probably WIM-specific. */
#define LZX_DEFAULT_BLOCK_SIZE	32768

/* Number of offsets in the recent (or "repeat") offsets queue. */
#define LZX_NUM_RECENT_OFFSETS	3

/* An offset of n bytes is actually encoded as (n + LZX_OFFSET_ADJUSTMENT). */
#define LZX_OFFSET_ADJUSTMENT	(LZX_NUM_RECENT_OFFSETS - 1)
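Concretely (my example, not from the diff): adjusted values 0, 1, and 2 select the three recent offsets, so the smallest explicit match offset, 1 byte, is encoded as the adjusted value 1 + 2 == 3:

	u32 encoded = real_offset + LZX_OFFSET_ADJUSTMENT;	/* 1 -> 3 */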

#endif /* _LZX_CONSTANTS_H */
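As a compile-time cross-check (an addition of mine, mirroring the STATIC_ASSERT style used elsewhere in this code), the derived symbol counts match the 656- and 249-symbol rows handled by DECODE_TABLE_ENOUGH() above:

	STATIC_ASSERT(LZX_MAINCODE_MAX_NUM_SYMBOLS == 656);	/* 256 + 50 * 8 */
	STATIC_ASSERT(LZX_LENCODE_NUM_SYMBOLS == 249);		/* 256 - 7 */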

@@ -1,10 +1,11 @@
/*
 * lzx_decompress.c - A decompressor for the LZX compression format, which can
 * be used in "System Compressed" files.  This is based on the code from wimlib.
 * This code only supports a window size (dictionary size) of 32768 bytes, since
 * this is the only size used in System Compression.
 * lzx_decompress.c
 *
 * Copyright (C) 2015 Eric Biggers
 * A decompressor for the LZX compression format, as used in WIM files.
 */

/*
 * Copyright (C) 2012-2016 Eric Biggers
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software

@@ -20,276 +21,157 @@
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * LZX is an LZ77 and Huffman-code based compression format that has many
 * similarities to DEFLATE (the format used by zlib/gzip).  The compression
 * ratio is as good or better than DEFLATE.  See lzx_compress.c for a format
 * overview, and see https://en.wikipedia.org/wiki/LZX_(algorithm) for a
 * historical overview.  Here I make some pragmatic notes.
 *
 * The old specification for LZX is the document "Microsoft LZX Data Compression
 * Format" (1997).  It defines the LZX format as used in cabinet files.  Allowed
 * window sizes are 2^n where 15 <= n <= 21.  However, this document contains
 * several errors, so don't read too much into it...
 *
 * The new specification for LZX is the document "[MS-PATCH]: LZX DELTA
 * Compression and Decompression" (2014).  It defines the LZX format as used by
 * Microsoft's binary patcher.  It corrects several errors in the 1997 document
 * and extends the format in several ways --- namely, optional reference data,
 * up to 2^25 byte windows, and longer match lengths.
 *
 * WIM files use a more restricted form of LZX.  No LZX DELTA extensions are
 * present, the window is not "sliding", E8 preprocessing is done
 * unconditionally with a fixed file size, and the maximum window size is always
 * 2^15 bytes (equal to the size of each "chunk" in a compressed WIM resource).
 * This code is primarily intended to implement this form of LZX.  But although
 * not compatible with WIMGAPI, this code also supports maximum window sizes up
 * to 2^21 bytes.
 *
 * TODO: Add support for window sizes up to 2^25 bytes.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
# include "config.h"
#endif

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#include <ntfs-3g/misc.h>

#include "decompress_common.h"
#include "lzx_common.h"
#include "system_compression.h"

/* Number of literal byte values */
#define LZX_NUM_CHARS 256

/* The smallest and largest allowed match lengths */
#define LZX_MIN_MATCH_LEN 2
#define LZX_MAX_MATCH_LEN 257

/* Number of distinct match lengths that can be represented */
#define LZX_NUM_LENS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)

/* Number of match lengths for which no length symbol is required */
#define LZX_NUM_PRIMARY_LENS 7
#define LZX_NUM_LEN_HEADERS (LZX_NUM_PRIMARY_LENS + 1)

/* Valid values of the 3-bit block type field */
#define LZX_BLOCKTYPE_VERBATIM 1
#define LZX_BLOCKTYPE_ALIGNED 2
#define LZX_BLOCKTYPE_UNCOMPRESSED 3

/* Number of offset slots for a window size of 32768 */
#define LZX_NUM_OFFSET_SLOTS 30

/* Number of symbols in the main code for a window size of 32768 */
#define LZX_MAINCODE_NUM_SYMBOLS \
	(LZX_NUM_CHARS + (LZX_NUM_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS))

/* Number of symbols in the length code */
#define LZX_LENCODE_NUM_SYMBOLS (LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS)

/* Number of symbols in the precode */
#define LZX_PRECODE_NUM_SYMBOLS 20

/* Number of bits in which each precode codeword length is represented */
#define LZX_PRECODE_ELEMENT_SIZE 4

/* Number of low-order bits of each match offset that are entropy-encoded in
 * aligned offset blocks */
#define LZX_NUM_ALIGNED_OFFSET_BITS 3

/* Number of symbols in the aligned offset code */
#define LZX_ALIGNEDCODE_NUM_SYMBOLS (1 << LZX_NUM_ALIGNED_OFFSET_BITS)

/* Mask for the match offset bits that are entropy-encoded in aligned offset
 * blocks */
#define LZX_ALIGNED_OFFSET_BITMASK ((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1)

/* Number of bits in which each aligned offset codeword length is represented */
#define LZX_ALIGNEDCODE_ELEMENT_SIZE 3

/* Maximum lengths (in bits) of the codewords in each Huffman code */
#define LZX_MAX_MAIN_CODEWORD_LEN 16
#define LZX_MAX_LEN_CODEWORD_LEN 16
#define LZX_MAX_PRE_CODEWORD_LEN ((1 << LZX_PRECODE_ELEMENT_SIZE) - 1)
#define LZX_MAX_ALIGNED_CODEWORD_LEN ((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1)

/* The default "filesize" value used in pre/post-processing.  In the LZX format
 * used in cabinet files this value must be given to the decompressor, whereas
 * in the LZX format used in WIM files and system-compressed files this value is
 * fixed at 12000000. */
#define LZX_DEFAULT_FILESIZE 12000000

/* Assumed block size when the encoded block size begins with a 0 bit. */
#define LZX_DEFAULT_BLOCK_SIZE 32768

/* Number of offsets in the recent (or "repeat") offsets queue. */
#define LZX_NUM_RECENT_OFFSETS 3

/* These values are chosen for fast decompression. */
#define LZX_MAINCODE_TABLEBITS		11
#define LZX_LENCODE_TABLEBITS		10
#define LZX_LENCODE_TABLEBITS		9
#define LZX_PRECODE_TABLEBITS		6
#define LZX_ALIGNEDCODE_TABLEBITS	7

#define LZX_READ_LENS_MAX_OVERRUN 50

/* Mapping: offset slot => first match offset that uses that offset slot.
 */
static const u32 lzx_offset_slot_base[LZX_NUM_OFFSET_SLOTS + 1] = {
	0     , 1     , 2     , 3     , 4     ,	/* 0  --- 4  */
	6     , 8     , 12    , 16    , 24    ,	/* 5  --- 9  */
	32    , 48    , 64    , 96    , 128   ,	/* 10 --- 14 */
	192   , 256   , 384   , 512   , 768   ,	/* 15 --- 19 */
	1024  , 1536  , 2048  , 3072  , 4096  ,	/* 20 --- 24 */
	6144  , 8192  , 12288 , 16384 , 24576 ,	/* 25 --- 29 */
	32768 ,					/* extra */
};

/* Mapping: offset slot => how many extra bits must be read and added to the
 * corresponding offset slot base to decode the match offset. */
static const u8 lzx_extra_offset_bits[LZX_NUM_OFFSET_SLOTS] = {
	0 , 0 , 0 , 0 , 1 ,
	1 , 2 , 2 , 3 , 3 ,
	4 , 4 , 5 , 5 , 6 ,
	6 , 7 , 7 , 8 , 8 ,
	9 , 9 , 10, 10, 11,
	11, 12, 12, 13, 13,
};

/* Reusable heap-allocated memory for LZX decompression */
struct lzx_decompressor {

	/* Huffman decoding tables, and arrays that map symbols to codeword
	 * lengths */
	DECODE_TABLE(maincode_decode_table, LZX_MAINCODE_MAX_NUM_SYMBOLS,
		     LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
	u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];

	u16 maincode_decode_table[(1 << LZX_MAINCODE_TABLEBITS) +
				  (LZX_MAINCODE_NUM_SYMBOLS * 2)];
	u8 maincode_lens[LZX_MAINCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];

	u16 lencode_decode_table[(1 << LZX_LENCODE_TABLEBITS) +
				 (LZX_LENCODE_NUM_SYMBOLS * 2)];
	DECODE_TABLE(lencode_decode_table, LZX_LENCODE_NUM_SYMBOLS,
		     LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
	u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];

	union {
		DECODE_TABLE(alignedcode_decode_table, LZX_ALIGNEDCODE_NUM_SYMBOLS,
			     LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN);
		u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
	};

	u16 alignedcode_decode_table[(1 << LZX_ALIGNEDCODE_TABLEBITS) +
				     (LZX_ALIGNEDCODE_NUM_SYMBOLS * 2)];
	u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
	union {
		DECODE_TABLE(precode_decode_table, LZX_PRECODE_NUM_SYMBOLS,
			     LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
		u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
		u8 extra_offset_bits[LZX_MAX_OFFSET_SLOTS];
	};

	u16 precode_decode_table[(1 << LZX_PRECODE_TABLEBITS) +
				 (LZX_PRECODE_NUM_SYMBOLS * 2)];
	u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
	union {
		DECODE_TABLE_WORKING_SPACE(maincode_working_space,
					   LZX_MAINCODE_MAX_NUM_SYMBOLS,
					   LZX_MAX_MAIN_CODEWORD_LEN);
		DECODE_TABLE_WORKING_SPACE(lencode_working_space,
					   LZX_LENCODE_NUM_SYMBOLS,
					   LZX_MAX_LEN_CODEWORD_LEN);
		DECODE_TABLE_WORKING_SPACE(alignedcode_working_space,
					   LZX_ALIGNEDCODE_NUM_SYMBOLS,
					   LZX_MAX_ALIGNED_CODEWORD_LEN);
		DECODE_TABLE_WORKING_SPACE(precode_working_space,
					   LZX_PRECODE_NUM_SYMBOLS,
					   LZX_MAX_PRE_CODEWORD_LEN);
	};

	/* Temporary space for make_huffman_decode_table() */
	u16 working_space[2 * (1 + LZX_MAX_MAIN_CODEWORD_LEN) +
			  LZX_MAINCODE_NUM_SYMBOLS];
};
	unsigned window_order;
	unsigned num_main_syms;

static void undo_e8_translation(void *target, s32 input_pos)
{
	s32 abs_offset, rel_offset;
	/* Like lzx_extra_offset_bits[], but does not include the entropy-coded
	 * bits of aligned offset blocks */
	u8 extra_offset_bits_minus_aligned[LZX_MAX_OFFSET_SLOTS];

	abs_offset = get_unaligned_le32(target);
	if (abs_offset >= 0) {
		if (abs_offset < LZX_DEFAULT_FILESIZE) {
			/* "good translation" */
			rel_offset = abs_offset - input_pos;
			put_unaligned_le32(rel_offset, target);
		}
	} else {
		if (abs_offset >= -input_pos) {
			/* "compensating translation" */
			rel_offset = abs_offset + LZX_DEFAULT_FILESIZE;
			put_unaligned_le32(rel_offset, target);
		}
	}
}
} _aligned_attribute(DECODE_TABLE_ALIGNMENT);

/*
 * Undo the 'E8' preprocessing used in LZX.  Before compression, the
 * uncompressed data was preprocessed by changing the targets of suspected x86
 * CALL instructions from relative offsets to absolute offsets.  After
 * match/literal decoding, the decompressor must undo the translation.
 */
static void lzx_postprocess(u8 *data, u32 size)
{
	/*
	 * A worthwhile optimization is to push the end-of-buffer check into the
	 * relatively rare E8 case.  This is possible if we replace the last six
	 * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
	 * before reaching end-of-buffer.  In addition, this scheme guarantees
	 * that no translation can begin following an E8 byte in the last 10
	 * bytes because a 4-byte offset containing E8 as its high byte is a
	 * large negative number that is not valid for translation.  That is
	 * exactly what we need.
	 */
	u8 *tail;
	u8 saved_bytes[6];
	u8 *p;

	if (size <= 10)
		return;

	tail = &data[size - 6];
	memcpy(saved_bytes, tail, 6);
	memset(tail, 0xE8, 6);
	p = data;
	for (;;) {
		while (*p != 0xE8)
			p++;
		if (p >= tail)
			break;
		undo_e8_translation(p + 1, p - data);
		p += 5;
	}
	memcpy(tail, saved_bytes, 6);
}

/* Read a Huffman-encoded symbol using the precode. */
static forceinline unsigned read_presym(const struct lzx_decompressor *d,
					struct input_bitstream *is)
static forceinline unsigned
read_presym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
	return read_huffsym(is, d->precode_decode_table,
			    LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
}

/* Read a Huffman-encoded symbol using the main code. */
static forceinline unsigned read_mainsym(const struct lzx_decompressor *d,
					 struct input_bitstream *is)
static forceinline unsigned
read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
	return read_huffsym(is, d->maincode_decode_table,
			    LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
}

/* Read a Huffman-encoded symbol using the length code. */
static forceinline unsigned read_lensym(const struct lzx_decompressor *d,
					struct input_bitstream *is)
static forceinline unsigned
read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
	return read_huffsym(is, d->lencode_decode_table,
			    LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
}

/* Read a Huffman-encoded symbol using the aligned offset code. */
static forceinline unsigned read_alignedsym(const struct lzx_decompressor *d,
					    struct input_bitstream *is)
static forceinline unsigned
read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
	return read_huffsym(is, d->alignedcode_decode_table,
			    LZX_ALIGNEDCODE_TABLEBITS,
			    LZX_MAX_ALIGNED_CODEWORD_LEN);
			    LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN);
}

/*
 * Read the precode from the compressed input bitstream, then use it to decode
 * @num_lens codeword length values.
 *
 * @is:	The input bitstream.
 *
 * @lens:	An array that contains the length values from the previous time
 *		the codeword lengths for this Huffman code were read, or all 0's
 *		if this is the first time.  This array must have at least
 *		(@num_lens + LZX_READ_LENS_MAX_OVERRUN) entries.
 *
 * @num_lens:	Number of length values to decode.
 *
 * Returns 0 on success, or -1 if the data was invalid.
 * Read a precode from the compressed input bitstream, then use it to decode
 * @num_lens codeword length values and write them to @lens.
 */
static int lzx_read_codeword_lens(struct lzx_decompressor *d,
				  struct input_bitstream *is,
				  u8 *lens, unsigned num_lens)
static int
lzx_read_codeword_lens(struct lzx_decompressor *d, struct input_bitstream *is,
		       u8 *lens, unsigned num_lens)
{
	u8 *len_ptr = lens;
	u8 *lens_end = lens + num_lens;
	int i;

	/* Read the lengths of the precode codewords.  These are given
	 * explicitly. */
	for (i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
	/* Read the lengths of the precode codewords.  These are stored
	 * explicitly. */
	for (int i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
		d->precode_lens[i] =
			bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE);
	}

	/* Make the decoding table for the precode. */
	/* Build the decoding table for the precode. */
	if (make_huffman_decode_table(d->precode_decode_table,
				      LZX_PRECODE_NUM_SYMBOLS,
				      LZX_PRECODE_TABLEBITS,
				      d->precode_lens,
				      LZX_MAX_PRE_CODEWORD_LEN,
				      d->working_space))
				      d->precode_working_space))
		return -1;

	/* Decode the codeword lengths. */
@@ -322,7 +204,7 @@ static int lzx_read_codeword_lens(struct lzx_decompressor *d,
			/* Run of identical lengths */
			run_len = 4 + bitstream_read_bits(is, 1);
			presym = read_presym(d, is);
			if (presym > 17)
			if (unlikely(presym > 17))
				return -1;
			len = *len_ptr - presym;
			if ((s8)len < 0)

@@ -332,7 +214,8 @@ static int lzx_read_codeword_lens(struct lzx_decompressor *d,
			do {
				*len_ptr++ = len;
			} while (--run_len);
			/* Worst case overrun is when presym == 18,
			/*
			 * The worst case overrun is when presym == 18,
			 * run_len == 20 + 31, and only 1 length was remaining.
			 * So LZX_READ_LENS_MAX_OVERRUN == 50.
			 *

@@ -340,7 +223,8 @@ static int lzx_read_codeword_lens(struct lzx_decompressor *d,
			 * can corrupt the previous values in the second half.
			 * This doesn't really matter because the resulting
			 * lengths will still be in range, and data that
			 * generates overruns is invalid anyway. */
			 * generates overruns is invalid anyway.
			 */
		}
	} while (len_ptr < lens_end);
@@ -348,115 +232,82 @@ static int lzx_read_codeword_lens(struct lzx_decompressor *d,
}

/*
 * Read the header of an LZX block and save the block type and (uncompressed)
 * size in *block_type_ret and *block_size_ret, respectively.
 *
 * If the block is compressed, also update the Huffman decode @tables with the
 * new Huffman codes.  If the block is uncompressed, also update the match
 * offset @queue with the new match offsets.
 *
 * Return 0 on success, or -1 if the data was invalid.
 * Read the header of an LZX block.  For all block types, the block type and
 * size is saved in *block_type_ret and *block_size_ret, respectively.  For
 * compressed blocks, the codeword lengths are also saved.  For uncompressed
 * blocks, the recent offsets queue is also updated.
 */
static int lzx_read_block_header(struct lzx_decompressor *d,
				 struct input_bitstream *is,
				 int *block_type_ret,
				 u32 *block_size_ret,
				 u32 recent_offsets[])
static int
lzx_read_block_header(struct lzx_decompressor *d, struct input_bitstream *is,
		      u32 recent_offsets[], int *block_type_ret,
		      u32 *block_size_ret)
{
	int block_type;
	u32 block_size;
	int i;

	bitstream_ensure_bits(is, 4);

	/* The first three bits tell us what kind of block it is, and should be
	 * one of the LZX_BLOCKTYPE_* values. */
	/* Read the block type. */
	block_type = bitstream_pop_bits(is, 3);

	/* Read the block size. */
	if (bitstream_pop_bits(is, 1)) {
		block_size = LZX_DEFAULT_BLOCK_SIZE;
	} else {
		block_size = 0;
		block_size |= bitstream_read_bits(is, 8);
		block_size <<= 8;
		block_size |= bitstream_read_bits(is, 8);
		block_size = bitstream_read_bits(is, 16);
		if (d->window_order >= 16) {
			block_size <<= 8;
			block_size |= bitstream_read_bits(is, 8);
		}
	}

	switch (block_type) {

	case LZX_BLOCKTYPE_ALIGNED:

		/* Read the aligned offset code and prepare its decode table.
		 */
		/* Read the aligned offset codeword lengths. */

		for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
		for (int i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
			d->alignedcode_lens[i] =
				bitstream_read_bits(is,
						    LZX_ALIGNEDCODE_ELEMENT_SIZE);
		}

		if (make_huffman_decode_table(d->alignedcode_decode_table,
					      LZX_ALIGNEDCODE_NUM_SYMBOLS,
					      LZX_ALIGNEDCODE_TABLEBITS,
					      d->alignedcode_lens,
					      LZX_MAX_ALIGNED_CODEWORD_LEN,
					      d->working_space))
			return -1;

		/* Fall through, since the rest of the header for aligned offset
		 * blocks is the same as that for verbatim blocks. */

	case LZX_BLOCKTYPE_VERBATIM:

		/* Read the main code and prepare its decode table.
		 *
		 * Note that the codeword lengths in the main code are encoded
		 * in two parts: one part for literal symbols, and one part for
		 * match symbols. */
		/* Read the main codeword lengths, which are divided into two
		 * parts: literal symbols and match headers. */

		if (lzx_read_codeword_lens(d, is, d->maincode_lens,
					   LZX_NUM_CHARS))
			return -1;

		if (lzx_read_codeword_lens(d, is,
					   d->maincode_lens + LZX_NUM_CHARS,
					   LZX_MAINCODE_NUM_SYMBOLS - LZX_NUM_CHARS))
		if (lzx_read_codeword_lens(d, is, d->maincode_lens + LZX_NUM_CHARS,
					   d->num_main_syms - LZX_NUM_CHARS))
			return -1;

		if (make_huffman_decode_table(d->maincode_decode_table,
					      LZX_MAINCODE_NUM_SYMBOLS,
					      LZX_MAINCODE_TABLEBITS,
					      d->maincode_lens,
					      LZX_MAX_MAIN_CODEWORD_LEN,
					      d->working_space))
			return -1;

		/* Read the length code and prepare its decode table. */
		/* Read the length codeword lengths. */

		if (lzx_read_codeword_lens(d, is, d->lencode_lens,
					   LZX_LENCODE_NUM_SYMBOLS))
			return -1;

		if (make_huffman_decode_table(d->lencode_decode_table,
					      LZX_LENCODE_NUM_SYMBOLS,
					      LZX_LENCODE_TABLEBITS,
					      d->lencode_lens,
					      LZX_MAX_LEN_CODEWORD_LEN,
					      d->working_space))
			return -1;

		break;

	case LZX_BLOCKTYPE_UNCOMPRESSED:

		/* Before reading the three recent offsets from the uncompressed
		 * block header, the stream must be aligned on a 16-bit
		 * boundary.  But if the stream is *already* aligned, then the
		 * next 16 bits must be discarded. */
		/*
		 * The header of an uncompressed block contains new values for
		 * the recent offsets queue, starting on the next 16-bit
		 * boundary in the bitstream.  Careful: if the stream is
		 * *already* aligned, the correct thing to do is to throw away
		 * the next 16 bits (this is probably a mistake in the format).
		 */
		bitstream_ensure_bits(is, 1);
		bitstream_align(is);

		recent_offsets[0] = bitstream_read_u32(is);
		recent_offsets[1] = bitstream_read_u32(is);
		recent_offsets[2] = bitstream_read_u32(is);
@ -477,202 +328,218 @@ static int lzx_read_block_header(struct lzx_decompressor *d,
	return 0;
}

/* Decompress a block of LZX-compressed data. */
static int lzx_decompress_block(const struct lzx_decompressor *d,
				struct input_bitstream *is,
				int block_type, u32 block_size,
				u8 * const out_begin, u8 *out_next,
				u32 recent_offsets[])
static int
lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is,
		     int block_type, u32 block_size,
		     u8 * const out_begin, u8 *out_next, u32 recent_offsets[])
{
	u8 * const block_end = out_next + block_size;
	unsigned ones_if_aligned = 0U - (block_type == LZX_BLOCKTYPE_ALIGNED);
	unsigned min_aligned_offset_slot;

	/*
	 * Build the Huffman decode tables.  We always need to build the main
	 * and length decode tables.  For aligned blocks we additionally need to
	 * build the aligned offset decode table.
	 */

	if (make_huffman_decode_table(d->maincode_decode_table,
				      d->num_main_syms,
				      LZX_MAINCODE_TABLEBITS,
				      d->maincode_lens,
				      LZX_MAX_MAIN_CODEWORD_LEN,
				      d->maincode_working_space))
		return -1;

	if (make_huffman_decode_table(d->lencode_decode_table,
				      LZX_LENCODE_NUM_SYMBOLS,
				      LZX_LENCODE_TABLEBITS,
				      d->lencode_lens,
				      LZX_MAX_LEN_CODEWORD_LEN,
				      d->lencode_working_space))
		return -1;

	if (block_type == LZX_BLOCKTYPE_ALIGNED) {
		if (make_huffman_decode_table(d->alignedcode_decode_table,
					      LZX_ALIGNEDCODE_NUM_SYMBOLS,
					      LZX_ALIGNEDCODE_TABLEBITS,
					      d->alignedcode_lens,
					      LZX_MAX_ALIGNED_CODEWORD_LEN,
					      d->alignedcode_working_space))
			return -1;
		min_aligned_offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
		memcpy(d->extra_offset_bits, d->extra_offset_bits_minus_aligned,
		       sizeof(lzx_extra_offset_bits));
	} else {
		min_aligned_offset_slot = LZX_MAX_OFFSET_SLOTS;
		memcpy(d->extra_offset_bits, lzx_extra_offset_bits,
		       sizeof(lzx_extra_offset_bits));
	}

	/* Decode the literals and matches. */

	do {
		unsigned mainsym;
		unsigned match_len;
		u32 match_offset;
		unsigned length;
		u32 offset;
		unsigned offset_slot;
		unsigned num_extra_bits;

		mainsym = read_mainsym(d, is);
		if (mainsym < LZX_NUM_CHARS) {
			/* Literal */
			*out_next++ = mainsym;
			continue;
		}

		/* Match */

		/* Decode the length header and offset slot. */
		mainsym -= LZX_NUM_CHARS;
		match_len = mainsym % LZX_NUM_LEN_HEADERS;
		offset_slot = mainsym / LZX_NUM_LEN_HEADERS;
		STATIC_ASSERT(LZX_NUM_CHARS % LZX_NUM_LEN_HEADERS == 0);
		length = mainsym % LZX_NUM_LEN_HEADERS;
		offset_slot = (mainsym - LZX_NUM_CHARS) / LZX_NUM_LEN_HEADERS;

		/* If needed, read a length symbol to decode the full length. */
		if (match_len == LZX_NUM_PRIMARY_LENS)
			match_len += read_lensym(d, is);
		match_len += LZX_MIN_MATCH_LEN;
		if (length == LZX_NUM_PRIMARY_LENS)
			length += read_lensym(d, is);
		length += LZX_MIN_MATCH_LEN;

		if (offset_slot < LZX_NUM_RECENT_OFFSETS) {
			/* Repeat offset */

			/* Note: This isn't a real LRU queue, since using the R2
			 * offset doesn't bump the R1 offset down to R2.  This
			 * quirk allows all 3 recent offsets to be handled by
			 * the same code.  (For R0, the swap is a no-op.) */
			match_offset = recent_offsets[offset_slot];
			 * offset doesn't bump the R1 offset down to R2. */
			offset = recent_offsets[offset_slot];
			recent_offsets[offset_slot] = recent_offsets[0];
			recent_offsets[0] = match_offset;
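			/*
			 * Editor's note: a worked example of the quirk noted
			 * above (illustrative values).  With recent_offsets =
			 * {R0=100, R1=200, R2=300}, a match selecting slot 2
			 * swaps R2 with R0:
			 *
			 *	offset = 300;            // recent_offsets[2]
			 *	recent_offsets[2] = 100; // old R0
			 *	recent_offsets[0] = 300; // now {300, 200, 100}
			 *
			 * A true LRU update would have produced {300, 100, 200}.
			 */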
		} else {
			/* Explicit offset */

			/* Look up the number of extra bits that need to be read
			 * to decode offsets with this offset slot. */
			num_extra_bits = lzx_extra_offset_bits[offset_slot];

			/* Start with the offset slot base value. */
			match_offset = lzx_offset_slot_base[offset_slot];

			/* In aligned offset blocks, the low-order 3 bits of
			 * each offset are encoded using the aligned offset
			 * code.  Otherwise, all the extra bits are literal. */

			if ((num_extra_bits & ones_if_aligned) >= LZX_NUM_ALIGNED_OFFSET_BITS) {
				match_offset +=
					bitstream_read_bits(is, num_extra_bits -
								LZX_NUM_ALIGNED_OFFSET_BITS)
					<< LZX_NUM_ALIGNED_OFFSET_BITS;
				match_offset += read_alignedsym(d, is);
			} else {
				match_offset += bitstream_read_bits(is, num_extra_bits);
			offset = bitstream_read_bits(is, d->extra_offset_bits[offset_slot]);
			if (offset_slot >= min_aligned_offset_slot) {
				offset = (offset << LZX_NUM_ALIGNED_OFFSET_BITS) |
					 read_alignedsym(d, is);
			}
			offset += lzx_offset_slot_base[offset_slot];

			/* Adjust the offset. */
			match_offset -= (LZX_NUM_RECENT_OFFSETS - 1);

			/* Update the recent offsets. */
			/* Update the match offset LRU queue. */
			STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
			recent_offsets[2] = recent_offsets[1];
			recent_offsets[1] = recent_offsets[0];
			recent_offsets[0] = match_offset;
		}
		recent_offsets[0] = offset;
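		/*
		 * Editor's note: in schematic form, an explicit offset in an
		 * aligned offset block whose slot uses N extra bits (N >= 3)
		 * is decoded as
		 *
		 *	offset  = read_bits(N - 3);           // high bits, verbatim
		 *	offset  = (offset << 3) | alignedsym; // low 3 bits, Huffman-coded
		 *	offset += lzx_offset_slot_base[offset_slot];
		 *
		 * In verbatim blocks, and for slots below
		 * min_aligned_offset_slot, all N extra bits are read verbatim
		 * instead ('d->extra_offset_bits' is set up accordingly before
		 * the loop).
		 */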

		/* Validate the match, then copy it to the current position. */

		if (match_len > (size_t)(block_end - out_next))
		/* Validate the match and copy it to the current position. */
		if (unlikely(lz_copy(length, offset, out_begin,
				     out_next, block_end, LZX_MIN_MATCH_LEN)))
			return -1;

		if (match_offset > (size_t)(out_next - out_begin))
			return -1;

		out_next = lz_copy(out_next, match_len, match_offset,
				   block_end, LZX_MIN_MATCH_LEN);

		out_next += length;
	} while (out_next != block_end);

	return 0;
}

/*
 * lzx_allocate_decompressor - Allocate an LZX decompressor
 *
 * Return the pointer to the decompressor on success, or return NULL and set
 * errno on failure.
 */
struct lzx_decompressor *lzx_allocate_decompressor(void)
int
lzx_decompress(struct lzx_decompressor *restrict d,
	       const void *restrict compressed_data, size_t compressed_size,
	       void *restrict uncompressed_data, size_t uncompressed_size)
{
	return ntfs_malloc(sizeof(struct lzx_decompressor));
}

/*
 * lzx_decompress - Decompress a buffer of LZX-compressed data
 *
 * @decompressor:      A decompressor allocated with lzx_allocate_decompressor()
 * @compressed_data:   The buffer of data to decompress
 * @compressed_size:   Number of bytes of compressed data
 * @uncompressed_data: The buffer in which to store the decompressed data
 * @uncompressed_size: The number of bytes the data decompresses into
 *
 * Return 0 on success, or return -1 and set errno on failure.
 */
int lzx_decompress(struct lzx_decompressor *decompressor,
		   const void *compressed_data, size_t compressed_size,
		   void *uncompressed_data, size_t uncompressed_size)
{
	struct lzx_decompressor *d = decompressor;
	u8 * const out_begin = uncompressed_data;
	u8 *out_next = out_begin;
	u8 * const out_end = out_begin + uncompressed_size;
	struct input_bitstream is;
	STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
	u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1};
	int e8_status = 0;
	unsigned may_have_e8_byte = 0;

	init_input_bitstream(&is, compressed_data, compressed_size);

	/* Codeword lengths begin as all 0's for delta encoding purposes. */
	memset(d->maincode_lens, 0, LZX_MAINCODE_NUM_SYMBOLS);
	/* Codeword lengths begin as all 0's for delta encoding purposes. */
	memset(d->maincode_lens, 0, d->num_main_syms);
	memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);

	/* Decompress blocks until we have all the uncompressed data. */

	while (out_next != out_end) {
		int block_type;
		u32 block_size;

		if (lzx_read_block_header(d, &is, &block_type, &block_size,
					  recent_offsets))
			goto invalid;
		if (lzx_read_block_header(d, &is, recent_offsets,
					  &block_type, &block_size))
			return -1;

		if (block_size < 1 || block_size > (size_t)(out_end - out_next))
			goto invalid;
		if (block_size < 1 || block_size > out_end - out_next)
			return -1;

		if (block_type != LZX_BLOCKTYPE_UNCOMPRESSED) {
		if (likely(block_type != LZX_BLOCKTYPE_UNCOMPRESSED)) {

			/* Compressed block */

			if (lzx_decompress_block(d,
						 &is,
						 block_type,
						 block_size,
						 out_begin,
						 out_next,
			/* Compressed block */
			if (lzx_decompress_block(d, &is, block_type, block_size,
						 out_begin, out_next,
						 recent_offsets))
				goto invalid;
				return -1;

			e8_status |= d->maincode_lens[0xe8];
			out_next += block_size;
			/* If the first E8 byte was in this block, then it must
			 * have been encoded as a literal using mainsym E8. */
			may_have_e8_byte |= d->maincode_lens[0xE8];
		} else {
			/* Uncompressed block */

			out_next = bitstream_read_bytes(&is, out_next,
							block_size);
			if (!out_next)
				goto invalid;
			/* Uncompressed block */
			if (bitstream_read_bytes(&is, out_next, block_size))
				return -1;

			/* Re-align the bitstream if needed. */
			if (block_size & 1)
				bitstream_read_byte(&is);

			e8_status = 1;
			/* There may have been an E8 byte in the block. */
			may_have_e8_byte = 1;
		}
		out_next += block_size;
	}

	/* Postprocess the data unless it cannot possibly contain 0xe8 bytes. */
	if (e8_status)
	/* Postprocess the data unless it cannot possibly contain E8 bytes. */
	if (may_have_e8_byte)
		lzx_postprocess(uncompressed_data, uncompressed_size);
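/*
 * Editor's note: lzx_postprocess() lives in lzx_common.c (also part of this
 * sync).  As a rough, hedged sketch of what it undoes, following wimlib's
 * conventions: at compression time, the 32-bit little-endian target after
 * each x86 CALL opcode (0xE8) was converted from relative to absolute form
 * against a fixed "magic" file size of 12000000.  The exact scan bounds and
 * checks are in lzx_common.c; schematically, the undo per 0xE8 byte is:
 */
#include <stdint.h>

static void undo_e8_translation_sketch(uint8_t *p, int32_t pos)
{
	/* 'p' points at the 4-byte LE target following an 0xE8 byte found
	 * at position 'pos' in the decompressed data. */
	int32_t abs = p[0] | p[1] << 8 | p[2] << 16 |
		      (int32_t)((uint32_t)p[3] << 24);
	int32_t rel;

	if (abs >= 0 && abs < 12000000)
		rel = abs - pos;	/* normal translation */
	else if (abs < 0 && abs >= -pos)
		rel = abs + 12000000;	/* compensating translation */
	else
		return;			/* target was not translated */
	p[0] = rel;
	p[1] = rel >> 8;
	p[2] = rel >> 16;
	p[3] = rel >> 24;
}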

	return 0;

invalid:
	errno = EINVAL;
	return -1;
}

/*
 * lzx_free_decompressor - Free an LZX decompressor
 *
 * @decompressor: A decompressor that was allocated with
 *		  lzx_allocate_decompressor(), or NULL.
 */
void lzx_free_decompressor(struct lzx_decompressor *decompressor)
struct lzx_decompressor *
lzx_allocate_decompressor(size_t max_block_size)
{
	free(decompressor);
	unsigned window_order;
	struct lzx_decompressor *d;

	window_order = lzx_get_window_order(max_block_size);
	if (window_order == 0) {
		errno = EINVAL;
		return NULL;
	}

	d = aligned_malloc(sizeof(*d), DECODE_TABLE_ALIGNMENT);
	if (!d)
		return NULL;

	d->window_order = window_order;
	d->num_main_syms = lzx_get_num_main_syms(window_order);

	/* Initialize 'd->extra_offset_bits_minus_aligned'. */
	STATIC_ASSERT(sizeof(d->extra_offset_bits_minus_aligned) ==
		      sizeof(lzx_extra_offset_bits));
	STATIC_ASSERT(sizeof(d->extra_offset_bits) ==
		      sizeof(lzx_extra_offset_bits));
	memcpy(d->extra_offset_bits_minus_aligned, lzx_extra_offset_bits,
	       sizeof(lzx_extra_offset_bits));
	for (unsigned offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
	     offset_slot < LZX_MAX_OFFSET_SLOTS; offset_slot++)
	{
		d->extra_offset_bits_minus_aligned[offset_slot] -=
			LZX_NUM_ALIGNED_OFFSET_BITS;
	}

	return d;
}

void
lzx_free_decompressor(struct lzx_decompressor *d)
{
	aligned_free(d);
}
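/*
 * Editor's note: a minimal usage sketch of the revised LZX API (error
 * handling abbreviated; assumes these functions are declared in
 * system_compression.h, as the header hunk below suggests).
 */
#include <stddef.h>
#include "system_compression.h"

static int decompress_one_lzx_chunk(const void *in, size_t in_size,
				    void *out, size_t out_size)
{
	/* System-compressed files use an LZX block size of 32768 bytes,
	 * which is what the plugin itself passes in the next hunk. */
	struct lzx_decompressor *d = lzx_allocate_decompressor(32768);
	int ret;

	if (!d)
		return -1;	/* errno was set */
	ret = lzx_decompress(d, in, in_size, out, out_size);
	lzx_free_decompressor(d);
	return ret;
}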
@@ -211,7 +211,7 @@ struct ntfs_system_decompression_ctx {
static int allocate_decompressor(struct ntfs_system_decompression_ctx *ctx)
{
	if (ctx->format == FORMAT_LZX)
		ctx->decompressor = lzx_allocate_decompressor();
		ctx->decompressor = lzx_allocate_decompressor(32768);
	else
		ctx->decompressor = xpress_allocate_decompressor();
	if (!ctx->decompressor)
@@ -590,8 +590,13 @@ static int read_and_decompress_chunk(struct ntfs_system_decompression_ctx *ctx,
		return 0;

	/* The chunk was stored compressed.  Decompress its data. */
	return decompress(ctx, read_buffer, stored_size,
			  buffer, uncompressed_size);
	if (decompress(ctx, read_buffer, stored_size,
		       buffer, uncompressed_size)) {
		errno = EINVAL;
		return -1;
	}

	return 0;
}

/* Retrieve a pointer to the uncompressed data of the specified chunk.  On
@@ -60,7 +60,8 @@ extern void xpress_free_decompressor(struct xpress_decompressor *decompressor);

struct lzx_decompressor;

extern struct lzx_decompressor *lzx_allocate_decompressor(void);
extern struct lzx_decompressor *
lzx_allocate_decompressor(size_t max_block_size);

extern int lzx_decompress(struct lzx_decompressor *decompressor,
			  const void *compressed_data, size_t compressed_size,
@@ -0,0 +1,22 @@
/*
 * xpress_constants.h
 *
 * Constants for the XPRESS compression format.
 */

#ifndef _XPRESS_CONSTANTS_H
#define _XPRESS_CONSTANTS_H

#define XPRESS_NUM_CHARS	256
#define XPRESS_NUM_SYMBOLS	512
#define XPRESS_MAX_CODEWORD_LEN	15

#define XPRESS_END_OF_DATA	256

#define XPRESS_MIN_OFFSET	1
#define XPRESS_MAX_OFFSET	65535

#define XPRESS_MIN_MATCH_LEN	3
#define XPRESS_MAX_MATCH_LEN	65538

#endif /* _XPRESS_CONSTANTS_H */
@@ -1,9 +1,12 @@
/*
 * xpress_decompress.c - A decompressor for the XPRESS compression format
 * (Huffman variant), which can be used in "System Compressed" files.  This is
 * based on the code from wimlib.
 * xpress_decompress.c
 *
 * Copyright (C) 2015 Eric Biggers
 * A decompressor for the XPRESS compression format (Huffman variant).
 */

/*
 *
 * Copyright (C) 2012-2016 Eric Biggers
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
@@ -19,80 +22,85 @@
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */


/*
 * The XPRESS compression format is an LZ77 and Huffman-code based algorithm.
 * That means it is fairly similar to LZX compression, but XPRESS is simpler, so
 * it is a little faster to compress and decompress.
 *
 * The XPRESS compression format is mostly documented in a file called "[MS-XCA]
 * Xpress Compression Algorithm".  In the MSDN library, it can currently be
 * found under Open Specifications => Protocols => Windows Protocols => Windows
 * Server Protocols => [MS-XCA] Xpress Compression Algorithm".  The format in
 * WIMs is specifically the algorithm labeled as the "LZ77+Huffman Algorithm"
 * (there apparently are some other versions of XPRESS as well).
 *
 * If you are already familiar with the LZ77 algorithm and Huffman coding, the
 * XPRESS format is fairly simple.  The compressed data begins with 256 bytes
 * that contain 512 4-bit integers that are the lengths of the symbols in the
 * Huffman code used for match/literal headers.  In contrast with more
 * complicated formats such as DEFLATE and LZX, this is the only Huffman code
 * that is used for the entirety of the XPRESS compressed data, and the codeword
 * lengths are not encoded with a pretree.
 *
 * The rest of the compressed data is Huffman-encoded symbols.  Values 0 through
 * 255 represent the corresponding literal bytes.  Values 256 through 511
 * represent matches and may require extra bits or bytes to be read to get the
 * match offset and match length.
 *
 * The trickiest part is probably the way in which literal bytes for match
 * lengths are interleaved in the bitstream.
 *
 * Also, a caveat --- according to Microsoft's documentation for XPRESS,
 *
 *	"Some implementation of the decompression algorithm expect an extra
 *	symbol to mark the end of the data.  Specifically, some implementations
 *	fail during decompression if the Huffman symbol 256 is not found after
 *	the actual data."
 *
 * This is the case with Microsoft's implementation in WIMGAPI, for example.  So
 * although our implementation doesn't currently check for this extra symbol,
 * compressors would be wise to add it.
 */
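/*
 * Editor's note: a worked example of the codeword-length preamble described
 * above.  Byte i of the 256-byte preamble packs the lengths of symbols 2*i
 * and 2*i + 1, low nibble first; e.g. a preamble byte of 0x53 gives
 * lens[2*i] = 3 and lens[2*i + 1] = 5.  (This matches the nibble-unpacking
 * loop in xpress_decompress() below.)
 */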

#ifdef HAVE_CONFIG_H
#include "config.h"
# include "config.h"
#endif

#include <errno.h>
#include <stdlib.h>

#include <ntfs-3g/misc.h>

#include "decompress_common.h"
#include "system_compression.h"

#define XPRESS_NUM_SYMBOLS	512
#define XPRESS_MAX_CODEWORD_LEN	15
#define XPRESS_MIN_MATCH_LEN	3
#include "xpress_constants.h"

/* This value is chosen for fast decompression. */
#define XPRESS_TABLEBITS	12
#define XPRESS_TABLEBITS	11

/* Reusable heap-allocated memory for XPRESS decompression */
struct xpress_decompressor {
	union {
		DECODE_TABLE(decode_table, XPRESS_NUM_SYMBOLS,
			     XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
		u8 lens[XPRESS_NUM_SYMBOLS];
	};
	DECODE_TABLE_WORKING_SPACE(working_space, XPRESS_NUM_SYMBOLS,
				   XPRESS_MAX_CODEWORD_LEN);
} _aligned_attribute(DECODE_TABLE_ALIGNMENT);
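/*
 * Editor's note on the union above: 'lens' is only needed as the input to
 * make_huffman_decode_table(), so wimlib lets it share storage with the much
 * larger decode table.  This presumably relies on wimlib's
 * make_huffman_decode_table() tolerating that aliasing, i.e. consuming the
 * lengths (staging what it needs in 'working_space') before it writes any
 * table entries.
 */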

	/* The Huffman decoding table */
	u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS];

	/* An array that maps symbols to codeword lengths */
	u8 lens[XPRESS_NUM_SYMBOLS];

	/* Temporary space for make_huffman_decode_table() */
	u16 working_space[2 * (1 + XPRESS_MAX_CODEWORD_LEN) +
			  XPRESS_NUM_SYMBOLS];
};

/*
 * xpress_allocate_decompressor - Allocate an XPRESS decompressor
 *
 * Return the pointer to the decompressor on success, or return NULL and set
 * errno on failure.
 */
struct xpress_decompressor *xpress_allocate_decompressor(void)
int
xpress_decompress(struct xpress_decompressor *restrict d,
		  const void *restrict compressed_data, size_t compressed_size,
		  void *restrict uncompressed_data, size_t uncompressed_size)
{
	return ntfs_malloc(sizeof(struct xpress_decompressor));
}

/*
 * xpress_decompress - Decompress a buffer of XPRESS-compressed data
 *
 * @decompressor:      A decompressor that was allocated with
 *		       xpress_allocate_decompressor()
 * @compressed_data:   The buffer of data to decompress
 * @compressed_size:   Number of bytes of compressed data
 * @uncompressed_data: The buffer in which to store the decompressed data
 * @uncompressed_size: The number of bytes the data decompresses into
 *
 * Return 0 on success, or return -1 and set errno on failure.
 */
int xpress_decompress(struct xpress_decompressor *decompressor,
		      const void *compressed_data, size_t compressed_size,
		      void *uncompressed_data, size_t uncompressed_size)
{
	struct xpress_decompressor *d = decompressor;
	const u8 * const in_begin = compressed_data;
	u8 * const out_begin = uncompressed_data;
	u8 *out_next = out_begin;
	u8 * const out_end = out_begin + uncompressed_size;
	struct input_bitstream is;
	unsigned i;

	/* Read the Huffman codeword lengths. */
	if (compressed_size < XPRESS_NUM_SYMBOLS / 2)
		goto invalid;
	for (i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) {
		d->lens[i*2 + 0] = in_begin[i] & 0xF;
		d->lens[i*2 + 1] = in_begin[i] >> 4;
		return -1;
	for (int i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) {
		d->lens[2 * i + 0] = in_begin[i] & 0xf;
		d->lens[2 * i + 1] = in_begin[i] >> 4;
	}

	/* Build a decoding table for the Huffman code. */
@@ -100,7 +108,7 @@ int xpress_decompress(struct xpress_decompressor *decompressor,
				      XPRESS_TABLEBITS, d->lens,
				      XPRESS_MAX_CODEWORD_LEN,
				      d->working_space))
		goto invalid;
		return -1;

	/* Decode the matches and literals. */

@@ -115,7 +123,7 @@ int xpress_decompress(struct xpress_decompressor *decompressor,

		sym = read_huffsym(&is, d->decode_table,
				   XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
		if (sym < 256) {
		if (sym < XPRESS_NUM_CHARS) {
			/* Literal */
			*out_next++ = sym;
		} else {
@@ -135,30 +143,26 @@ int xpress_decompress(struct xpress_decompressor *decompressor,
			}
			length += XPRESS_MIN_MATCH_LEN;

			if (offset > (size_t)(out_next - out_begin))
				goto invalid;
			if (unlikely(lz_copy(length, offset,
					     out_begin, out_next, out_end,
					     XPRESS_MIN_MATCH_LEN)))
				return -1;

			if (length > (size_t)(out_end - out_next))
				goto invalid;

			out_next = lz_copy(out_next, length, offset, out_end,
					   XPRESS_MIN_MATCH_LEN);
			out_next += length;
		}
	}
	return 0;

invalid:
	errno = EINVAL;
	return -1;
}

/*
 * xpress_free_decompressor - Free an XPRESS decompressor
 *
 * @decompressor: A decompressor that was allocated with
 *		  xpress_allocate_decompressor(), or NULL.
 */
void xpress_free_decompressor(struct xpress_decompressor *decompressor)
struct xpress_decompressor *
xpress_allocate_decompressor(void)
{
	free(decompressor);
	return aligned_malloc(sizeof(struct xpress_decompressor),
			      DECODE_TABLE_ALIGNMENT);
}

void
xpress_free_decompressor(struct xpress_decompressor *d)
{
	aligned_free(d);
}