Initial release
commit
3ddd227ee8
|
@ -0,0 +1,20 @@
|
|||
*.la
|
||||
*.lo
|
||||
*.o
|
||||
/.libs
|
||||
/Makefile
|
||||
/Makefile.in
|
||||
/aclocal.m4
|
||||
/autom4te.cache/
|
||||
/build-aux/
|
||||
/config.h
|
||||
/config.h.in
|
||||
/config.h.in~
|
||||
/config.log
|
||||
/config.status
|
||||
/configure
|
||||
/libtool
|
||||
/m4/
|
||||
/src/.deps/
|
||||
/src/.dirstamp
|
||||
/stamp-h1
|
|
@ -0,0 +1,340 @@
|
|||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 2, June 1991
|
||||
|
||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
License is intended to guarantee your freedom to share and change free
|
||||
software--to make sure the software is free for all its users. This
|
||||
General Public License applies to most of the Free Software
|
||||
Foundation's software and to any other program whose authors commit to
|
||||
using it. (Some other Free Software Foundation software is covered by
|
||||
the GNU Library General Public License instead.) You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
this service if you wish), that you receive source code or can get it
|
||||
if you want it, that you can change the software or use pieces of it
|
||||
in new free programs; and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
anyone to deny you these rights or to ask you to surrender the rights.
|
||||
These restrictions translate to certain responsibilities for you if you
|
||||
distribute copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must give the recipients all the rights that
|
||||
you have. You must make sure that they, too, receive or can get the
|
||||
source code. And you must show them these terms so they know their
|
||||
rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and
|
||||
(2) offer you this license which gives you legal permission to copy,
|
||||
distribute and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain
|
||||
that everyone understands that there is no warranty for this free
|
||||
software. If the software is modified by someone else and passed on, we
|
||||
want its recipients to know that what they have is not the original, so
|
||||
that any problems introduced by others will not reflect on the original
|
||||
authors' reputations.
|
||||
|
||||
Finally, any free program is threatened constantly by software
|
||||
patents. We wish to avoid the danger that redistributors of a free
|
||||
program will individually obtain patent licenses, in effect making the
|
||||
program proprietary. To prevent this, we have made it clear that any
|
||||
patent must be licensed for everyone's free use or not licensed at all.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License applies to any program or other work which contains
|
||||
a notice placed by the copyright holder saying it may be distributed
|
||||
under the terms of this General Public License. The "Program", below,
|
||||
refers to any such program or work, and a "work based on the Program"
|
||||
means either the Program or any derivative work under copyright law:
|
||||
that is to say, a work containing the Program or a portion of it,
|
||||
either verbatim or with modifications and/or translated into another
|
||||
language. (Hereinafter, translation is included without limitation in
|
||||
the term "modification".) Each licensee is addressed as "you".
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running the Program is not restricted, and the output from the Program
|
||||
is covered only if its contents constitute a work based on the
|
||||
Program (independent of having been made by running the Program).
|
||||
Whether that is true depends on what the Program does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's
|
||||
source code as you receive it, in any medium, provided that you
|
||||
conspicuously and appropriately publish on each copy an appropriate
|
||||
copyright notice and disclaimer of warranty; keep intact all the
|
||||
notices that refer to this License and to the absence of any warranty;
|
||||
and give any other recipients of the Program a copy of this License
|
||||
along with the Program.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy, and
|
||||
you may at your option offer warranty protection in exchange for a fee.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion
|
||||
of it, thus forming a work based on the Program, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) You must cause the modified files to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
b) You must cause any work that you distribute or publish, that in
|
||||
whole or in part contains or is derived from the Program or any
|
||||
part thereof, to be licensed as a whole at no charge to all third
|
||||
parties under the terms of this License.
|
||||
|
||||
c) If the modified program normally reads commands interactively
|
||||
when run, you must cause it, when started running for such
|
||||
interactive use in the most ordinary way, to print or display an
|
||||
announcement including an appropriate copyright notice and a
|
||||
notice that there is no warranty (or else, saying that you provide
|
||||
a warranty) and that users may redistribute the program under
|
||||
these conditions, and telling the user how to view a copy of this
|
||||
License. (Exception: if the Program itself is interactive but
|
||||
does not normally print such an announcement, your work based on
|
||||
the Program is not required to print an announcement.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Program,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Program, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Program.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Program
|
||||
with the Program (or with a work based on the Program) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may copy and distribute the Program (or a work based on it,
|
||||
under Section 2) in object code or executable form under the terms of
|
||||
Sections 1 and 2 above provided that you also do one of the following:
|
||||
|
||||
a) Accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of Sections
|
||||
1 and 2 above on a medium customarily used for software interchange; or,
|
||||
|
||||
b) Accompany it with a written offer, valid for at least three
|
||||
years, to give any third party, for a charge no more than your
|
||||
cost of physically performing source distribution, a complete
|
||||
machine-readable copy of the corresponding source code, to be
|
||||
distributed under the terms of Sections 1 and 2 above on a medium
|
||||
customarily used for software interchange; or,
|
||||
|
||||
c) Accompany it with the information you received as to the offer
|
||||
to distribute corresponding source code. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form with such
|
||||
an offer, in accord with Subsection b above.)
|
||||
|
||||
The source code for a work means the preferred form of the work for
|
||||
making modifications to it. For an executable work, complete source
|
||||
code means all the source code for all modules it contains, plus any
|
||||
associated interface definition files, plus the scripts used to
|
||||
control compilation and installation of the executable. However, as a
|
||||
special exception, the source code distributed need not include
|
||||
anything that is normally distributed (in either source or binary
|
||||
form) with the major components (compiler, kernel, and so on) of the
|
||||
operating system on which the executable runs, unless that component
|
||||
itself accompanies the executable.
|
||||
|
||||
If distribution of executable or object code is made by offering
|
||||
access to copy from a designated place, then offering equivalent
|
||||
access to copy the source code from the same place counts as
|
||||
distribution of the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
4. You may not copy, modify, sublicense, or distribute the Program
|
||||
except as expressly provided under this License. Any attempt
|
||||
otherwise to copy, modify, sublicense or distribute the Program is
|
||||
void, and will automatically terminate your rights under this License.
|
||||
However, parties who have received copies, or rights, from you under
|
||||
this License will not have their licenses terminated so long as such
|
||||
parties remain in full compliance.
|
||||
|
||||
5. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Program or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Program (or any work based on the
|
||||
Program), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Program or works based on it.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute or modify the Program subject to
|
||||
these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties to
|
||||
this License.
|
||||
|
||||
7. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Program at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Program by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Program.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under
|
||||
any particular circumstance, the balance of the section is intended to
|
||||
apply and the section as a whole is intended to apply in other
|
||||
circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system, which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
8. If the distribution and/or use of the Program is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Program under this License
|
||||
may add an explicit geographical distribution limitation excluding
|
||||
those countries, so that distribution is permitted only in or among
|
||||
countries not thus excluded. In such case, this License incorporates
|
||||
the limitation as if written in the body of this License.
|
||||
|
||||
9. The Free Software Foundation may publish revised and/or new versions
|
||||
of the General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of this License which applies to it and "any
|
||||
later version", you have the option of following the terms and conditions
|
||||
either of that version or of any later version published by the Free
|
||||
Software Foundation. If the Program does not specify a version number of
|
||||
this License, you may choose any version ever published by the Free Software
|
||||
Foundation.
|
||||
|
||||
10. If you wish to incorporate parts of the Program into other free
|
||||
programs whose distribution conditions are different, write to the author
|
||||
to ask for permission. For software which is copyrighted by the Free
|
||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||
make exceptions for this. Our decision will be guided by the two goals
|
||||
of preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||
REPAIR OR CORRECTION.
|
||||
|
||||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this
|
||||
when it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) year name of author
|
||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, the commands you use may
|
||||
be called something other than `show w' and `show c'; they could even be
|
||||
mouse-clicks or menu items--whatever suits your program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
This General Public License does not permit incorporating your program into
|
||||
proprietary programs. If your program is a subroutine library, you may
|
||||
consider it more useful to permit linking proprietary applications with the
|
||||
library. If this is what you want to do, use the GNU Library General
|
||||
Public License instead of this License.
|
|
@ -0,0 +1,21 @@
|
|||
ACLOCAL_AMFLAGS = -I m4
|
||||
|
||||
EXTRA_DIST = README.md COPYING
|
||||
|
||||
plugindir = $(libdir)/ntfs-3g
|
||||
|
||||
plugin_LTLIBRARIES = ntfs-plugin-80000017.la
|
||||
|
||||
ntfs_plugin_80000017_la_SOURCES = \
|
||||
src/decompress_common.c \
|
||||
src/decompress_common.h \
|
||||
src/lzx_decompress.c \
|
||||
src/plugin.c \
|
||||
src/system_compression.c \
|
||||
src/system_compression.h \
|
||||
src/xpress_decompress.c
|
||||
|
||||
ntfs_plugin_80000017_la_LDFLAGS = -module -shared -avoid-version
|
||||
ntfs_plugin_80000017_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64
|
||||
ntfs_plugin_80000017_la_CFLAGS = $(LIBNTFS_3G_CFLAGS)
|
||||
ntfs_plugin_80000017_la_LIBADD = $(LIBNTFS_3G_LIBS)
|
|
@ -0,0 +1,42 @@
|
|||
# Overview
|
||||
|
||||
System compression, also known as "Compact OS", is a Windows feature that allows
|
||||
rarely modified files to be compressed using the XPRESS or LZX compression
|
||||
formats. It is not built directly into NTFS but rather is implemented using
|
||||
reparse points. This feature appeared in Windows 10 and it appears that many
|
||||
Windows 10 systems have been using it by default.
|
||||
|
||||
This repository contains a plugin which enables the NTFS-3G FUSE driver to
|
||||
transparently read from system-compressed files. It must be built against
|
||||
NTFS-3G version 2016.2.22AR.1 or later, since that was the first version to
|
||||
include support for reparse point plugins.
|
||||
|
||||
Currently, only reading is supported. Compressing an existing file may be done
|
||||
by using the "compact" utility on Windows, with one of the options below
|
||||
("xpress4k" is the weakest and fastest, "lzx" is the strongest and slowest):
|
||||
|
||||
/exe:xpress4k
|
||||
/exe:xpress8k
|
||||
/exe:xpress16k
|
||||
/exe:lzx
|
||||
|
||||
# Installation
|
||||
|
||||
The plugin can be built by running `./configure && make`. The build system must
|
||||
be able to find the NTFS-3G library and headers. On some platforms this may
|
||||
require that the "ntfs-3g-dev" package or similar be installed in addition to
|
||||
the main "ntfs-3g" package.
|
||||
|
||||
After compiling, run `make install` to install the plugin to the NTFS-3G plugin
|
||||
directory, which will be a subdirectory "ntfs-3g" of the system library
|
||||
directory (`$libdir`). An example full path to the installed plugin is
|
||||
`/usr/lib/ntfs-3g/ntfs-plugin-80000017.so`. It may differ slightly on different
|
||||
platforms. `make install` will create the plugin directory if it does not
|
||||
already exist.
|
||||
|
||||
# License
|
||||
|
||||
This software may be redistributed and/or modified under the terms of the GNU
|
||||
General Public License as published by the Free Software Foundation, either
|
||||
version 2 of the License, or (at your option) any later version. There is NO
|
||||
WARRANTY, to the extent permitted by law. See the COPYING file for details.
|
|
@ -0,0 +1,29 @@
|
|||
AC_INIT([ntfs-3g-system-compression], [0.1], [ebiggers3@gmail.com])
|
||||
|
||||
AC_CONFIG_SRCDIR([src/plugin.c])
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
AC_CONFIG_AUX_DIR([build-aux])
|
||||
AM_INIT_AUTOMAKE([-Wall -Werror subdir-objects foreign])
|
||||
AM_SILENT_RULES([yes])
|
||||
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
|
||||
|
||||
AC_PROG_CC
|
||||
AC_C_BIGENDIAN
|
||||
|
||||
LT_INIT([dlopen])
|
||||
|
||||
AC_CONFIG_HEADERS([config.h])
|
||||
AC_CONFIG_FILES([Makefile])
|
||||
|
||||
AC_CHECK_HEADERS([errno.h \
|
||||
limits.h \
|
||||
stdarg.h \
|
||||
stddef.h \
|
||||
stdlib.h \
|
||||
string.h \
|
||||
sys/types.h])
|
||||
|
||||
# TODO: should be changed to require 2017.#.## when released
|
||||
PKG_CHECK_MODULES([LIBNTFS_3G], [libntfs-3g >= 2016.2.22], [],
|
||||
[AC_MSG_ERROR(["Unable to find libntfs-3g"])])
|
||||
AC_OUTPUT
|
|
@ -0,0 +1,325 @@
|
|||
/*
|
||||
* decompress_common.c - Code shared by the XPRESS and LZX decompressors
|
||||
*
|
||||
* Copyright (C) 2015 Eric Biggers
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU General Public License as published by the Free Software
|
||||
* Foundation, either version 2 of the License, or (at your option) any later
|
||||
* version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "decompress_common.h"
|
||||
|
||||
/*
|
||||
* make_huffman_decode_table() -
|
||||
*
|
||||
* Build a decoding table for a canonical prefix code, or "Huffman code".
|
||||
*
|
||||
* This is an internal function, not part of the library API!
|
||||
*
|
||||
* This takes as input the length of the codeword for each symbol in the
|
||||
* alphabet and produces as output a table that can be used for fast
|
||||
* decoding of prefix-encoded symbols using read_huffsym().
|
||||
*
|
||||
* Strictly speaking, a canonical prefix code might not be a Huffman
|
||||
* code. But this algorithm will work either way; and in fact, since
|
||||
* Huffman codes are defined in terms of symbol frequencies, there is no
|
||||
* way for the decompressor to know whether the code is a true Huffman
|
||||
* code or not until all symbols have been decoded.
|
||||
*
|
||||
* Because the prefix code is assumed to be "canonical", it can be
|
||||
* reconstructed directly from the codeword lengths. A prefix code is
|
||||
* canonical if and only if a longer codeword never lexicographically
|
||||
* precedes a shorter codeword, and the lexicographic ordering of
|
||||
* codewords of the same length is the same as the lexicographic ordering
|
||||
* of the corresponding symbols. Consequently, we can sort the symbols
|
||||
* primarily by codeword length and secondarily by symbol value, then
|
||||
* reconstruct the prefix code by generating codewords lexicographically
|
||||
* in that order.
|
||||
*
|
||||
* This function does not, however, generate the prefix code explicitly.
|
||||
* Instead, it directly builds a table for decoding symbols using the
|
||||
* code. The basic idea is this: given the next 'max_codeword_len' bits
|
||||
* in the input, we can look up the decoded symbol by indexing a table
|
||||
* containing 2**max_codeword_len entries. A codeword with length
|
||||
* 'max_codeword_len' will have exactly one entry in this table, whereas
|
||||
* a codeword shorter than 'max_codeword_len' will have multiple entries
|
||||
* in this table. Precisely, a codeword of length n will be represented
|
||||
* by 2**(max_codeword_len - n) entries in this table. The 0-based index
|
||||
* of each such entry will contain the corresponding codeword as a prefix
|
||||
* when zero-padded on the left to 'max_codeword_len' binary digits.
|
||||
*
|
||||
* That's the basic idea, but we implement two optimizations regarding
|
||||
* the format of the decode table itself:
|
||||
*
|
||||
* - For many compression formats, the maximum codeword length is too
|
||||
* long for it to be efficient to build the full decoding table
|
||||
* whenever a new prefix code is used. Instead, we can build the table
|
||||
* using only 2**table_bits entries, where 'table_bits' is some number
|
||||
* less than or equal to 'max_codeword_len'. Then, only codewords of
|
||||
* length 'table_bits' and shorter can be directly looked up. For
|
||||
* longer codewords, the direct lookup instead produces the root of a
|
||||
* binary tree. Using this tree, the decoder can do traditional
|
||||
* bit-by-bit decoding of the remainder of the codeword. Child nodes
|
||||
* are allocated in extra entries at the end of the table; leaf nodes
|
||||
* contain symbols. Note that the long-codeword case is, in general,
|
||||
* not performance critical, since in Huffman codes the most frequently
|
||||
* used symbols are assigned the shortest codeword lengths.
|
||||
*
|
||||
* - When we decode a symbol using a direct lookup of the table, we still
|
||||
* need to know its length so that the bitstream can be advanced by the
|
||||
* appropriate number of bits. The simple solution is to simply retain
|
||||
* the 'lens' array and use the decoded symbol as an index into it.
|
||||
* However, this requires two separate array accesses in the fast path.
|
||||
* The optimization is to store the length directly in the decode
|
||||
* table. We use the bottom 11 bits for the symbol and the top 5 bits
|
||||
* for the length. In addition, to combine this optimization with the
|
||||
* previous one, we introduce a special case where the top 2 bits of
|
||||
* the length are both set if the entry is actually the root of a
|
||||
* binary tree.
|
||||
*
|
||||
* @decode_table:
|
||||
* The array in which to create the decoding table. This must have
|
||||
* a length of at least ((2**table_bits) + 2 * num_syms) entries.
|
||||
*
|
||||
* @num_syms:
|
||||
* The number of symbols in the alphabet; also, the length of the
|
||||
* 'lens' array. Must be less than or equal to 2048.
|
||||
*
|
||||
* @table_bits:
|
||||
* The order of the decode table size, as explained above. Must be
|
||||
* less than or equal to 13.
|
||||
*
|
||||
* @lens:
|
||||
* An array of length @num_syms, indexable by symbol, that gives the
|
||||
* length of the codeword, in bits, for that symbol. The length can
|
||||
* be 0, which means that the symbol does not have a codeword
|
||||
* assigned.
|
||||
*
|
||||
* @max_codeword_len:
|
||||
* The longest codeword length allowed in the compression format.
|
||||
* All entries in 'lens' must be less than or equal to this value.
|
||||
* This must be less than or equal to 23.
|
||||
*
|
||||
* @working_space:
|
||||
* A temporary array of length '2 * (max_codeword_len + 1) +
|
||||
* num_syms'.
|
||||
*
|
||||
* Returns 0 on success, or -1 if the lengths do not form a valid prefix
|
||||
* code.
|
||||
*/
|
||||
int make_huffman_decode_table(u16 decode_table[], const unsigned num_syms,
			      const unsigned table_bits, const u8 lens[],
			      const unsigned max_codeword_len,
			      u16 working_space[])
{
	const unsigned table_num_entries = 1 << table_bits;
	/* working_space is carved into three consecutive arrays. */
	u16 * const len_counts = &working_space[0];
	u16 * const offsets = &working_space[1 * (max_codeword_len + 1)];
	u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];
	int left;
	u16 *decode_table_ptr;
	unsigned sym_idx;
	unsigned codeword_len;
	unsigned stores_per_loop;
	unsigned decode_table_pos;
	unsigned len;
	unsigned sym;

	/* Count how many symbols have each possible codeword length.
	 * Note that a length of 0 indicates the corresponding symbol is not
	 * used in the code and therefore does not have a codeword.  */
	for (len = 0; len <= max_codeword_len; len++)
		len_counts[len] = 0;
	for (sym = 0; sym < num_syms; sym++)
		len_counts[lens[sym]]++;

	/* We can assume all lengths are <= max_codeword_len, but we
	 * cannot assume they form a valid prefix code.  A codeword of
	 * length n should require a proportion of the codespace equaling
	 * (1/2)^n.  The code is valid if and only if the codespace is
	 * exactly filled by the lengths, by this measure.  */
	left = 1;
	for (len = 1; len <= max_codeword_len; len++) {
		left <<= 1;
		left -= len_counts[len];
		if (left < 0) {
			/* The lengths overflow the codespace; that is, the code
			 * is over-subscribed.  */
			return -1;
		}
	}

	if (left != 0) {
		/* The lengths do not fill the codespace; that is, they form an
		 * incomplete set.  */
		if (left == (1 << max_codeword_len)) {
			/* The code is completely empty.  This is arguably
			 * invalid, but in fact it is valid in LZX and XPRESS,
			 * so we must allow it.  By definition, no symbols can
			 * be decoded with an empty code.  Consequently, we
			 * technically don't even need to fill in the decode
			 * table.  However, to avoid accessing uninitialized
			 * memory if the algorithm nevertheless attempts to
			 * decode symbols using such a code, we zero out the
			 * decode table.  */
			memset(decode_table, 0,
			       table_num_entries * sizeof(decode_table[0]));
			return 0;
		}
		return -1;
	}

	/* Sort the symbols primarily by length and secondarily by symbol
	 * order.  */

	/* Initialize 'offsets' so that offsets[len] for 1 <= len <=
	 * max_codeword_len is the number of codewords shorter than 'len'
	 * bits.  */
	offsets[1] = 0;
	for (len = 1; len < max_codeword_len; len++)
		offsets[len + 1] = offsets[len] + len_counts[len];

	/* Use the 'offsets' array to sort the symbols.  Note that we do not
	 * include symbols that are not used in the code.  Consequently, fewer
	 * than 'num_syms' entries in 'sorted_syms' may be filled.  */
	for (sym = 0; sym < num_syms; sym++)
		if (lens[sym] != 0)
			sorted_syms[offsets[lens[sym]]++] = sym;

	/* Fill entries for codewords with length <= table_bits
	 * --- that is, those short enough for a direct mapping.
	 *
	 * The table will start with entries for the shortest codeword(s), which
	 * have the most entries.  From there, the number of entries per
	 * codeword will decrease.
	 *
	 * The loop is additionally bounded by max_codeword_len: 'len_counts'
	 * only has (max_codeword_len + 1) entries, so when table_bits exceeds
	 * max_codeword_len an unbounded loop would read into 'offsets' and
	 * treat those values as symbol counts.  A complete code has already
	 * filled the whole table once every length <= max_codeword_len has been
	 * processed, so stopping there loses nothing.  */
	decode_table_ptr = decode_table;
	sym_idx = 0;
	codeword_len = 1;
	stores_per_loop = (1 << (table_bits - codeword_len));
	for (; stores_per_loop != 0 && codeword_len <= max_codeword_len;
	     codeword_len++, stores_per_loop >>= 1) {
		unsigned end_sym_idx = sym_idx + len_counts[codeword_len];

		for (; sym_idx < end_sym_idx; sym_idx++) {
			/* High 5 bits: codeword length; low 11 bits: symbol. */
			const u16 entry = ((u32)codeword_len << 11) |
					  sorted_syms[sym_idx];
			unsigned n = stores_per_loop;

			do {
				*decode_table_ptr++ = entry;
			} while (--n);
		}
	}

	/* If we've filled in the entire table, we are done.  Otherwise,
	 * there are codewords longer than table_bits for which we must
	 * generate binary trees.  */
	decode_table_pos = (unsigned)(decode_table_ptr - decode_table);
	if (decode_table_pos != table_num_entries) {
		unsigned j;
		unsigned next_free_tree_slot;
		unsigned cur_codeword;

		/* First, zero out the remaining entries.  This is
		 * necessary so that these entries appear as
		 * "unallocated" in the next part.  Each of these entries
		 * will eventually be filled with the representation of
		 * the root node of a binary tree.  */
		j = decode_table_pos;
		do {
			decode_table[j] = 0;
		} while (++j != table_num_entries);

		/* We allocate child nodes starting at the end of the
		 * direct lookup table.  Note that there should be
		 * 2*num_syms extra entries for this purpose, although
		 * fewer than this may actually be needed.  */
		next_free_tree_slot = table_num_entries;

		/* Iterate through each codeword with length greater than
		 * 'table_bits', primarily in order of codeword length
		 * and secondarily in order of symbol.  */
		for (cur_codeword = decode_table_pos << 1;
		     codeword_len <= max_codeword_len;
		     codeword_len++, cur_codeword <<= 1)
		{
			unsigned end_sym_idx = sym_idx + len_counts[codeword_len];

			for (; sym_idx < end_sym_idx; sym_idx++, cur_codeword++)
			{
				/* 'sorted_sym' is the symbol represented by the
				 * codeword.  */
				unsigned sorted_sym = sorted_syms[sym_idx];

				unsigned extra_bits = codeword_len - table_bits;

				unsigned node_idx = cur_codeword >> extra_bits;

				/* Go through each bit of the current codeword
				 * beyond the prefix of length @table_bits and
				 * walk the appropriate binary tree, allocating
				 * any slots that have not yet been allocated.
				 *
				 * Note that the 'pointer' entry to the binary
				 * tree, which is stored in the direct lookup
				 * portion of the table, is represented
				 * identically to other internal (non-leaf)
				 * nodes of the binary tree; it can be thought
				 * of as simply the root of the tree.  The
				 * representation of these internal nodes is
				 * simply the index of the left child combined
				 * with the special bits 0xC000 to distinguish
				 * the entry from direct mapping and leaf node
				 * entries.  */
				do {
					/* At least one bit remains in the
					 * codeword, but the current node is an
					 * unallocated leaf.  Change it to an
					 * internal node.  */
					if (decode_table[node_idx] == 0) {
						decode_table[node_idx] =
							next_free_tree_slot | 0xC000;
						decode_table[next_free_tree_slot++] = 0;
						decode_table[next_free_tree_slot++] = 0;
					}

					/* Go to the left child if the next bit
					 * in the codeword is 0; otherwise go to
					 * the right child.  */
					node_idx = decode_table[node_idx] & 0x3FFF;
					--extra_bits;
					node_idx += (cur_codeword >> extra_bits) & 1;
				} while (extra_bits != 0);

				/* We've traversed the tree using the entire
				 * codeword, and we're now at the entry where
				 * the actual symbol will be stored.  This is
				 * distinguished from internal nodes by not
				 * having its high two bits set.  */
				decode_table[node_idx] = sorted_sym;
			}
		}
	}
	return 0;
}
|
|
@ -0,0 +1,375 @@
|
|||
/*
|
||||
* decompress_common.h - Code shared by the XPRESS and LZX decompressors
|
||||
*
|
||||
* Copyright (C) 2015 Eric Biggers
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU General Public License as published by the Free Software
|
||||
* Foundation, either version 2 of the License, or (at your option) any later
|
||||
* version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <ntfs-3g/endians.h>
|
||||
#include <ntfs-3g/types.h>
|
||||
|
||||
/* "Force inline" macro (not required, but helpful for performance) */
|
||||
#if defined(__GNUC__)
#  define forceinline	inline __attribute__((always_inline))
#else
#  define forceinline	inline
#endif

/* Architectures for which the word-at-a-time match copy is switched on */
#if defined(__x86_64__) || defined(__i386__) || defined(__ARM_FEATURE_UNALIGNED)
#  define FAST_UNALIGNED_ACCESS
#endif

/* Number of bytes in a machine word; size_t is used as the word type */
#define WORDBYTES	(sizeof(size_t))
|
||||
|
||||
/* Inline functions to read and write unaligned data.
|
||||
* We use just memcpy() for this. It is standard and modern compilers will
|
||||
* usually replace it with load/store instructions. */
|
||||
|
||||
/* Fetch a little-endian 16-bit value from @p, which need not be aligned. */
static forceinline u16 get_unaligned_le16(const u8 *p)
{
	le16 raw;

	memcpy(&raw, p, 2);

	return le16_to_cpu(raw);
}
|
||||
|
||||
/* Fetch a little-endian 32-bit value from @p, which need not be aligned. */
static forceinline u32 get_unaligned_le32(const u8 *p)
{
	le32 raw;

	memcpy(&raw, p, 4);

	return le32_to_cpu(raw);
}
|
||||
|
||||
/* Store @v at @p as a little-endian 32-bit value; @p need not be aligned. */
static forceinline void put_unaligned_le32(u32 v, u8 *p)
{
	const le32 raw = cpu_to_le32(v);

	memcpy(p, &raw, 4);
}
|
||||
|
||||
/* Load a "word" with platform-dependent size and endianness. */
|
||||
static forceinline size_t get_unaligned_word(const u8 *p)
{
	size_t word;

	/* Bytes are taken as-is: the result is in native byte order. */
	memcpy(&word, p, sizeof(word));

	return word;
}
|
||||
|
||||
/* Store a "word" with platform-dependent size and endianness. */
|
||||
static forceinline void put_unaligned_word(size_t v, u8 *p)
{
	/* Bytes are written as-is: @v is stored in native byte order. */
	memcpy(p, &v, sizeof(v));
}
|
||||
|
||||
/* Copy a "word" with platform-dependent size. */
|
||||
static forceinline void copy_unaligned_word(const u8 *src, u8 *dst)
{
	/* Load the whole word first, then store it. */
	const size_t word = get_unaligned_word(src);

	put_unaligned_word(word, dst);
}
|
||||
|
||||
/* Generate a "word" with platform-dependent size whose bytes all contain the
|
||||
* value 'b'. */
|
||||
static forceinline size_t repeat_byte(u8 b)
{
	/* All-ones divided by 0xFF is the word 0x0101...01; multiplying it by
	 * the byte value places a copy of that byte in every byte lane. */
	const size_t one_per_byte = (size_t)-1 / 0xFF;

	return (size_t)b * one_per_byte;
}
|
||||
|
||||
/* Structure that encapsulates a block of in-memory data being interpreted as a
|
||||
* stream of bits, optionally with interwoven literal bytes. Bits are assumed
|
||||
* to be stored in little endian 16-bit coding units, with the bits ordered high
|
||||
* to low. */
|
||||
struct input_bitstream {
|
||||
|
||||
/* Bits that have been read from the input buffer. The bits are
|
||||
* left-justified; the next bit is always bit 31. */
|
||||
u32 bitbuf;
|
||||
|
||||
/* Number of bits currently held in @bitbuf. */
|
||||
unsigned bitsleft;
|
||||
|
||||
/* Pointer to the next byte to be retrieved from the input buffer. */
|
||||
const u8 *next;
|
||||
|
||||
/* Pointer to just past the end of the input buffer. */
|
||||
const u8 *end;
|
||||
};
|
||||
|
||||
/* Initialize a bitstream to read from the specified input buffer. */
|
||||
static forceinline void init_input_bitstream(struct input_bitstream *is,
|
||||
const void *buffer, u32 size)
|
||||
{
|
||||
is->bitbuf = 0;
|
||||
is->bitsleft = 0;
|
||||
is->next = buffer;
|
||||
is->end = is->next + size;
|
||||
}
|
||||
|
||||
/* Ensure the bit buffer variable for the bitstream contains at least @num_bits
|
||||
* bits. Following this, bitstream_peek_bits() and/or bitstream_remove_bits()
|
||||
* may be called on the bitstream to peek or remove up to @num_bits bits. Note
|
||||
* that @num_bits must be <= 16. */
|
||||
static forceinline void bitstream_ensure_bits(struct input_bitstream *is,
|
||||
unsigned num_bits)
|
||||
{
|
||||
if (is->bitsleft < num_bits) {
|
||||
if (is->end - is->next >= 2) {
|
||||
is->bitbuf |= (u32)get_unaligned_le16(is->next)
|
||||
<< (16 - is->bitsleft);
|
||||
is->next += 2;
|
||||
}
|
||||
is->bitsleft += 16;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the next @num_bits bits from the bitstream, without removing them.
|
||||
* There must be at least @num_bits remaining in the buffer variable, from a
|
||||
* previous call to bitstream_ensure_bits(). */
|
||||
static forceinline u32 bitstream_peek_bits(const struct input_bitstream *is,
|
||||
unsigned num_bits)
|
||||
{
|
||||
if (num_bits == 0)
|
||||
return 0;
|
||||
return is->bitbuf >> (32 - num_bits);
|
||||
}
|
||||
|
||||
/* Remove @num_bits from the bitstream. There must be at least @num_bits
|
||||
* remaining in the buffer variable, from a previous call to
|
||||
* bitstream_ensure_bits(). */
|
||||
static forceinline void bitstream_remove_bits(struct input_bitstream *is,
|
||||
unsigned num_bits)
|
||||
{
|
||||
is->bitbuf <<= num_bits;
|
||||
is->bitsleft -= num_bits;
|
||||
}
|
||||
|
||||
/* Remove and return @num_bits bits from the bitstream. There must be at least
|
||||
* @num_bits remaining in the buffer variable, from a previous call to
|
||||
* bitstream_ensure_bits(). */
|
||||
static forceinline u32 bitstream_pop_bits(struct input_bitstream *is,
|
||||
unsigned num_bits)
|
||||
{
|
||||
u32 bits = bitstream_peek_bits(is, num_bits);
|
||||
bitstream_remove_bits(is, num_bits);
|
||||
return bits;
|
||||
}
|
||||
|
||||
/* Read and return the next @num_bits bits from the bitstream. */
|
||||
static forceinline u32 bitstream_read_bits(struct input_bitstream *is,
|
||||
unsigned num_bits)
|
||||
{
|
||||
bitstream_ensure_bits(is, num_bits);
|
||||
return bitstream_pop_bits(is, num_bits);
|
||||
}
|
||||
|
||||
/* Read and return the next literal byte embedded in the bitstream. */
|
||||
static forceinline u8 bitstream_read_byte(struct input_bitstream *is)
|
||||
{
|
||||
if (is->end == is->next)
|
||||
return 0;
|
||||
return *is->next++;
|
||||
}
|
||||
|
||||
/* Read and return the next 16-bit integer embedded in the bitstream. */
|
||||
static forceinline u16 bitstream_read_u16(struct input_bitstream *is)
|
||||
{
|
||||
u16 v;
|
||||
|
||||
if (is->end - is->next < 2)
|
||||
return 0;
|
||||
v = get_unaligned_le16(is->next);
|
||||
is->next += 2;
|
||||
return v;
|
||||
}
|
||||
|
||||
/* Read and return the next 32-bit integer embedded in the bitstream. */
|
||||
static forceinline u32 bitstream_read_u32(struct input_bitstream *is)
|
||||
{
|
||||
u32 v;
|
||||
|
||||
if (is->end - is->next < 4)
|
||||
return 0;
|
||||
v = get_unaligned_le32(is->next);
|
||||
is->next += 4;
|
||||
return v;
|
||||
}
|
||||
|
||||
/* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
|
||||
* Return either a pointer to the byte past the last written, or NULL if the
|
||||
* read overflows the input buffer. */
|
||||
static forceinline void *bitstream_read_bytes(struct input_bitstream *is,
|
||||
void *dst_buffer, size_t count)
|
||||
{
|
||||
if ((size_t)(is->end - is->next) < count)
|
||||
return NULL;
|
||||
memcpy(dst_buffer, is->next, count);
|
||||
is->next += count;
|
||||
return (u8 *)dst_buffer + count;
|
||||
}
|
||||
|
||||
/* Align the input bitstream on a coding-unit boundary. */
|
||||
static forceinline void bitstream_align(struct input_bitstream *is)
|
||||
{
|
||||
is->bitsleft = 0;
|
||||
is->bitbuf = 0;
|
||||
}
|
||||
|
||||
/*
 * Build a Huffman decoding table from the codeword lengths in @lens.
 *
 * @decode_table:	 output table; the first (1 << @table_bits) entries are
 *			 the direct lookup part, followed by binary tree nodes
 * @num_syms:		 number of symbols, i.e. the length of @lens
 * @table_bits:		 log2 of the size of the direct lookup part
 * @lens:		 codeword length of each symbol; 0 means "unused"
 * @max_codeword_len:	 upper bound on every entry of @lens
 * @working_space:	 scratch array of
 *			 2 * (@max_codeword_len + 1) + @num_syms entries
 *
 * Returns 0 on success, or -1 if the lengths do not form a valid prefix code.
 */
extern int make_huffman_decode_table(u16 decode_table[], const unsigned num_syms,
				     const unsigned table_bits, const u8 lens[],
				     const unsigned max_codeword_len,
				     u16 working_space[]);
|
||||
|
||||
|
||||
/* Reads and returns the next Huffman-encoded symbol from a bitstream. If the
|
||||
* input data is exhausted, the Huffman symbol is decoded as if the missing bits
|
||||
* are all zeroes. */
|
||||
static forceinline unsigned read_huffsym(struct input_bitstream *istream,
|
||||
const u16 decode_table[],
|
||||
unsigned table_bits,
|
||||
unsigned max_codeword_len)
|
||||
{
|
||||
unsigned entry;
|
||||
unsigned key_bits;
|
||||
|
||||
bitstream_ensure_bits(istream, max_codeword_len);
|
||||
|
||||
/* Index the decode table by the next table_bits bits of the input. */
|
||||
key_bits = bitstream_peek_bits(istream, table_bits);
|
||||
entry = decode_table[key_bits];
|
||||
if (entry < 0xC000) {
|
||||
/* Fast case: The decode table directly provided the
|
||||
* symbol and codeword length. The low 11 bits are the
|
||||
* symbol, and the high 5 bits are the codeword length. */
|
||||
bitstream_remove_bits(istream, entry >> 11);
|
||||
return entry & 0x7FF;
|
||||
} else {
|
||||
/* Slow case: The codeword for the symbol is longer than
|
||||
* table_bits, so the symbol does not have an entry
|
||||
* directly in the first (1 << table_bits) entries of the
|
||||
* decode table. Traverse the appropriate binary tree
|
||||
* bit-by-bit to decode the symbol. */
|
||||
bitstream_remove_bits(istream, table_bits);
|
||||
do {
|
||||
key_bits = (entry & 0x3FFF) + bitstream_pop_bits(istream, 1);
|
||||
} while ((entry = decode_table[key_bits]) >= 0xC000);
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Copy an LZ77 match at (dst - offset) to dst.
 *
 * The length and offset must be already validated --- that is, (dst - offset)
 * can't underrun the output buffer, and (dst + length) can't overrun the output
 * buffer.  Also, the length cannot be 0.
 *
 * @bufend points to the byte past the end of the output buffer.  This function
 * won't write any data beyond this position.
 *
 * @min_length is a lower bound on @length that the caller guarantees.  The
 * bytewise fallback copies its first byte unconditionally when
 * @min_length >= 2, and its first two bytes when @min_length >= 3, before
 * entering the counted loop, so @length must really be >= @min_length.
 *
 * Returns dst + length.
 */
static forceinline u8 *lz_copy(u8 *dst, u32 length, u32 offset, const u8 *bufend,
			       u32 min_length)
{
	const u8 *src = dst - offset;

	/*
	 * Try to copy one machine word at a time.  On i386 and x86_64 this is
	 * faster than copying one byte at a time, unless the data is
	 * near-random and all the matches have very short lengths.  Note that
	 * since this requires unaligned memory accesses, it won't necessarily
	 * be faster on every architecture.
	 *
	 * Also note that we might copy more than the length of the match.  For
	 * example, if a word is 8 bytes and the match is of length 5, then
	 * we'll simply copy 8 bytes.  This is okay as long as we don't write
	 * beyond the end of the output buffer, hence the check for (bufend -
	 * end >= WORDBYTES - 1).
	 */
#ifdef FAST_UNALIGNED_ACCESS
	u8 * const end = dst + length;
	if (bufend - end >= (ptrdiff_t)(WORDBYTES - 1)) {

		if (offset >= WORDBYTES) {
			/* The source and destination words don't overlap.  */

			/* To improve branch prediction, one iteration of this
			 * loop is unrolled.  Most matches are short and will
			 * fail the first check.  But if that check passes, then
			 * it becomes increasingly likely that the match is long
			 * and we'll need to continue copying.  */

			copy_unaligned_word(src, dst);
			src += WORDBYTES;
			dst += WORDBYTES;

			if (dst < end) {
				do {
					copy_unaligned_word(src, dst);
					src += WORDBYTES;
					dst += WORDBYTES;
				} while (dst < end);
			}
			return end;
		} else if (offset == 1) {

			/* Offset 1 matches are equivalent to run-length
			 * encoding of the previous byte.  This case is common
			 * if the data contains many repeated bytes.  The
			 * source pointer is not needed here: every word
			 * written is the same repeated byte.  */

			size_t v = repeat_byte(*(dst - 1));
			do {
				put_unaligned_word(v, dst);
				dst += WORDBYTES;
			} while (dst < end);
			return end;
		}
		/*
		 * We don't bother with special cases for other 'offset <
		 * WORDBYTES', which are usually rarer than 'offset == 1'.  Extra
		 * checks will just slow things down.  Actually, it's possible
		 * to handle all the 'offset < WORDBYTES' cases using the same
		 * code, but it becomes more complicated and doesn't seem any
		 * faster overall; it definitely slows down the more common
		 * 'offset == 1' case.
		 */
	}
#endif /* FAST_UNALIGNED_ACCESS */

	/* Fall back to a bytewise copy.  */

	if (min_length >= 2) {
		*dst++ = *src++;
		length--;
	}
	if (min_length >= 3) {
		*dst++ = *src++;
		length--;
	}
	do {
		*dst++ = *src++;
	} while (--length);

	return dst;
}
|
|
@ -0,0 +1,678 @@
|
|||
/*
|
||||
* lzx_decompress.c - A decompressor for the LZX compression format, which can
|
||||
* be used in "System Compressed" files. This is based on the code from wimlib.
|
||||
* This code only supports a window size (dictionary size) of 32768 bytes, since
|
||||
* this is the only size used in System Compression.
|
||||
*
|
||||
* Copyright (C) 2015 Eric Biggers
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU General Public License as published by the Free Software
|
||||
* Foundation, either version 2 of the License, or (at your option) any later
|
||||
* version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <ntfs-3g/misc.h>
|
||||
|
||||
#include "decompress_common.h"
|
||||
#include "system_compression.h"
|
||||
|
||||
/* One literal symbol per possible byte value */
#define LZX_NUM_CHARS			256

/* Shortest and longest match lengths the format can express */
#define LZX_MIN_MATCH_LEN		2
#define LZX_MAX_MATCH_LEN		257

/* How many distinct match lengths exist */
#define LZX_NUM_LENS	(LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)

/* Match lengths that are encoded without a separate length symbol */
#define LZX_NUM_PRIMARY_LENS		7
#define LZX_NUM_LEN_HEADERS		(LZX_NUM_PRIMARY_LENS + 1)

/* Values the 3-bit block type field may take */
#define LZX_BLOCKTYPE_VERBATIM		1
#define LZX_BLOCKTYPE_ALIGNED		2
#define LZX_BLOCKTYPE_UNCOMPRESSED	3

/* Offset slot count for a 32768-byte window */
#define LZX_NUM_OFFSET_SLOTS		30

/* Main code alphabet size for a 32768-byte window */
#define LZX_MAINCODE_NUM_SYMBOLS	\
	(LZX_NUM_CHARS + (LZX_NUM_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS))

/* Length code alphabet size */
#define LZX_LENCODE_NUM_SYMBOLS		(LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS)

/* Precode alphabet size */
#define LZX_PRECODE_NUM_SYMBOLS		20

/* Width, in bits, of each stored precode codeword length */
#define LZX_PRECODE_ELEMENT_SIZE	4

/* How many low-order bits of a match offset are entropy-encoded in aligned
 * offset blocks */
#define LZX_NUM_ALIGNED_OFFSET_BITS	3

/* Aligned offset code alphabet size */
#define LZX_ALIGNEDCODE_NUM_SYMBOLS	(1 << LZX_NUM_ALIGNED_OFFSET_BITS)

/* Mask selecting the match offset bits that are entropy-encoded in aligned
 * offset blocks */
#define LZX_ALIGNED_OFFSET_BITMASK	((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1)

/* Width, in bits, of each stored aligned offset codeword length */
#define LZX_ALIGNEDCODE_ELEMENT_SIZE	3

/* Longest codeword, in bits, permitted in each Huffman code */
#define LZX_MAX_MAIN_CODEWORD_LEN	16
#define LZX_MAX_LEN_CODEWORD_LEN	16
#define LZX_MAX_PRE_CODEWORD_LEN	((1 << LZX_PRECODE_ELEMENT_SIZE) - 1)
#define LZX_MAX_ALIGNED_CODEWORD_LEN	((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1)

/* Default "filesize" value for pre/post-processing.  The LZX variant used in
 * cabinet files requires the decompressor to be told this value, whereas the
 * variant used in WIM files and system-compressed files fixes it at
 * 12000000.  */
#define LZX_DEFAULT_FILESIZE		12000000

/* Block size assumed when the encoded block size begins with a 0 bit.  */
#define LZX_DEFAULT_BLOCK_SIZE		32768

/* Length of the recent (or "repeat") offsets queue.  */
#define LZX_NUM_RECENT_OFFSETS		3

/* Direct lookup table sizes (log2), picked for fast decompression.  */
#define LZX_MAINCODE_TABLEBITS		11
#define LZX_LENCODE_TABLEBITS		10
#define LZX_PRECODE_TABLEBITS		6
#define LZX_ALIGNEDCODE_TABLEBITS	7

#define LZX_READ_LENS_MAX_OVERRUN	50
|
||||
|
||||
/* Mapping: offset slot => first match offset that uses that offset slot.
|
||||
*/
|
||||
static const u32 lzx_offset_slot_base[LZX_NUM_OFFSET_SLOTS + 1] = {
|
||||
0 , 1 , 2 , 3 , 4 , /* 0 --- 4 */
|
||||
6 , 8 , 12 , 16 , 24 , /* 5 --- 9 */
|
||||
32 , 48 , 64 , 96 , 128 , /* 10 --- 14 */
|
||||
192 , 256 , 384 , 512 , 768 , /* 15 --- 19 */
|
||||
1024 , 1536 , 2048 , 3072 , 4096 , /* 20 --- 24 */
|
||||
6144 , 8192 , 12288 , 16384 , 24576 , /* 25 --- 29 */
|
||||
32768 , /* extra */
|
||||
};
|
||||
|
||||
/* Mapping: offset slot => how many extra bits must be read and added to the
|
||||
* corresponding offset slot base to decode the match offset. */
|
||||
static const u8 lzx_extra_offset_bits[LZX_NUM_OFFSET_SLOTS] = {
|
||||
0 , 0 , 0 , 0 , 1 ,
|
||||
1 , 2 , 2 , 3 , 3 ,
|
||||
4 , 4 , 5 , 5 , 6 ,
|
||||
6 , 7 , 7 , 8 , 8 ,
|
||||
9 , 9 , 10, 10, 11,
|
||||
11, 12, 12, 13, 13,
|
||||
};
|
||||
|
||||
/* Reusable heap-allocated memory for LZX decompression */
|
||||
struct lzx_decompressor {
|
||||
|
||||
/* Huffman decoding tables, and arrays that map symbols to codeword
|
||||
* lengths */
|
||||
|
||||
u16 maincode_decode_table[(1 << LZX_MAINCODE_TABLEBITS) +
|
||||
(LZX_MAINCODE_NUM_SYMBOLS * 2)];
|
||||
u8 maincode_lens[LZX_MAINCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
|
||||
|
||||
|
||||
u16 lencode_decode_table[(1 << LZX_LENCODE_TABLEBITS) +
|
||||
(LZX_LENCODE_NUM_SYMBOLS * 2)];
|
||||
u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
|
||||
|
||||
|
||||
u16 alignedcode_decode_table[(1 << LZX_ALIGNEDCODE_TABLEBITS) +
|
||||
(LZX_ALIGNEDCODE_NUM_SYMBOLS * 2)];
|
||||
u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
|
||||
|
||||
u16 precode_decode_table[(1 << LZX_PRECODE_TABLEBITS) +
|
||||
(LZX_PRECODE_NUM_SYMBOLS * 2)];
|
||||
u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
|
||||
|
||||
/* Temporary space for make_huffman_decode_table() */
|
||||
u16 working_space[2 * (1 + LZX_MAX_MAIN_CODEWORD_LEN) +
|
||||
LZX_MAINCODE_NUM_SYMBOLS];
|
||||
};
|
||||
|
||||
/* Reverse the E8 translation of the 32-bit little-endian value at @target,
 * where @input_pos is the position passed by the caller (the offset of the
 * preceding E8 byte).  Values outside both translated ranges are left
 * untouched. */
static void undo_e8_translation(void *target, s32 input_pos)
{
	const s32 abs_offset = get_unaligned_le32(target);

	if (abs_offset >= 0) {
		/* "good translation" */
		if (abs_offset < LZX_DEFAULT_FILESIZE)
			put_unaligned_le32(abs_offset - input_pos, target);
	} else if (abs_offset >= -input_pos) {
		/* "compensating translation" */
		put_unaligned_le32(abs_offset + LZX_DEFAULT_FILESIZE, target);
	}
}
|
||||
|
||||
/*
|
||||
* Undo the 'E8' preprocessing used in LZX. Before compression, the
|
||||
* uncompressed data was preprocessed by changing the targets of suspected x86
|
||||
* CALL instructions from relative offsets to absolute offsets. After
|
||||
* match/literal decoding, the decompressor must undo the translation.
|
||||
*/
|
||||
static void lzx_postprocess(u8 *data, u32 size)
|
||||
{
|
||||
/*
|
||||
* A worthwhile optimization is to push the end-of-buffer check into the
|
||||
* relatively rare E8 case. This is possible if we replace the last six
|
||||
* bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
|
||||
* before reaching end-of-buffer. In addition, this scheme guarantees
|
||||
* that no translation can begin following an E8 byte in the last 10
|
||||
* bytes because a 4-byte offset containing E8 as its high byte is a
|
||||
* large negative number that is not valid for translation. That is
|
||||
* exactly what we need.
|
||||
*/
|
||||
u8 *tail;
|
||||
u8 saved_bytes[6];
|
||||
u8 *p;
|
||||
|
||||
if (size <= 10)
|
||||
return;
|
||||
|
||||
tail = &data[size - 6];
|
||||
memcpy(saved_bytes, tail, 6);
|
||||
memset(tail, 0xE8, 6);
|
||||
p = data;
|
||||
for (;;) {
|
||||
while (*p != 0xE8)
|
||||
p++;
|
||||
if (p >= tail)
|
||||
break;
|
||||
undo_e8_translation(p + 1, p - data);
|
||||
p += 5;
|
||||
}
|
||||
memcpy(tail, saved_bytes, 6);
|
||||
}
|
||||
|
||||
/* Read a Huffman-encoded symbol using the precode. */
|
||||
static forceinline unsigned read_presym(const struct lzx_decompressor *d,
|
||||
struct input_bitstream *is)
|
||||
{
|
||||
return read_huffsym(is, d->precode_decode_table,
|
||||
LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
|
||||
}
|
||||
|
||||
/* Read a Huffman-encoded symbol using the main code. */
|
||||
static forceinline unsigned read_mainsym(const struct lzx_decompressor *d,
|
||||
struct input_bitstream *is)
|
||||
{
|
||||
return read_huffsym(is, d->maincode_decode_table,
|
||||
LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
|
||||
}
|
||||
|
||||
/* Read a Huffman-encoded symbol using the length code. */
|
||||
static forceinline unsigned read_lensym(const struct lzx_decompressor *d,
|
||||
struct input_bitstream *is)
|
||||
{
|
||||
return read_huffsym(is, d->lencode_decode_table,
|
||||
LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
|
||||
}
|
||||
|
||||
/* Read a Huffman-encoded symbol using the aligned offset code. */
|
||||
static forceinline unsigned read_alignedsym(const struct lzx_decompressor *d,
|
||||
struct input_bitstream *is)
|
||||
{
|
||||
return read_huffsym(is, d->alignedcode_decode_table,
|
||||
LZX_ALIGNEDCODE_TABLEBITS,
|
||||
LZX_MAX_ALIGNED_CODEWORD_LEN);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the precode from the compressed input bitstream, then use it to decode
|
||||
* @num_lens codeword length values.
|
||||
*
|
||||
* @is: The input bitstream.
|
||||
*
|
||||
* @lens: An array that contains the length values from the previous time
|
||||
* the codeword lengths for this Huffman code were read, or all 0's
|
||||
* if this is the first time. This array must have at least
|
||||
* (@num_lens + LZX_READ_LENS_MAX_OVERRUN) entries.
|
||||
*
|
||||
* @num_lens: Number of length values to decode.
|
||||
*
|
||||
* Returns 0 on success, or -1 if the data was invalid.
|
||||
*/
|
||||
static int lzx_read_codeword_lens(struct lzx_decompressor *d,
|
||||
struct input_bitstream *is,
|
||||
u8 *lens, unsigned num_lens)
|
||||
{
|
||||
u8 *len_ptr = lens;
|
||||
u8 *lens_end = lens + num_lens;
|
||||
int i;
|
||||
|
||||
/* Read the lengths of the precode codewords. These are given
|
||||
* explicitly. */
|
||||
for (i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
|
||||
d->precode_lens[i] =
|
||||
bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE);
|
||||
}
|
||||
|
||||
/* Make the decoding table for the precode. */
|
||||
if (make_huffman_decode_table(d->precode_decode_table,
|
||||
LZX_PRECODE_NUM_SYMBOLS,
|
||||
LZX_PRECODE_TABLEBITS,
|
||||
d->precode_lens,
|
||||
LZX_MAX_PRE_CODEWORD_LEN,
|
||||
d->working_space))
|
||||
return -1;
|
||||
|
||||
/* Decode the codeword lengths. */
|
||||
do {
|
||||
unsigned presym;
|
||||
u8 len;
|
||||
|
||||
/* Read the next precode symbol. */
|
||||
presym = read_presym(d, is);
|
||||
if (presym < 17) {
|
||||
/* Difference from old length */
|
||||
len = *len_ptr - presym;
|
||||
if ((s8)len < 0)
|
||||
len += 17;
|
||||
*len_ptr++ = len;
|
||||
} else {
|
||||
/* Special RLE values */
|
||||
|
||||
unsigned run_len;
|
||||
|
||||
if (presym == 17) {
|
||||
/* Run of 0's */
|
||||
run_len = 4 + bitstream_read_bits(is, 4);
|
||||
len = 0;
|
||||
} else if (presym == 18) {
|
||||
/* Longer run of 0's */
|
||||
run_len = 20 + bitstream_read_bits(is, 5);
|
||||
len = 0;
|
||||
} else {
|
||||
/* Run of identical lengths */
|
||||
run_len = 4 + bitstream_read_bits(is, 1);
|
||||
presym = read_presym(d, is);
|
||||
if (presym > 17)
|
||||
return -1;
|
||||
len = *len_ptr - presym;
|
||||
if ((s8)len < 0)
|
||||
len += 17;
|
||||
}
|
||||
|
||||
do {
|
||||
*len_ptr++ = len;
|
||||
} while (--run_len);
|
||||
/* Worst case overrun is when presym == 18,
|
||||
* run_len == 20 + 31, and only 1 length was remaining.
|
||||
* So LZX_READ_LENS_MAX_OVERRUN == 50.
|
||||
*
|
||||
* Overrun while reading the first half of maincode_lens
|
||||
* can corrupt the previous values in the second half.
|
||||
* This doesn't really matter because the resulting
|
||||
* lengths will still be in range, and data that
|
||||
* generates overruns is invalid anyway. */
|
||||
}
|
||||
} while (len_ptr < lens_end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the header of an LZX block and save the block type and (uncompressed)
|
||||
* size in *block_type_ret and *block_size_ret, respectively.
|
||||
*
|
||||
* If the block is compressed, also update the Huffman decode @tables with the
|
||||
* new Huffman codes. If the block is uncompressed, also update the match
|
||||
* offset @queue with the new match offsets.
|
||||
*
|
||||
* Return 0 on success, or -1 if the data was invalid.
|
||||
*/
|
||||
static int lzx_read_block_header(struct lzx_decompressor *d,
|
||||
struct input_bitstream *is,
|
||||
int *block_type_ret,
|
||||
u32 *block_size_ret,
|
||||
u32 recent_offsets[])
|
||||
{
|
||||
int block_type;
|
||||
u32 block_size;
|
||||
int i;
|
||||
|
||||
bitstream_ensure_bits(is, 4);
|
||||
|
||||
/* The first three bits tell us what kind of block it is, and should be
|
||||
* one of the LZX_BLOCKTYPE_* values. */
|
||||
block_type = bitstream_pop_bits(is, 3);
|
||||
|
||||
/* Read the block size. */
|
||||
if (bitstream_pop_bits(is, 1)) {
|
||||
block_size = LZX_DEFAULT_BLOCK_SIZE;
|
||||
} else {
|
||||
block_size = 0;
|
||||
block_size |= bitstream_read_bits(is, 8);
|
||||
block_size <<= 8;
|
||||
block_size |= bitstream_read_bits(is, 8);
|
||||
}
|
||||
|
||||
switch (block_type) {
|
||||
|
||||
case LZX_BLOCKTYPE_ALIGNED:
|
||||
|
||||
/* Read the aligned offset code and prepare its decode table.
|
||||
*/
|
||||
|
||||
for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
|
||||
d->alignedcode_lens[i] =
|
||||
bitstream_read_bits(is,
|
||||
LZX_ALIGNEDCODE_ELEMENT_SIZE);
|
||||
}
|
||||
|
||||
if (make_huffman_decode_table(d->alignedcode_decode_table,
|
||||
LZX_ALIGNEDCODE_NUM_SYMBOLS,
|
||||
LZX_ALIGNEDCODE_TABLEBITS,
|
||||
d->alignedcode_lens,
|
||||
LZX_MAX_ALIGNED_CODEWORD_LEN,
|
||||
d->working_space))
|
||||
return -1;
|
||||
|
||||
/* Fall though, since the rest of the header for aligned offset
|
||||
* blocks is the same as that for verbatim blocks. */
|
||||
|
||||
case LZX_BLOCKTYPE_VERBATIM:
|
||||
|
||||
/* Read the main code and prepare its decode table.
|
||||
*
|
||||
* Note that the codeword lengths in the main code are encoded
|
||||
* in two parts: one part for literal symbols, and one part for
|
||||
* match symbols. */
|
||||
|
||||
if (lzx_read_codeword_lens(d, is, d->maincode_lens,
|
||||
LZX_NUM_CHARS))
|
||||
return -1;
|
||||
|
||||
if (lzx_read_codeword_lens(d, is,
|
||||
d->maincode_lens + LZX_NUM_CHARS,
|
||||
LZX_MAINCODE_NUM_SYMBOLS - LZX_NUM_CHARS))
|
||||
return -1;
|
||||
|
||||
if (make_huffman_decode_table(d->maincode_decode_table,
|
||||
LZX_MAINCODE_NUM_SYMBOLS,
|
||||
LZX_MAINCODE_TABLEBITS,
|
||||
d->maincode_lens,
|
||||
LZX_MAX_MAIN_CODEWORD_LEN,
|
||||
d->working_space))
|
||||
return -1;
|
||||
|
||||
/* Read the length code and prepare its decode table. */
|
||||
|
||||
if (lzx_read_codeword_lens(d, is, d->lencode_lens,
|
||||
LZX_LENCODE_NUM_SYMBOLS))
|
||||
return -1;
|
||||
|
||||
if (make_huffman_decode_table(d->lencode_decode_table,
|
||||
LZX_LENCODE_NUM_SYMBOLS,
|
||||
LZX_LENCODE_TABLEBITS,
|
||||
d->lencode_lens,
|
||||
LZX_MAX_LEN_CODEWORD_LEN,
|
||||
d->working_space))
|
||||
return -1;
|
||||
|
||||
break;
|
||||
|
||||
case LZX_BLOCKTYPE_UNCOMPRESSED:
|
||||
|
||||
/* Before reading the three recent offsets from the uncompressed
|
||||
* block header, the stream must be aligned on a 16-bit
|
||||
* boundary. But if the stream is *already* aligned, then the
|
||||
* next 16 bits must be discarded. */
|
||||
bitstream_ensure_bits(is, 1);
|
||||
bitstream_align(is);
|
||||
|
||||
recent_offsets[0] = bitstream_read_u32(is);
|
||||
recent_offsets[1] = bitstream_read_u32(is);
|
||||
recent_offsets[2] = bitstream_read_u32(is);
|
||||
|
||||
/* Offsets of 0 are invalid. */
|
||||
if (recent_offsets[0] == 0 || recent_offsets[1] == 0 ||
|
||||
recent_offsets[2] == 0)
|
||||
return -1;
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Unrecognized block type. */
|
||||
return -1;
|
||||
}
|
||||
|
||||
*block_type_ret = block_type;
|
||||
*block_size_ret = block_size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Decompress a block of LZX-compressed data. */
|
||||
static int lzx_decompress_block(const struct lzx_decompressor *d,
|
||||
struct input_bitstream *is,
|
||||
int block_type, u32 block_size,
|
||||
u8 * const out_begin, u8 *out_next,
|
||||
u32 recent_offsets[])
|
||||
{
|
||||
u8 * const block_end = out_next + block_size;
|
||||
unsigned ones_if_aligned = 0U - (block_type == LZX_BLOCKTYPE_ALIGNED);
|
||||
|
||||
do {
|
||||
unsigned mainsym;
|
||||
unsigned match_len;
|
||||
u32 match_offset;
|
||||
unsigned offset_slot;
|
||||
unsigned num_extra_bits;
|
||||
|
||||
mainsym = read_mainsym(d, is);
|
||||
if (mainsym < LZX_NUM_CHARS) {
|
||||
/* Literal */
|
||||
*out_next++ = mainsym;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Match */
|
||||
|
||||
/* Decode the length header and offset slot. */
|
||||
mainsym -= LZX_NUM_CHARS;
|
||||
match_len = mainsym % LZX_NUM_LEN_HEADERS;
|
||||
offset_slot = mainsym / LZX_NUM_LEN_HEADERS;
|
||||
|
||||
/* If needed, read a length symbol to decode the full length. */
|
||||
if (match_len == LZX_NUM_PRIMARY_LENS)
|
||||
match_len += read_lensym(d, is);
|
||||
match_len += LZX_MIN_MATCH_LEN;
|
||||
|
||||
if (offset_slot < LZX_NUM_RECENT_OFFSETS) {
|
||||
/* Repeat offset */
|
||||
|
||||
/* Note: This isn't a real LRU queue, since using the R2
|
||||
* offset doesn't bump the R1 offset down to R2. This
|
||||
* quirk allows all 3 recent offsets to be handled by
|
||||
* the same code. (For R0, the swap is a no-op.) */
|
||||
match_offset = recent_offsets[offset_slot];
|
||||
recent_offsets[offset_slot] = recent_offsets[0];
|
||||
recent_offsets[0] = match_offset;
|
||||
} else {
|
||||
/* Explicit offset */
|
||||
|
||||
/* Look up the number of extra bits that need to be read
|
||||
* to decode offsets with this offset slot. */
|
||||
num_extra_bits = lzx_extra_offset_bits[offset_slot];
|
||||
|
||||
/* Start with the offset slot base value. */
|
||||
match_offset = lzx_offset_slot_base[offset_slot];
|
||||
|
||||
/* In aligned offset blocks, the low-order 3 bits of
|
||||
* each offset are encoded using the aligned offset
|
||||
* code. Otherwise, all the extra bits are literal. */
|
||||
|
||||
if ((num_extra_bits & ones_if_aligned) >= LZX_NUM_ALIGNED_OFFSET_BITS) {
|
||||
match_offset +=
|
||||
bitstream_read_bits(is, num_extra_bits -
|
||||
LZX_NUM_ALIGNED_OFFSET_BITS)
|
||||
<< LZX_NUM_ALIGNED_OFFSET_BITS;
|
||||
match_offset += read_alignedsym(d, is);
|
||||
} else {
|
||||
match_offset += bitstream_read_bits(is, num_extra_bits);
|
||||
}
|
||||
|
||||
/* Adjust the offset. */
|
||||
match_offset -= (LZX_NUM_RECENT_OFFSETS - 1);
|
||||
|
||||
/* Update the recent offsets. */
|
||||
recent_offsets[2] = recent_offsets[1];
|
||||
recent_offsets[1] = recent_offsets[0];
|
||||
recent_offsets[0] = match_offset;
|
||||
}
|
||||
|
||||
/* Validate the match, then copy it to the current position. */
|
||||
|
||||
if (match_len > (size_t)(block_end - out_next))
|
||||
return -1;
|
||||
|
||||
if (match_offset > (size_t)(out_next - out_begin))
|
||||
return -1;
|
||||
|
||||
out_next = lz_copy(out_next, match_len, match_offset,
|
||||
block_end, LZX_MIN_MATCH_LEN);
|
||||
|
||||
} while (out_next != block_end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* lzx_allocate_decompressor - Allocate an LZX decompressor
|
||||
*
|
||||
* Return the pointer to the decompressor on success, or return NULL and set
|
||||
* errno on failure.
|
||||
*/
|
||||
struct lzx_decompressor *lzx_allocate_decompressor(void)
|
||||
{
|
||||
return ntfs_malloc(sizeof(struct lzx_decompressor));
|
||||
}
|
||||
|
||||
/*
|
||||
* lzx_decompress - Decompress a buffer of LZX-compressed data
|
||||
*
|
||||
* @decompressor: A decompressor allocated with lzx_allocate_decompressor()
|
||||
* @compressed_data: The buffer of data to decompress
|
||||
* @compressed_size: Number of bytes of compressed data
|
||||
* @uncompressed_data: The buffer in which to store the decompressed data
|
||||
* @uncompressed_size: The number of bytes the data decompresses into
|
||||
*
|
||||
* Return 0 on success, or return -1 and set errno on failure.
|
||||
*/
|
||||
int lzx_decompress(struct lzx_decompressor *decompressor,
|
||||
const void *compressed_data, size_t compressed_size,
|
||||
void *uncompressed_data, size_t uncompressed_size)
|
||||
{
|
||||
struct lzx_decompressor *d = decompressor;
|
||||
u8 * const out_begin = uncompressed_data;
|
||||
u8 *out_next = out_begin;
|
||||
u8 * const out_end = out_begin + uncompressed_size;
|
||||
struct input_bitstream is;
|
||||
u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1};
|
||||
int e8_status = 0;
|
||||
|
||||
init_input_bitstream(&is, compressed_data, compressed_size);
|
||||
|
||||
/* Codeword lengths begin as all 0's for delta encoding purposes. */
|
||||
memset(d->maincode_lens, 0, LZX_MAINCODE_NUM_SYMBOLS);
|
||||
memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
|
||||
|
||||
/* Decompress blocks until we have all the uncompressed data. */
|
||||
|
||||
while (out_next != out_end) {
|
||||
int block_type;
|
||||
u32 block_size;
|
||||
|
||||
if (lzx_read_block_header(d, &is, &block_type, &block_size,
|
||||
recent_offsets))
|
||||
goto invalid;
|
||||
|
||||
if (block_size < 1 || block_size > (size_t)(out_end - out_next))
|
||||
goto invalid;
|
||||
|
||||
if (block_type != LZX_BLOCKTYPE_UNCOMPRESSED) {
|
||||
|
||||
/* Compressed block */
|
||||
|
||||
if (lzx_decompress_block(d,
|
||||
&is,
|
||||
block_type,
|
||||
block_size,
|
||||
out_begin,
|
||||
out_next,
|
||||
recent_offsets))
|
||||
goto invalid;
|
||||
|
||||
e8_status |= d->maincode_lens[0xe8];
|
||||
out_next += block_size;
|
||||
} else {
|
||||
/* Uncompressed block */
|
||||
|
||||
out_next = bitstream_read_bytes(&is, out_next,
|
||||
block_size);
|
||||
if (!out_next)
|
||||
goto invalid;
|
||||
|
||||
if (block_size & 1)
|
||||
bitstream_read_byte(&is);
|
||||
|
||||
e8_status = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Postprocess the data unless it cannot possibly contain 0xe8 bytes. */
|
||||
if (e8_status)
|
||||
lzx_postprocess(uncompressed_data, uncompressed_size);
|
||||
|
||||
return 0;
|
||||
|
||||
invalid:
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
 * lzx_free_decompressor - Free an LZX decompressor
 *
 * @decompressor: A decompressor obtained from lzx_allocate_decompressor().
 *		  NULL is accepted, in which case nothing happens.
 */
void lzx_free_decompressor(struct lzx_decompressor *decompressor)
{
	/* free() ignores NULL, so no check is needed. */
	free(decompressor);
}
|
|
@ -0,0 +1,102 @@
|
|||
/*
|
||||
* plugin.c - NTFS-3G system compression plugin
|
||||
*
|
||||
* Copyright (C) 2015 Jean-Pierre Andre
|
||||
* Copyright (C) 2015-2016 Eric Biggers
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU General Public License as published by the Free Software
|
||||
* Foundation, either version 2 of the License, or (at your option) any later
|
||||
* version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include "config.h"
|
||||
|
||||
#include <fuse.h>
|
||||
|
||||
#ifdef HAVE_ERRNO_H
|
||||
#include <errno.h>
|
||||
#endif
|
||||
|
||||
#include <ntfs-3g/inode.h>
|
||||
#include <ntfs-3g/plugin.h>
|
||||
|
||||
#include "system_compression.h"
|
||||
|
||||
/*
 * getattr hook: fill in the size, block count and mode of a system-compressed
 * file.  Returns 0 on success, or a negated errno value if the file is not
 * system-compressed or its compressed size could not be determined.
 */
static int compressed_getattr(ntfs_inode *ni, const REPARSE_POINT *reparse,
			      struct stat *stbuf)
{
	const s64 stored_size = ntfs_get_system_compressed_file_size(ni, reparse);

	/* Not a system compressed file, or another error occurred */
	if (stored_size < 0)
		return -errno;

	/* System-compressed file: report the uncompressed size as the file
	 * size, the compressed size (in 512-byte units, rounded up) as the
	 * block count, and a read/execute-only regular file mode. */
	stbuf->st_mode = S_IFREG | 0555;
	stbuf->st_size = ni->data_size;
	stbuf->st_blocks = (stored_size + 511) >> 9;
	return 0;
}
|
||||
|
||||
static int compressed_open(ntfs_inode *ni __attribute__((unused)),
|
||||
const REPARSE_POINT *reparse __attribute__((unused)),
|
||||
struct fuse_file_info *fi)
|
||||
{
|
||||
if ((fi->flags & O_ACCMODE) != O_RDONLY)
|
||||
return -EOPNOTSUPP;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int compressed_release(ntfs_inode *ni __attribute__((unused)),
|
||||
const REPARSE_POINT *reparse __attribute__((unused)),
|
||||
struct fuse_file_info *fi __attribute__((unused)))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * read hook: read up to @size bytes of uncompressed data starting at @offset
 * into @buf.
 *
 * A decompression context is opened for the duration of the call and closed
 * again before returning.
 *
 * Returns the value returned by ntfs_read_system_compressed_data() on success,
 * or a negated errno value if the context could not be opened or the read
 * failed.
 */
static int compressed_read(ntfs_inode *ni, const REPARSE_POINT *reparse,
			   char *buf, size_t size, off_t offset,
			   struct fuse_file_info *fi __attribute__((unused)))
{
	struct ntfs_system_decompression_ctx *dctx;
	ssize_t res;

	/* TODO: there needs to be more investigation into reusing decompression
	 * contexts for multiple reads. */

	dctx = ntfs_open_system_decompression_ctx(ni, reparse);
	if (!dctx)
		return -errno;

	res = ntfs_read_system_compressed_data(dctx, offset, size, buf);

	/* Capture the failure code *before* cleanup: closing the context calls
	 * into other library code, which is free to overwrite errno. */
	if (res < 0)
		res = -errno;

	ntfs_close_system_decompression_ctx(dctx);

	return res;
}
|
||||
|
||||
static const struct plugin_operations ops = {
|
||||
.getattr = compressed_getattr,
|
||||
.open = compressed_open,
|
||||
.release = compressed_release,
|
||||
.read = compressed_read,
|
||||
};
|
||||
|
||||
/*
 * Plugin entry point.  Returns this plugin's operations table when @tag is
 * IO_REPARSE_TAG_WOF; for any other tag, returns NULL with errno set to EINVAL.
 */
const struct plugin_operations *init(le32 tag)
{
	if (tag != IO_REPARSE_TAG_WOF) {
		errno = EINVAL;
		return NULL;
	}
	return &ops;
}
|
|
@ -0,0 +1,687 @@
|
|||
/*
|
||||
* system_compression.c - Support for reading System Compressed files
|
||||
*
|
||||
* Copyright (C) 2015-2016 Eric Biggers
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU General Public License as published by the Free Software
|
||||
* Foundation, either version 2 of the License, or (at your option) any later
|
||||
* version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Windows 10 introduced a new filesystem compression feature: System
|
||||
* Compression, also called "Compact OS". The feature allows rarely modified
|
||||
* files to be compressed more heavily than is possible with regular NTFS
|
||||
* compression (which uses the LZNT1 algorithm with 4096-byte chunks).
|
||||
* System-compressed files can only be read, not written; on Windows, if a
|
||||
* program attempts to write to such a file, it is automatically decompressed
|
||||
* and turned into an ordinary uncompressed file.
|
||||
*
|
||||
* Rather than building it directly into NTFS, Microsoft implemented this new
|
||||
* compression mode using the Windows Overlay Filesystem (WOF) filter driver
|
||||
* that was added in Windows 8.1. A system-compressed file contains the
|
||||
* following NTFS attributes:
|
||||
*
|
||||
* - A reparse point attribute in the format WOF_FILE_PROVIDER_REPARSE_POINT_V1,
|
||||
* documented below
|
||||
* - A sparse unnamed data attribute, containing all zero bytes, with data size
|
||||
* equal to the uncompressed file size
|
||||
* - A data attribute named "WofCompressedData" containing the compressed data
|
||||
* of the file.
|
||||
*
|
||||
* The compressed data contains a series of chunks, each of which decompresses
|
||||
* to a known size determined by the compression format specified in the reparse
|
||||
* point. The last chunk can be an exception, since it decompresses to whatever
|
||||
* size remains. Chunks that did not compress to less than their original size
|
||||
* are stored uncompressed. The compressed chunks are concatenated in order and
|
||||
* are prefixed by a table of 4-byte (for files < 4 GiB in size uncompressed) or
|
||||
* 8-byte (for files >= 4 GiB in size uncompressed) little endian numbers which
|
||||
* give the offset of each compressed chunk from the end of the table. Since
|
||||
* every chunk can be decompressed independently and its location can be
|
||||
* discovered from the chunk offset table, "random access" reads are possible
|
||||
* with chunk granularity. Writes are not possible, in general, without
|
||||
* rewriting the entire file.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <ntfs-3g/attrib.h>
|
||||
#include <ntfs-3g/layout.h>
|
||||
#include <ntfs-3g/misc.h>
|
||||
|
||||
#include "system_compression.h"
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Known values of the WOF protocol / reparse point format */
|
||||
typedef enum {
|
||||
WOF_CURRENT_VERSION = const_cpu_to_le32(1),
|
||||
} WOF_VERSION;
|
||||
|
||||
/* Known WOF providers */
|
||||
typedef enum {
|
||||
/* WIM backing provider ("WIMBoot") */
|
||||
WOF_PROVIDER_WIM = const_cpu_to_le32(1),
|
||||
|
||||
/* System compressed file provider */
|
||||
WOF_PROVIDER_FILE = const_cpu_to_le32(2),
|
||||
} WOF_PROVIDER;
|
||||
|
||||
/* Known versions of the compressed file provider */
|
||||
typedef enum {
|
||||
WOF_FILE_PROVIDER_CURRENT_VERSION = const_cpu_to_le32(1),
|
||||
} WOF_FILE_PROVIDER_VERSION;
|
||||
|
||||
/* Information needed to specify a WOF provider */
|
||||
typedef struct {
|
||||
le32 version;
|
||||
le32 provider;
|
||||
} WOF_EXTERNAL_INFO;
|
||||
|
||||
/* Metadata for the compressed file provider --- indicates how the file
|
||||
* is compressed */
|
||||
typedef struct {
|
||||
le32 version;
|
||||
le32 compression_format;
|
||||
} WOF_FILE_PROVIDER_EXTERNAL_INFO_V1;
|
||||
|
||||
/* Format of the reparse point attribute of system compressed files */
|
||||
typedef struct {
|
||||
/* The reparse point header. This indicates that the reparse point is
|
||||
* supposed to be interpreted by the WOF filter driver. */
|
||||
REPARSE_POINT reparse;
|
||||
|
||||
/* The WOF provider specification. This indicates the "provider" that
|
||||
* the WOF filter driver is supposed to hand control to. */
|
||||
WOF_EXTERNAL_INFO wof;
|
||||
|
||||
/* The metadata specific to the compressed file "provider" */
|
||||
WOF_FILE_PROVIDER_EXTERNAL_INFO_V1 file;
|
||||
|
||||
} WOF_FILE_PROVIDER_REPARSE_POINT_V1;
|
||||
|
||||
/* The available compression formats for system compressed files */
|
||||
typedef enum {
|
||||
FORMAT_XPRESS4K = const_cpu_to_le32(0),
|
||||
FORMAT_LZX = const_cpu_to_le32(1),
|
||||
FORMAT_XPRESS8K = const_cpu_to_le32(2),
|
||||
FORMAT_XPRESS16K = const_cpu_to_le32(3),
|
||||
} WOF_FILE_PROVIDER_COMPRESSION_FORMAT;
|
||||
|
||||
/* "WofCompressedData": the name of the named data stream which contains the
|
||||
* compressed data of a system compressed file */
|
||||
static ntfschar compressed_stream_name[] = {
|
||||
const_cpu_to_le16('W'), const_cpu_to_le16('o'),
|
||||
const_cpu_to_le16('f'), const_cpu_to_le16('C'),
|
||||
const_cpu_to_le16('o'), const_cpu_to_le16('m'),
|
||||
const_cpu_to_le16('p'), const_cpu_to_le16('r'),
|
||||
const_cpu_to_le16('e'), const_cpu_to_le16('s'),
|
||||
const_cpu_to_le16('s'), const_cpu_to_le16('e'),
|
||||
const_cpu_to_le16('d'), const_cpu_to_le16('D'),
|
||||
const_cpu_to_le16('a'), const_cpu_to_le16('t'),
|
||||
const_cpu_to_le16('a'),
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Capacity of the chunk offsets cache, i.e. how many chunk offsets can be held
 * at once.  This is only a tuning knob and may be changed: it must be at least
 * 2 and at most UINT32_MAX divided by the maximum chunk size. */
#define NUM_CHUNK_OFFSETS	128
|
||||
|
||||
/* Sentinel meaning "no chunk"; never equal to a real chunk index */
#define INVALID_CHUNK_INDEX	UINT64_MAX
|
||||
|
||||
/* The decompression context for a system compressed file */
|
||||
struct ntfs_system_decompression_ctx {
|
||||
|
||||
/* The open compressed stream ("WofCompressedData") */
|
||||
ntfs_attr *na;
|
||||
|
||||
/* The compression format of the file */
|
||||
WOF_FILE_PROVIDER_COMPRESSION_FORMAT format;
|
||||
|
||||
/* The decompressor for the file */
|
||||
void *decompressor;
|
||||
|
||||
/* The uncompressed size of the file in bytes */
|
||||
u64 uncompressed_size;
|
||||
|
||||
/* The compressed size of the file in bytes */
|
||||
u64 compressed_size;
|
||||
|
||||
/* The number of chunks into which the file is divided */
|
||||
u64 num_chunks;
|
||||
|
||||
/* The base 2 logarithm of chunk_size */
|
||||
u32 chunk_order;
|
||||
|
||||
/* The uncompressed chunk size in bytes. All chunks have this
|
||||
* uncompressed size except possibly the last. */
|
||||
u32 chunk_size;
|
||||
|
||||
/*
|
||||
* The chunk offsets cache. If 'base_chunk_idx == INVALID_CHUNK_INDEX',
|
||||
* then the cache is empty. Otherwise, 'base_chunk_idx' is the 0-based
|
||||
* index of the chunk that has its offset cached in 'chunk_offsets[0]'.
|
||||
* The offsets of the subsequent chunks follow until either the array is
|
||||
* full or the offset of the file's last chunk has been cached. There
|
||||
* is an extra entry at end-of-file which contains the end-of-file
|
||||
* offset. All offsets are stored relative to 'base_chunk_offset'.
|
||||
*/
|
||||
u64 base_chunk_idx;
|
||||
u64 base_chunk_offset;
|
||||
u32 chunk_offsets[NUM_CHUNK_OFFSETS];
|
||||
|
||||
/* A temporary buffer used to hold the compressed chunk currently being
|
||||
* decompressed or the chunk offset data currently being parsed. */
|
||||
void *temp_buffer;
|
||||
|
||||
/*
|
||||
* A cache for the most recently decompressed chunk. 'cached_chunk' is
|
||||
* a buffer which, if 'cached_chunk_idx != INVALID_CHUNK_INDEX',
|
||||
* contains the uncompressed data of the chunk with index
|
||||
* 'cached_chunk_idx'.
|
||||
*
|
||||
* This cache is intended to prevent adjacent reads with lengths shorter
|
||||
* than the chunk size from causing redundant chunk decompressions.
|
||||
* It's not intended to be a general purpose data cache.
|
||||
*/
|
||||
void *cached_chunk;
|
||||
u64 cached_chunk_idx;
|
||||
};
|
||||
|
||||
static int allocate_decompressor(struct ntfs_system_decompression_ctx *ctx)
|
||||
{
|
||||
if (ctx->format == FORMAT_LZX)
|
||||
ctx->decompressor = lzx_allocate_decompressor();
|
||||
else
|
||||
ctx->decompressor = xpress_allocate_decompressor();
|
||||
if (!ctx->decompressor)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void free_decompressor(struct ntfs_system_decompression_ctx *ctx)
|
||||
{
|
||||
if (ctx->format == FORMAT_LZX)
|
||||
lzx_free_decompressor(ctx->decompressor);
|
||||
else
|
||||
xpress_free_decompressor(ctx->decompressor);
|
||||
}
|
||||
|
||||
static int decompress(struct ntfs_system_decompression_ctx *ctx,
|
||||
const void *compressed_data, size_t compressed_size,
|
||||
void *uncompressed_data, size_t uncompressed_size)
|
||||
{
|
||||
if (ctx->format == FORMAT_LZX)
|
||||
return lzx_decompress(ctx->decompressor,
|
||||
compressed_data, compressed_size,
|
||||
uncompressed_data, uncompressed_size);
|
||||
else
|
||||
return xpress_decompress(ctx->decompressor,
|
||||
compressed_data, compressed_size,
|
||||
uncompressed_data, uncompressed_size);
|
||||
}
|
||||
|
||||
static int get_compression_format(ntfs_inode *ni, const REPARSE_POINT *reparse,
|
||||
WOF_FILE_PROVIDER_COMPRESSION_FORMAT *format_ret)
|
||||
{
|
||||
WOF_FILE_PROVIDER_REPARSE_POINT_V1 *rp;
|
||||
s64 rpbuflen;
|
||||
int ret;
|
||||
|
||||
if (!ni) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Is this a reparse point file? */
|
||||
if (!(ni->flags & FILE_ATTR_REPARSE_POINT)) {
|
||||
errno = EOPNOTSUPP;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Read the reparse point if not done already. */
|
||||
if (reparse) {
|
||||
rp = (WOF_FILE_PROVIDER_REPARSE_POINT_V1 *)reparse;
|
||||
rpbuflen = sizeof(REPARSE_POINT) +
|
||||
le16_to_cpu(reparse->reparse_data_length);
|
||||
} else {
|
||||
rp = ntfs_attr_readall(ni, AT_REPARSE_POINT, AT_UNNAMED, 0,
|
||||
&rpbuflen);
|
||||
if (!rp)
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Does the reparse point indicate a system compressed file? */
|
||||
if (rpbuflen >= (s64)sizeof(WOF_FILE_PROVIDER_REPARSE_POINT_V1) &&
|
||||
rp->reparse.reparse_tag == IO_REPARSE_TAG_WOF &&
|
||||
rp->wof.version == WOF_CURRENT_VERSION &&
|
||||
rp->wof.provider == WOF_PROVIDER_FILE &&
|
||||
rp->file.version == WOF_FILE_PROVIDER_CURRENT_VERSION &&
|
||||
(rp->file.compression_format == FORMAT_XPRESS4K ||
|
||||
rp->file.compression_format == FORMAT_XPRESS8K ||
|
||||
rp->file.compression_format == FORMAT_XPRESS16K ||
|
||||
rp->file.compression_format == FORMAT_LZX))
|
||||
{
|
||||
/* Yes, it's a system compressed file. Save the compression
|
||||
* format identifier. */
|
||||
*format_ret = rp->file.compression_format;
|
||||
ret = 0;
|
||||
} else {
|
||||
/* No, it's not a system compressed file. */
|
||||
errno = EOPNOTSUPP;
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
if ((const REPARSE_POINT *)rp != reparse)
|
||||
free(rp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Return the base 2 logarithm of the chunk size used by @format, or 0 for a
 * value that is not one of the four known formats.
 */
static u32 get_chunk_order(WOF_FILE_PROVIDER_COMPRESSION_FORMAT format)
{
	u32 order = 0;	/* stays 0 only for an unknown format */

	switch (format) {
	case FORMAT_XPRESS4K:
		order = 12;
		break;
	case FORMAT_XPRESS8K:
		order = 13;
		break;
	case FORMAT_XPRESS16K:
		order = 14;
		break;
	case FORMAT_LZX:
		order = 15;
		break;
	}
	return order;
}
|
||||
|
||||
/*
|
||||
* ntfs_get_system_compressed_file_size - Return the compressed size of a system
|
||||
* compressed file
|
||||
*
|
||||
* @ni: The NTFS inode for the file
|
||||
* @reparse: (Optional) the contents of the file's reparse point attribute
|
||||
*
|
||||
* On success, return the compressed size in bytes. On failure, return -1 and
|
||||
* set errno. If the file is not a system compressed file, return -1 and set
|
||||
* errno to EOPNOTSUPP.
|
||||
*/
|
||||
s64 ntfs_get_system_compressed_file_size(ntfs_inode *ni,
|
||||
const REPARSE_POINT *reparse)
|
||||
{
|
||||
WOF_FILE_PROVIDER_COMPRESSION_FORMAT format;
|
||||
ntfs_attr_search_ctx *actx;
|
||||
s64 ret;
|
||||
|
||||
/* Verify this is a system compressed file. */
|
||||
if (get_compression_format(ni, reparse, &format))
|
||||
return -1;
|
||||
|
||||
/* Get the size of the WofCompressedData named data stream. */
|
||||
|
||||
actx = ntfs_attr_get_search_ctx(ni, NULL);
|
||||
if (!actx)
|
||||
return -1;
|
||||
|
||||
ret = ntfs_attr_lookup(AT_DATA, compressed_stream_name,
|
||||
sizeof(compressed_stream_name) /
|
||||
sizeof(compressed_stream_name[0]),
|
||||
CASE_SENSITIVE, 0, NULL, 0, actx);
|
||||
if (!ret)
|
||||
ret = ntfs_get_attribute_value_length(actx->attr);
|
||||
|
||||
ntfs_attr_put_search_ctx(actx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* ntfs_open_system_decompression_ctx - Open a system-compressed file
|
||||
*
|
||||
* @ni: The NTFS inode for the file
|
||||
* @reparse: (Optional) the contents of the file's reparse point attribute
|
||||
*
|
||||
* On success, return a pointer to the decompression context. On failure,
|
||||
* return NULL and set errno. If the file is not a system-compressed file,
|
||||
* return NULL and set errno to EOPNOTSUPP.
|
||||
*/
|
||||
struct ntfs_system_decompression_ctx *
|
||||
ntfs_open_system_decompression_ctx(ntfs_inode *ni, const REPARSE_POINT *reparse)
|
||||
{
|
||||
WOF_FILE_PROVIDER_COMPRESSION_FORMAT format;
|
||||
struct ntfs_system_decompression_ctx *ctx;
|
||||
|
||||
/* Get the compression format. This also validates that the file really
|
||||
* is a system-compressed file. */
|
||||
if (get_compression_format(ni, reparse, &format))
|
||||
goto err;
|
||||
|
||||
/* Allocate the decompression context. */
|
||||
ctx = ntfs_malloc(sizeof(struct ntfs_system_decompression_ctx));
|
||||
if (!ctx)
|
||||
goto err;
|
||||
|
||||
/* Allocate the decompressor. */
|
||||
ctx->format = format;
|
||||
if (allocate_decompressor(ctx))
|
||||
goto err_free_ctx;
|
||||
|
||||
/* Open the WofCompressedData stream. */
|
||||
ctx->na = ntfs_attr_open(ni, AT_DATA, compressed_stream_name,
|
||||
sizeof(compressed_stream_name) /
|
||||
sizeof(compressed_stream_name[0]));
|
||||
if (!ctx->na)
|
||||
goto err_free_decompressor;
|
||||
|
||||
/* The uncompressed size of a system-compressed file is the size of its
|
||||
* unnamed data stream, which should be sparse so that it consumes no
|
||||
* disk space (though we don't rely on it being sparse). */
|
||||
ctx->uncompressed_size = ni->data_size;
|
||||
|
||||
/* Get the chunk size, which depends on the compression format. */
|
||||
ctx->chunk_order = get_chunk_order(ctx->format);
|
||||
ctx->chunk_size = (u32)1 << ctx->chunk_order;
|
||||
|
||||
/* Compute the number of chunks into which the file is divided. */
|
||||
ctx->num_chunks = (ctx->uncompressed_size +
|
||||
ctx->chunk_size - 1) >> ctx->chunk_order;
|
||||
|
||||
/* The compressed size of a system compressed file is the size of its
|
||||
* WofCompressedData stream. */
|
||||
ctx->compressed_size = ctx->na->data_size;
|
||||
|
||||
/* Initially, no chunk offsets are cached. */
|
||||
ctx->base_chunk_idx = INVALID_CHUNK_INDEX;
|
||||
|
||||
/* Allocate buffers for chunk data. */
|
||||
ctx->temp_buffer = ntfs_malloc(max(ctx->chunk_size,
|
||||
NUM_CHUNK_OFFSETS * sizeof(u64)));
|
||||
ctx->cached_chunk = ntfs_malloc(ctx->chunk_size);
|
||||
ctx->cached_chunk_idx = INVALID_CHUNK_INDEX;
|
||||
if (!ctx->temp_buffer || !ctx->cached_chunk)
|
||||
goto err_close_ctx;
|
||||
|
||||
return ctx;
|
||||
|
||||
err_close_ctx:
|
||||
free(ctx->cached_chunk);
|
||||
free(ctx->temp_buffer);
|
||||
ntfs_attr_close(ctx->na);
|
||||
err_free_decompressor:
|
||||
free_decompressor(ctx);
|
||||
err_free_ctx:
|
||||
free(ctx);
|
||||
err:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Retrieve the stored offset and size of a chunk stored in the compressed file
|
||||
* stream. */
|
||||
static int get_chunk_location(struct ntfs_system_decompression_ctx *ctx,
|
||||
u64 chunk_idx,
|
||||
u64 *offset_ret, u32 *stored_size_ret)
|
||||
{
|
||||
size_t cache_idx;
|
||||
|
||||
/* To get the stored size of the chunk, we need its offset and the next
|
||||
* chunk's offset. Use the cached values if possible; otherwise load
|
||||
* the needed offsets into the cache. To reduce the number of chunk
|
||||
* table reads that may be required later, also load some extra. */
|
||||
if (chunk_idx < ctx->base_chunk_idx ||
|
||||
chunk_idx + 1 >= ctx->base_chunk_idx + NUM_CHUNK_OFFSETS)
|
||||
{
|
||||
const u64 start_chunk = chunk_idx;
|
||||
const u64 end_chunk =
|
||||
chunk_idx + min(NUM_CHUNK_OFFSETS - 1,
|
||||
ctx->num_chunks - chunk_idx);
|
||||
const int entry_shift =
|
||||
(ctx->uncompressed_size <= UINT32_MAX) ? 2 : 3;
|
||||
le32 * const offsets32 = ctx->temp_buffer;
|
||||
le64 * const offsets64 = ctx->temp_buffer;
|
||||
u64 first_entry_to_read;
|
||||
size_t num_entries_to_read;
|
||||
size_t i, j;
|
||||
s64 res;
|
||||
|
||||
num_entries_to_read = end_chunk - start_chunk;
|
||||
|
||||
/* The first chunk has no explicit chunk table entry. */
|
||||
if (start_chunk == 0) {
|
||||
num_entries_to_read--;
|
||||
first_entry_to_read = 0;
|
||||
} else {
|
||||
first_entry_to_read = start_chunk - 1;
|
||||
}
|
||||
|
||||
if (end_chunk != ctx->num_chunks)
|
||||
num_entries_to_read++;
|
||||
|
||||
/* Read the chunk table entries into a temporary buffer. */
|
||||
res = ntfs_attr_pread(ctx->na,
|
||||
first_entry_to_read << entry_shift,
|
||||
num_entries_to_read << entry_shift,
|
||||
ctx->temp_buffer);
|
||||
|
||||
if ((u64)res != num_entries_to_read << entry_shift) {
|
||||
if (res >= 0)
|
||||
errno = EINVAL;
|
||||
ctx->base_chunk_idx = INVALID_CHUNK_INDEX;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Prepare the cached chunk offsets. */
|
||||
|
||||
i = 0;
|
||||
if (start_chunk == 0) {
|
||||
/* Implicit first entry */
|
||||
ctx->chunk_offsets[i++] = 0;
|
||||
ctx->base_chunk_offset = 0;
|
||||
} else {
|
||||
if (entry_shift == 3) {
|
||||
ctx->base_chunk_offset =
|
||||
le64_to_cpu(offsets64[0]);
|
||||
} else {
|
||||
ctx->base_chunk_offset =
|
||||
le32_to_cpu(offsets32[0]);
|
||||
}
|
||||
}
|
||||
|
||||
if (entry_shift == 3) {
|
||||
/* 64-bit entries (huge file) */
|
||||
for (j = 0; j < num_entries_to_read; j++) {
|
||||
ctx->chunk_offsets[i++] =
|
||||
le64_to_cpu(offsets64[j]) -
|
||||
ctx->base_chunk_offset;
|
||||
}
|
||||
} else {
|
||||
/* 32-bit entries */
|
||||
for (j = 0; j < num_entries_to_read; j++) {
|
||||
ctx->chunk_offsets[i++] =
|
||||
le32_to_cpu(offsets32[j]) -
|
||||
ctx->base_chunk_offset;
|
||||
}
|
||||
}
|
||||
|
||||
/* Account for the chunk table itself. */
|
||||
ctx->base_chunk_offset += (ctx->num_chunks - 1) << entry_shift;
|
||||
|
||||
if (end_chunk == ctx->num_chunks) {
|
||||
/* Implicit last entry */
|
||||
ctx->chunk_offsets[i] = ctx->compressed_size -
|
||||
ctx->base_chunk_offset;
|
||||
}
|
||||
|
||||
ctx->base_chunk_idx = start_chunk;
|
||||
}
|
||||
|
||||
cache_idx = chunk_idx - ctx->base_chunk_idx;
|
||||
*offset_ret = ctx->base_chunk_offset + ctx->chunk_offsets[cache_idx];
|
||||
*stored_size_ret = ctx->chunk_offsets[cache_idx + 1] -
|
||||
ctx->chunk_offsets[cache_idx];
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Retrieve into @buffer the uncompressed data of chunk @chunk_idx. */
|
||||
static int read_and_decompress_chunk(struct ntfs_system_decompression_ctx *ctx,
|
||||
u64 chunk_idx, void *buffer)
|
||||
{
|
||||
u64 offset;
|
||||
u32 stored_size;
|
||||
u32 uncompressed_size;
|
||||
void *read_buffer;
|
||||
s64 res;
|
||||
|
||||
/* Get the location of the chunk data as stored in the file. */
|
||||
if (get_chunk_location(ctx, chunk_idx, &offset, &stored_size))
|
||||
return -1;
|
||||
|
||||
/* All chunks decompress to 'chunk_size' bytes except possibly the last,
|
||||
* which decompresses to whatever remains. */
|
||||
if (chunk_idx == ctx->num_chunks - 1)
|
||||
uncompressed_size = ((ctx->uncompressed_size - 1) &
|
||||
(ctx->chunk_size - 1)) + 1;
|
||||
else
|
||||
uncompressed_size = ctx->chunk_size;
|
||||
|
||||
/* Forbid strange compressed sizes. */
|
||||
if (stored_size <= 0 || stored_size > uncompressed_size) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Chunks that didn't compress to less than their original size are
|
||||
* stored uncompressed. */
|
||||
if (stored_size == uncompressed_size) {
|
||||
/* Chunk is stored uncompressed */
|
||||
read_buffer = buffer;
|
||||
} else {
|
||||
/* Chunk is stored compressed */
|
||||
read_buffer = ctx->temp_buffer;
|
||||
}
|
||||
|
||||
/* Read the stored chunk data. */
|
||||
res = ntfs_attr_pread(ctx->na, offset, stored_size, read_buffer);
|
||||
if (res != stored_size) {
|
||||
if (res >= 0)
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* If the chunk was stored uncompressed, then we're done. */
|
||||
if (read_buffer == buffer)
|
||||
return 0;
|
||||
|
||||
/* The chunk was stored compressed. Decompress its data. */
|
||||
return decompress(ctx, read_buffer, stored_size,
|
||||
buffer, uncompressed_size);
|
||||
}
|
||||
|
||||
/* Retrieve a pointer to the uncompressed data of the specified chunk. On
|
||||
* failure, return NULL and set errno. */
|
||||
static const void *get_chunk_data(struct ntfs_system_decompression_ctx *ctx,
|
||||
u64 chunk_idx)
|
||||
{
|
||||
if (chunk_idx != ctx->cached_chunk_idx) {
|
||||
ctx->cached_chunk_idx = INVALID_CHUNK_INDEX;
|
||||
if (read_and_decompress_chunk(ctx, chunk_idx, ctx->cached_chunk))
|
||||
return NULL;
|
||||
ctx->cached_chunk_idx = chunk_idx;
|
||||
}
|
||||
return ctx->cached_chunk;
|
||||
}
|
||||
|
||||
/*
|
||||
* ntfs_read_system_compressed_data - Read data from a system-compressed file
|
||||
*
|
||||
* @ctx: The decompression context for the file
|
||||
* @pos: The byte offset into the uncompressed data to read from
|
||||
* @count: The number of bytes of uncompressed data to read
|
||||
* @buf: The buffer into which to read the data
|
||||
*
|
||||
* On full or partial success, return the number of bytes read (0 indicates
|
||||
* end-of-file). On complete failure, return -1 and set errno.
|
||||
*/
|
||||
ssize_t ntfs_read_system_compressed_data(struct ntfs_system_decompression_ctx *ctx,
|
||||
s64 pos, size_t count, void *buf)
|
||||
{
|
||||
u64 offset;
|
||||
u8 *p;
|
||||
u8 *end_p;
|
||||
u64 chunk_idx;
|
||||
u32 offset_in_chunk;
|
||||
u32 chunk_size;
|
||||
|
||||
if (!ctx || pos < 0) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = (u64)pos;
|
||||
if (offset >= ctx->uncompressed_size)
|
||||
return 0;
|
||||
|
||||
count = min(count, ctx->uncompressed_size - offset);
|
||||
if (!count)
|
||||
return 0;
|
||||
|
||||
p = buf;
|
||||
end_p = p + count;
|
||||
chunk_idx = offset >> ctx->chunk_order;
|
||||
offset_in_chunk = offset & (ctx->chunk_size - 1);
|
||||
chunk_size = ctx->chunk_size;
|
||||
do {
|
||||
u32 len_to_copy;
|
||||
const u8 *chunk;
|
||||
|
||||
if (chunk_idx == ctx->num_chunks - 1)
|
||||
chunk_size = ((ctx->uncompressed_size - 1) &
|
||||
(ctx->chunk_size - 1)) + 1;
|
||||
|
||||
len_to_copy = min((size_t)(end_p - p),
|
||||
chunk_size - offset_in_chunk);
|
||||
|
||||
chunk = get_chunk_data(ctx, chunk_idx);
|
||||
if (!chunk)
|
||||
break;
|
||||
|
||||
memcpy(p, &chunk[offset_in_chunk], len_to_copy);
|
||||
|
||||
p += len_to_copy;
|
||||
chunk_idx++;
|
||||
offset_in_chunk = 0;
|
||||
} while (p != end_p);
|
||||
|
||||
return (p == buf) ? -1 : p - (u8 *)buf;
|
||||
}
|
||||
|
||||
/*
|
||||
* ntfs_close_system_decompression_ctx - Close a system-compressed file
|
||||
*/
|
||||
void ntfs_close_system_decompression_ctx(struct ntfs_system_decompression_ctx *ctx)
|
||||
{
|
||||
if (ctx) {
|
||||
free(ctx->cached_chunk);
|
||||
free(ctx->temp_buffer);
|
||||
ntfs_attr_close(ctx->na);
|
||||
free_decompressor(ctx);
|
||||
free(ctx);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* system_compression.h - declarations for accessing System Compressed files
|
||||
*
|
||||
* Copyright (C) 2015 Eric Biggers
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU General Public License as published by the Free Software
|
||||
* Foundation, either version 2 of the License, or (at your option) any later
|
||||
* version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _NTFS_SYSTEM_COMPRESSION_H
|
||||
#define _NTFS_SYSTEM_COMPRESSION_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <ntfs-3g/inode.h>
|
||||
#include <ntfs-3g/types.h>
|
||||
|
||||
/* System compressed file access */
|
||||
|
||||
struct system_decompression_ctx;
|
||||
|
||||
extern s64 ntfs_get_system_compressed_file_size(ntfs_inode *ni,
|
||||
const REPARSE_POINT *reparse);
|
||||
|
||||
extern struct ntfs_system_decompression_ctx *
|
||||
ntfs_open_system_decompression_ctx(ntfs_inode *ni,
|
||||
const REPARSE_POINT *reparse);
|
||||
|
||||
extern ssize_t
|
||||
ntfs_read_system_compressed_data(struct ntfs_system_decompression_ctx *ctx,
|
||||
s64 pos, size_t count, void *buf);
|
||||
|
||||
extern void
|
||||
ntfs_close_system_decompression_ctx(struct ntfs_system_decompression_ctx *ctx);
|
||||
|
||||
/* XPRESS decompression */

/* Opaque, reusable decompressor state. */
struct xpress_decompressor;

/* Allocate a decompressor.  Returns NULL and sets errno on failure. */
struct xpress_decompressor *xpress_allocate_decompressor(void);

/*
 * Decompress @compressed_size bytes at @compressed_data into exactly
 * @uncompressed_size bytes at @uncompressed_data.  Returns 0 on success, or
 * -1 with errno set on failure.
 */
int xpress_decompress(struct xpress_decompressor *decompressor,
		      const void *compressed_data, size_t compressed_size,
		      void *uncompressed_data, size_t uncompressed_size);

/* Free a decompressor from xpress_allocate_decompressor(); NULL is allowed. */
void xpress_free_decompressor(struct xpress_decompressor *decompressor);
|
||||
|
||||
/* LZX decompression */

/*
 * NOTE(review): the LZX implementation is not visible from this header.  The
 * three functions below have the same shape as their XPRESS counterparts and
 * presumably follow the same contract -- confirm against lzx_decompress.c.
 */

/* Opaque decompressor state. */
struct lzx_decompressor;

/* Allocate an LZX decompressor. */
struct lzx_decompressor *lzx_allocate_decompressor(void);

/* Decompress @compressed_size bytes at @compressed_data into
 * @uncompressed_size bytes at @uncompressed_data. */
int lzx_decompress(struct lzx_decompressor *decompressor,
		   const void *compressed_data, size_t compressed_size,
		   void *uncompressed_data, size_t uncompressed_size);

/* Free a decompressor obtained from lzx_allocate_decompressor(). */
void lzx_free_decompressor(struct lzx_decompressor *decompressor);
|
||||
|
||||
#endif /* _NTFS_SYSTEM_COMPRESSION_H */
|
|
@ -0,0 +1,164 @@
|
|||
/*
|
||||
* xpress_decompress.c - A decompressor for the XPRESS compression format
|
||||
* (Huffman variant), which can be used in "System Compressed" files. This is
|
||||
* based on the code from wimlib.
|
||||
*
|
||||
* Copyright (C) 2015 Eric Biggers
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU General Public License as published by the Free Software
|
||||
* Foundation, either version 2 of the License, or (at your option) any later
|
||||
* version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <ntfs-3g/misc.h>
|
||||
|
||||
#include "decompress_common.h"
|
||||
#include "system_compression.h"
|
||||
|
||||
/* Parameters of the XPRESS (Huffman variant) decoder in this file */
enum {
	/* Size of the Huffman alphabet; symbols below 256 are literals and
	 * the remaining ones encode matches */
	XPRESS_NUM_SYMBOLS	= 512,

	/* Longest codeword length passed to the decode-table builder */
	XPRESS_MAX_CODEWORD_LEN	= 15,

	/* Added to every decoded match length */
	XPRESS_MIN_MATCH_LEN	= 3,

	/* This value is chosen for fast decompression. */
	XPRESS_TABLEBITS	= 12
};
|
||||
|
||||
/* Reusable heap-allocated memory for XPRESS decompression */
|
||||
struct xpress_decompressor {
|
||||
|
||||
/* The Huffman decoding table */
|
||||
u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS];
|
||||
|
||||
/* An array that maps symbols to codeword lengths */
|
||||
u8 lens[XPRESS_NUM_SYMBOLS];
|
||||
|
||||
/* Temporary space for make_huffman_decode_table() */
|
||||
u16 working_space[2 * (1 + XPRESS_MAX_CODEWORD_LEN) +
|
||||
XPRESS_NUM_SYMBOLS];
|
||||
};
|
||||
|
||||
/*
|
||||
* xpress_allocate_decompressor - Allocate an XPRESS decompressor
|
||||
*
|
||||
* Return the pointer to the decompressor on success, or return NULL and set
|
||||
* errno on failure.
|
||||
*/
|
||||
struct xpress_decompressor *xpress_allocate_decompressor(void)
|
||||
{
|
||||
return ntfs_malloc(sizeof(struct xpress_decompressor));
|
||||
}
|
||||
|
||||
/*
|
||||
* xpress_decompress - Decompress a buffer of XPRESS-compressed data
|
||||
*
|
||||
* @decompressor: A decompressor that was allocated with
|
||||
* xpress_allocate_decompressor()
|
||||
* @compressed_data: The buffer of data to decompress
|
||||
* @compressed_size: Number of bytes of compressed data
|
||||
* @uncompressed_data: The buffer in which to store the decompressed data
|
||||
* @uncompressed_size: The number of bytes the data decompresses into
|
||||
*
|
||||
* Return 0 on success, or return -1 and set errno on failure.
|
||||
*/
|
||||
int xpress_decompress(struct xpress_decompressor *decompressor,
|
||||
const void *compressed_data, size_t compressed_size,
|
||||
void *uncompressed_data, size_t uncompressed_size)
|
||||
{
|
||||
struct xpress_decompressor *d = decompressor;
|
||||
const u8 * const in_begin = compressed_data;
|
||||
u8 * const out_begin = uncompressed_data;
|
||||
u8 *out_next = out_begin;
|
||||
u8 * const out_end = out_begin + uncompressed_size;
|
||||
struct input_bitstream is;
|
||||
unsigned i;
|
||||
|
||||
/* Read the Huffman codeword lengths. */
|
||||
if (compressed_size < XPRESS_NUM_SYMBOLS / 2)
|
||||
goto invalid;
|
||||
for (i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) {
|
||||
d->lens[i*2 + 0] = in_begin[i] & 0xF;
|
||||
d->lens[i*2 + 1] = in_begin[i] >> 4;
|
||||
}
|
||||
|
||||
/* Build a decoding table for the Huffman code. */
|
||||
if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS,
|
||||
XPRESS_TABLEBITS, d->lens,
|
||||
XPRESS_MAX_CODEWORD_LEN,
|
||||
d->working_space))
|
||||
goto invalid;
|
||||
|
||||
/* Decode the matches and literals. */
|
||||
|
||||
init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2,
|
||||
compressed_size - XPRESS_NUM_SYMBOLS / 2);
|
||||
|
||||
while (out_next != out_end) {
|
||||
unsigned sym;
|
||||
unsigned log2_offset;
|
||||
u32 length;
|
||||
u32 offset;
|
||||
|
||||
sym = read_huffsym(&is, d->decode_table,
|
||||
XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
|
||||
if (sym < 256) {
|
||||
/* Literal */
|
||||
*out_next++ = sym;
|
||||
} else {
|
||||
/* Match */
|
||||
length = sym & 0xf;
|
||||
log2_offset = (sym >> 4) & 0xf;
|
||||
|
||||
bitstream_ensure_bits(&is, 16);
|
||||
|
||||
offset = ((u32)1 << log2_offset) |
|
||||
bitstream_pop_bits(&is, log2_offset);
|
||||
|
||||
if (length == 0xf) {
|
||||
length += bitstream_read_byte(&is);
|
||||
if (length == 0xf + 0xff)
|
||||
length = bitstream_read_u16(&is);
|
||||
}
|
||||
length += XPRESS_MIN_MATCH_LEN;
|
||||
|
||||
if (offset > (size_t)(out_next - out_begin))
|
||||
goto invalid;
|
||||
|
||||
if (length > (size_t)(out_end - out_next))
|
||||
goto invalid;
|
||||
|
||||
out_next = lz_copy(out_next, length, offset, out_end,
|
||||
XPRESS_MIN_MATCH_LEN);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
invalid:
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
 * xpress_free_decompressor - Release an XPRESS decompressor
 *
 * @decompressor:	A decompressor returned by
 *			xpress_allocate_decompressor(), or NULL, in which case
 *			the call has no effect.
 */
void xpress_free_decompressor(struct xpress_decompressor *decompressor)
{
	/* The decompressor is a single heap block with no owned
	 * sub-allocations. */
	free(decompressor);
}
|
Loading…
Reference in New Issue