commit 3ddd227ee8e3f03bafe41092d97c961f70c4597c Author: Eric Biggers Date: Sat Jul 2 16:44:18 2016 -0500 Initial release diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e5d4088 --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +*.la +*.lo +*.o +/.libs +/Makefile +/Makefile.in +/aclocal.m4 +/autom4te.cache/ +/build-aux/ +/config.h +/config.h.in +/config.h.in~ +/config.log +/config.status +/configure +/libtool +/m4/ +/src/.deps/ +/src/.dirstamp +/stamp-h1 diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..623b625 --- /dev/null +++ b/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. 
+ + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..191ac40 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,21 @@ +ACLOCAL_AMFLAGS = -I m4 + +EXTRA_DIST = README.md COPYING + +plugindir = $(libdir)/ntfs-3g + +plugin_LTLIBRARIES = ntfs-plugin-80000017.la + +ntfs_plugin_80000017_la_SOURCES = \ + src/decompress_common.c \ + src/decompress_common.h \ + src/lzx_decompress.c \ + src/plugin.c \ + src/system_compression.c \ + src/system_compression.h \ + src/xpress_decompress.c + +ntfs_plugin_80000017_la_LDFLAGS = -module -shared -avoid-version +ntfs_plugin_80000017_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 +ntfs_plugin_80000017_la_CFLAGS = $(LIBNTFS_3G_CFLAGS) +ntfs_plugin_80000017_la_LIBADD = $(LIBNTFS_3G_LIBS) diff --git a/README.md b/README.md new file mode 100644 index 0000000..45a0072 --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ +# Overview + +System compression, also known as "Compact OS", is a Windows feature that allows +rarely modified files to be compressed using the XPRESS or LZX compression +formats. It is not built directly into NTFS but rather is implemented using +reparse points. This feature appeared in Windows 10 and it appears that many +Windows 10 systems have been using it by default. + +This repository contains a plugin which enables the NTFS-3G FUSE driver to +transparently read from system-compressed files. It must be built against +NTFS-3G version 2016.2.22AR.1 or later, since that was the first version to +include support for reparse point plugins. + +Currently, only reading is supported. Compressing an existing file may be done +by using the "compact" utility on Windows, with one of the options below +("xpress4k" is the weakest and fastest, "lzx" is the strongest and slowest): + + /exe:xpress4k + /exe:xpress8k + /exe:xpress16k + /exe:lzx + +# Installation + +The plugin can be built by running `./configure && make`. The build system must +be able to find the NTFS-3G library and headers. 
On some platforms this may +require that the "ntfs-3g-dev" package or similar be installed in addition to +the main "ntfs-3g" package. + +After compiling, run `make install` to install the plugin to the NTFS-3G plugin +directory, which will be a subdirectory "ntfs-3g" of the system library +directory (`$libdir`). An example full path to the installed plugin is +`/usr/lib/ntfs-3g/ntfs-plugin-80000017.so`. It may differ slightly on different +platforms. `make install` will create the plugin directory if it does not +already exist. + +# License + +This software may be redistributed and/or modified under the terms of the GNU +General Public License as published by the Free Software Foundation, either +version 2 of the License, or (at your option) any later version. There is NO +WARRANTY, to the extent permitted by law. See the COPYING file for details. diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..30f9e58 --- /dev/null +++ b/configure.ac @@ -0,0 +1,29 @@ +AC_INIT([ntfs-3g-system-compression], [0.1], [ebiggers3@gmail.com]) + +AC_CONFIG_SRCDIR([src/plugin.c]) +AC_CONFIG_MACRO_DIR([m4]) +AC_CONFIG_AUX_DIR([build-aux]) +AM_INIT_AUTOMAKE([-Wall -Werror subdir-objects foreign]) +AM_SILENT_RULES([yes]) +m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) + +AC_PROG_CC +AC_C_BIGENDIAN + +LT_INIT([dlopen]) + +AC_CONFIG_HEADERS([config.h]) +AC_CONFIG_FILES([Makefile]) + +AC_CHECK_HEADERS([errno.h \ + limits.h \ + stdarg.h \ + stddef.h \ + stdlib.h \ + string.h \ + sys/types.h]) + +# TODO: should be changed to require 2017.#.## when released +PKG_CHECK_MODULES([LIBNTFS_3G], [libntfs-3g >= 2016.2.22], [], + [AC_MSG_ERROR(["Unable to find libntfs-3g"])]) +AC_OUTPUT diff --git a/src/decompress_common.c b/src/decompress_common.c new file mode 100644 index 0000000..a963b54 --- /dev/null +++ b/src/decompress_common.c @@ -0,0 +1,325 @@ +/* + * decompress_common.c - Code shared by the XPRESS and LZX decompressors + * + * Copyright (C) 2015 Eric Biggers + * + * This program
is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "decompress_common.h" + +/* + * make_huffman_decode_table() - + * + * Build a decoding table for a canonical prefix code, or "Huffman code". + * + * This is an internal function, not part of the library API! + * + * This takes as input the length of the codeword for each symbol in the + * alphabet and produces as output a table that can be used for fast + * decoding of prefix-encoded symbols using read_huffsym(). + * + * Strictly speaking, a canonical prefix code might not be a Huffman + * code. But this algorithm will work either way; and in fact, since + * Huffman codes are defined in terms of symbol frequencies, there is no + * way for the decompressor to know whether the code is a true Huffman + * code or not until all symbols have been decoded. + * + * Because the prefix code is assumed to be "canonical", it can be + * reconstructed directly from the codeword lengths. A prefix code is + * canonical if and only if a longer codeword never lexicographically + * precedes a shorter codeword, and the lexicographic ordering of + * codewords of the same length is the same as the lexicographic ordering + * of the corresponding symbols. 
Consequently, we can sort the symbols + * primarily by codeword length and secondarily by symbol value, then + * reconstruct the prefix code by generating codewords lexicographically + * in that order. + * + * This function does not, however, generate the prefix code explicitly. + * Instead, it directly builds a table for decoding symbols using the + * code. The basic idea is this: given the next 'max_codeword_len' bits + * in the input, we can look up the decoded symbol by indexing a table + * containing 2**max_codeword_len entries. A codeword with length + * 'max_codeword_len' will have exactly one entry in this table, whereas + * a codeword shorter than 'max_codeword_len' will have multiple entries + * in this table. Precisely, a codeword of length n will be represented + * by 2**(max_codeword_len - n) entries in this table. The 0-based index + * of each such entry will contain the corresponding codeword as a prefix + * when zero-padded on the left to 'max_codeword_len' binary digits. + * + * That's the basic idea, but we implement two optimizations regarding + * the format of the decode table itself: + * + * - For many compression formats, the maximum codeword length is too + * long for it to be efficient to build the full decoding table + * whenever a new prefix code is used. Instead, we can build the table + * using only 2**table_bits entries, where 'table_bits' is some number + * less than or equal to 'max_codeword_len'. Then, only codewords of + * length 'table_bits' and shorter can be directly looked up. For + * longer codewords, the direct lookup instead produces the root of a + * binary tree. Using this tree, the decoder can do traditional + * bit-by-bit decoding of the remainder of the codeword. Child nodes + * are allocated in extra entries at the end of the table; leaf nodes + * contain symbols. 
Note that the long-codeword case is, in general, + * not performance critical, since in Huffman codes the most frequently + * used symbols are assigned the shortest codeword lengths. + * + * - When we decode a symbol using a direct lookup of the table, we still + * need to know its length so that the bitstream can be advanced by the + * appropriate number of bits. The simple solution is to simply retain + * the 'lens' array and use the decoded symbol as an index into it. + * However, this requires two separate array accesses in the fast path. + * The optimization is to store the length directly in the decode + * table. We use the bottom 11 bits for the symbol and the top 5 bits + * for the length. In addition, to combine this optimization with the + * previous one, we introduce a special case where the top 2 bits of + * the length are both set if the entry is actually the root of a + * binary tree. + * + * @decode_table: + * The array in which to create the decoding table. This must have + * a length of at least ((2**table_bits) + 2 * num_syms) entries. + * + * @num_syms: + * The number of symbols in the alphabet; also, the length of the + * 'lens' array. Must be less than or equal to 2048. + * + * @table_bits: + * The order of the decode table size, as explained above. Must be + * less than or equal to 13. + * + * @lens: + * An array of length @num_syms, indexable by symbol, that gives the + * length of the codeword, in bits, for that symbol. The length can + * be 0, which means that the symbol does not have a codeword + * assigned. + * + * @max_codeword_len: + * The longest codeword length allowed in the compression format. + * All entries in 'lens' must be less than or equal to this value. + * This must be less than or equal to 23. + * + * @working_space + * A temporary array of length '2 * (max_codeword_len + 1) + + * num_syms'. + * + * Returns 0 on success, or -1 if the lengths do not form a valid prefix + * code. 
+ */ +int make_huffman_decode_table(u16 decode_table[], const unsigned num_syms, + const unsigned table_bits, const u8 lens[], + const unsigned max_codeword_len, + u16 working_space[]) +{ + const unsigned table_num_entries = 1 << table_bits; + u16 * const len_counts = &working_space[0]; + u16 * const offsets = &working_space[1 * (max_codeword_len + 1)]; + u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)]; + int left; + void *decode_table_ptr; + unsigned sym_idx; + unsigned codeword_len; + unsigned stores_per_loop; + unsigned decode_table_pos; + unsigned len; + unsigned sym; + + /* Count how many symbols have each possible codeword length. + * Note that a length of 0 indicates the corresponding symbol is not + * used in the code and therefore does not have a codeword. */ + for (len = 0; len <= max_codeword_len; len++) + len_counts[len] = 0; + for (sym = 0; sym < num_syms; sym++) + len_counts[lens[sym]]++; + + /* We can assume all lengths are <= max_codeword_len, but we + * cannot assume they form a valid prefix code. A codeword of + * length n should require a proportion of the codespace equaling + * (1/2)^n. The code is valid if and only if the codespace is + * exactly filled by the lengths, by this measure. */ + left = 1; + for (len = 1; len <= max_codeword_len; len++) { + left <<= 1; + left -= len_counts[len]; + if (left < 0) { + /* The lengths overflow the codespace; that is, the code + * is over-subscribed. */ + return -1; + } + } + + if (left != 0) { + /* The lengths do not fill the codespace; that is, they form an + * incomplete set. */ + if (left == (1 << max_codeword_len)) { + /* The code is completely empty. This is arguably + * invalid, but in fact it is valid in LZX and XPRESS, + * so we must allow it. By definition, no symbols can + * be decoded with an empty code. Consequently, we + * technically don't even need to fill in the decode + * table. 
However, to avoid accessing uninitialized + * memory if the algorithm nevertheless attempts to + * decode symbols using such a code, we zero out the + * decode table. */ + memset(decode_table, 0, + table_num_entries * sizeof(decode_table[0])); + return 0; + } + return -1; + } + + /* Sort the symbols primarily by length and secondarily by symbol order. + */ + + /* Initialize 'offsets' so that offsets[len] for 1 <= len <= + * max_codeword_len is the number of codewords shorter than 'len' bits. + */ + offsets[1] = 0; + for (len = 1; len < max_codeword_len; len++) + offsets[len + 1] = offsets[len] + len_counts[len]; + + /* Use the 'offsets' array to sort the symbols. Note that we do not + * include symbols that are not used in the code. Consequently, fewer + * than 'num_syms' entries in 'sorted_syms' may be filled. */ + for (sym = 0; sym < num_syms; sym++) + if (lens[sym] != 0) + sorted_syms[offsets[lens[sym]]++] = sym; + + /* Fill entries for codewords with length <= table_bits + * --- that is, those short enough for a direct mapping. + * + * The table will start with entries for the shortest codeword(s), which + * have the most entries. From there, the number of entries per + * codeword will decrease. */ + decode_table_ptr = decode_table; + sym_idx = 0; + codeword_len = 1; + stores_per_loop = (1 << (table_bits - codeword_len)); + for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) { + unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; + for (; sym_idx < end_sym_idx; sym_idx++) { + u16 entry; + u16 *p; + unsigned n; + + entry = ((u32)codeword_len << 11) | sorted_syms[sym_idx]; + p = (u16*)decode_table_ptr; + n = stores_per_loop; + + do { + *p++ = entry; + } while (--n); + + decode_table_ptr = p; + } + } + + /* If we've filled in the entire table, we are done. Otherwise, + * there are codewords longer than table_bits for which we must + * generate binary trees. 
*/ + + decode_table_pos = (u16*)decode_table_ptr - decode_table; + if (decode_table_pos != table_num_entries) { + unsigned j; + unsigned next_free_tree_slot; + unsigned cur_codeword; + + /* First, zero out the remaining entries. This is + * necessary so that these entries appear as + * "unallocated" in the next part. Each of these entries + * will eventually be filled with the representation of + * the root node of a binary tree. */ + j = decode_table_pos; + do { + decode_table[j] = 0; + } while (++j != table_num_entries); + + /* We allocate child nodes starting at the end of the + * direct lookup table. Note that there should be + * 2*num_syms extra entries for this purpose, although + * fewer than this may actually be needed. */ + next_free_tree_slot = table_num_entries; + + /* Iterate through each codeword with length greater than + * 'table_bits', primarily in order of codeword length + * and secondarily in order of symbol. */ + for (cur_codeword = decode_table_pos << 1; + codeword_len <= max_codeword_len; + codeword_len++, cur_codeword <<= 1) + { + unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; + for (; sym_idx < end_sym_idx; sym_idx++, cur_codeword++) + { + /* 'sorted_sym' is the symbol represented by the + * codeword. */ + unsigned sorted_sym = sorted_syms[sym_idx]; + + unsigned extra_bits = codeword_len - table_bits; + + unsigned node_idx = cur_codeword >> extra_bits; + + /* Go through each bit of the current codeword + * beyond the prefix of length @table_bits and + * walk the appropriate binary tree, allocating + * any slots that have not yet been allocated. + * + * Note that the 'pointer' entry to the binary + * tree, which is stored in the direct lookup + * portion of the table, is represented + * identically to other internal (non-leaf) + * nodes of the binary tree; it can be thought + * of as simply the root of the tree. 
The + * representation of these internal nodes is + * simply the index of the left child combined + * with the special bits 0xC000 to distinguish + * the entry from direct mapping and leaf node + * entries. */ + do { + + /* At least one bit remains in the + * codeword, but the current node is an + * unallocated leaf. Change it to an + * internal node. */ + if (decode_table[node_idx] == 0) { + decode_table[node_idx] = + next_free_tree_slot | 0xC000; + decode_table[next_free_tree_slot++] = 0; + decode_table[next_free_tree_slot++] = 0; + } + + /* Go to the left child if the next bit + * in the codeword is 0; otherwise go to + * the right child. */ + node_idx = decode_table[node_idx] & 0x3FFF; + --extra_bits; + node_idx += (cur_codeword >> extra_bits) & 1; + } while (extra_bits != 0); + + /* We've traversed the tree using the entire + * codeword, and we're now at the entry where + * the actual symbol will be stored. This is + * distinguished from internal nodes by not + * having its high two bits set. */ + decode_table[node_idx] = sorted_sym; + } + } + } + return 0; +} diff --git a/src/decompress_common.h b/src/decompress_common.h new file mode 100644 index 0000000..cfb035c --- /dev/null +++ b/src/decompress_common.h @@ -0,0 +1,375 @@ +/* + * decompress_common.h - Code shared by the XPRESS and LZX decompressors + * + * Copyright (C) 2015 Eric Biggers + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see .
+ */ + +#include +#include + +#include +#include + +/* "Force inline" macro (not required, but helpful for performance) */ +#ifdef __GNUC__ +# define forceinline inline __attribute__((always_inline)) +#else +# define forceinline inline +#endif + +/* Enable whole-word match copying on selected architectures */ +#if defined(__i386__) || defined(__x86_64__) || defined(__ARM_FEATURE_UNALIGNED) +# define FAST_UNALIGNED_ACCESS +#endif + +/* Size of a machine word */ +#define WORDBYTES (sizeof(size_t)) + +/* Inline functions to read and write unaligned data. + * We use just memcpy() for this. It is standard and modern compilers will + * usually replace it with load/store instructions. */ + +static forceinline u16 get_unaligned_le16(const u8 *p) +{ + le16 v_le; + memcpy(&v_le, p, 2); + return le16_to_cpu(v_le); +} + +static forceinline u32 get_unaligned_le32(const u8 *p) +{ + le32 v_le; + memcpy(&v_le, p, 4); + return le32_to_cpu(v_le); +} + +static forceinline void put_unaligned_le32(u32 v, u8 *p) +{ + le32 v_le = cpu_to_le32(v); + memcpy(p, &v_le, 4); +} + +/* Load a "word" with platform-dependent size and endianness. */ +static forceinline size_t get_unaligned_word(const u8 *p) +{ + size_t v; + memcpy(&v, p, WORDBYTES); + return v; +} + +/* Store a "word" with platform-dependent size and endianness. */ +static forceinline void put_unaligned_word(size_t v, u8 *p) +{ + memcpy(p, &v, WORDBYTES); +} + +/* Copy a "word" with platform-dependent size. */ +static forceinline void copy_unaligned_word(const u8 *src, u8 *dst) +{ + put_unaligned_word(get_unaligned_word(src), dst); +} + +/* Generate a "word" with platform-dependent size whose bytes all contain the + * value 'b'. */ +static forceinline size_t repeat_byte(u8 b) +{ + size_t v; + + v = b; + v |= v << 8; + v |= v << 16; + v |= v << ((WORDBYTES == 8) ? 32 : 0); + return v; +} + +/* Structure that encapsulates a block of in-memory data being interpreted as a + * stream of bits, optionally with interwoven literal bytes. 
Bits are assumed + * to be stored in little endian 16-bit coding units, with the bits ordered high + * to low. */ +struct input_bitstream { + + /* Bits that have been read from the input buffer. The bits are + * left-justified; the next bit is always bit 31. */ + u32 bitbuf; + + /* Number of bits currently held in @bitbuf. */ + unsigned bitsleft; + + /* Pointer to the next byte to be retrieved from the input buffer. */ + const u8 *next; + + /* Pointer to just past the end of the input buffer. */ + const u8 *end; +}; + +/* Initialize a bitstream to read from the specified input buffer. */ +static forceinline void init_input_bitstream(struct input_bitstream *is, + const void *buffer, u32 size) +{ + is->bitbuf = 0; + is->bitsleft = 0; + is->next = buffer; + is->end = is->next + size; +} + +/* Ensure the bit buffer variable for the bitstream contains at least @num_bits + * bits. Following this, bitstream_peek_bits() and/or bitstream_remove_bits() + * may be called on the bitstream to peek or remove up to @num_bits bits. Note + * that @num_bits must be <= 16. */ +static forceinline void bitstream_ensure_bits(struct input_bitstream *is, + unsigned num_bits) +{ + if (is->bitsleft < num_bits) { + if (is->end - is->next >= 2) { + is->bitbuf |= (u32)get_unaligned_le16(is->next) + << (16 - is->bitsleft); + is->next += 2; + } + is->bitsleft += 16; + } +} + +/* Return the next @num_bits bits from the bitstream, without removing them. + * There must be at least @num_bits remaining in the buffer variable, from a + * previous call to bitstream_ensure_bits(). */ +static forceinline u32 bitstream_peek_bits(const struct input_bitstream *is, + unsigned num_bits) +{ + if (num_bits == 0) + return 0; + return is->bitbuf >> (32 - num_bits); +} + +/* Remove @num_bits from the bitstream. There must be at least @num_bits + * remaining in the buffer variable, from a previous call to + * bitstream_ensure_bits(). 
*/ +static forceinline void bitstream_remove_bits(struct input_bitstream *is, + unsigned num_bits) +{ + is->bitbuf <<= num_bits; + is->bitsleft -= num_bits; +} + +/* Remove and return @num_bits bits from the bitstream. There must be at least + * @num_bits remaining in the buffer variable, from a previous call to + * bitstream_ensure_bits(). */ +static forceinline u32 bitstream_pop_bits(struct input_bitstream *is, + unsigned num_bits) +{ + u32 bits = bitstream_peek_bits(is, num_bits); + bitstream_remove_bits(is, num_bits); + return bits; +} + +/* Read and return the next @num_bits bits from the bitstream. */ +static forceinline u32 bitstream_read_bits(struct input_bitstream *is, + unsigned num_bits) +{ + bitstream_ensure_bits(is, num_bits); + return bitstream_pop_bits(is, num_bits); +} + +/* Read and return the next literal byte embedded in the bitstream. */ +static forceinline u8 bitstream_read_byte(struct input_bitstream *is) +{ + if (is->end == is->next) + return 0; + return *is->next++; +} + +/* Read and return the next 16-bit integer embedded in the bitstream. */ +static forceinline u16 bitstream_read_u16(struct input_bitstream *is) +{ + u16 v; + + if (is->end - is->next < 2) + return 0; + v = get_unaligned_le16(is->next); + is->next += 2; + return v; +} + +/* Read and return the next 32-bit integer embedded in the bitstream. */ +static forceinline u32 bitstream_read_u32(struct input_bitstream *is) +{ + u32 v; + + if (is->end - is->next < 4) + return 0; + v = get_unaligned_le32(is->next); + is->next += 4; + return v; +} + +/* Read into @dst_buffer an array of literal bytes embedded in the bitstream. + * Return either a pointer to the byte past the last written, or NULL if the + * read overflows the input buffer. 
*/ +static forceinline void *bitstream_read_bytes(struct input_bitstream *is, + void *dst_buffer, size_t count) +{ + if ((size_t)(is->end - is->next) < count) + return NULL; + memcpy(dst_buffer, is->next, count); + is->next += count; + return (u8 *)dst_buffer + count; +} + +/* Align the input bitstream on a coding-unit boundary. */ +static forceinline void bitstream_align(struct input_bitstream *is) +{ + is->bitsleft = 0; + is->bitbuf = 0; +} + +extern int make_huffman_decode_table(u16 decode_table[], const unsigned num_syms, + const unsigned num_bits, const u8 lens[], + const unsigned max_codeword_len, + u16 working_space[]); + + +/* Reads and returns the next Huffman-encoded symbol from a bitstream. If the + * input data is exhausted, the Huffman symbol is decoded as if the missing bits + * are all zeroes. */ +static forceinline unsigned read_huffsym(struct input_bitstream *istream, + const u16 decode_table[], + unsigned table_bits, + unsigned max_codeword_len) +{ + unsigned entry; + unsigned key_bits; + + bitstream_ensure_bits(istream, max_codeword_len); + + /* Index the decode table by the next table_bits bits of the input. */ + key_bits = bitstream_peek_bits(istream, table_bits); + entry = decode_table[key_bits]; + if (entry < 0xC000) { + /* Fast case: The decode table directly provided the + * symbol and codeword length. The low 11 bits are the + * symbol, and the high 5 bits are the codeword length. */ + bitstream_remove_bits(istream, entry >> 11); + return entry & 0x7FF; + } else { + /* Slow case: The codeword for the symbol is longer than + * table_bits, so the symbol does not have an entry + * directly in the first (1 << table_bits) entries of the + * decode table. Traverse the appropriate binary tree + * bit-by-bit to decode the symbol. 
*/ + bitstream_remove_bits(istream, table_bits); + do { + key_bits = (entry & 0x3FFF) + bitstream_pop_bits(istream, 1); + } while ((entry = decode_table[key_bits]) >= 0xC000); + return entry; + } +} + +/* + * Copy an LZ77 match at (dst - offset) to dst. + * + * The length and offset must be already validated --- that is, (dst - offset) + * can't underrun the output buffer, and (dst + length) can't overrun the output + * buffer. Also, the length cannot be 0. + * + * @bufend points to the byte past the end of the output buffer. This function + * won't write any data beyond this position. + * + * Returns dst + length. + */ +static forceinline u8 *lz_copy(u8 *dst, u32 length, u32 offset, const u8 *bufend, + u32 min_length) +{ + const u8 *src = dst - offset; + + /* + * Try to copy one machine word at a time. On i386 and x86_64 this is + * faster than copying one byte at a time, unless the data is + * near-random and all the matches have very short lengths. Note that + * since this requires unaligned memory accesses, it won't necessarily + * be faster on every architecture. + * + * Also note that we might copy more than the length of the match. For + * example, if a word is 8 bytes and the match is of length 5, then + * we'll simply copy 8 bytes. This is okay as long as we don't write + * beyond the end of the output buffer, hence the check for (bufend - + * end >= WORDBYTES - 1). + */ +#ifdef FAST_UNALIGNED_ACCESS + u8 * const end = dst + length; + if (bufend - end >= (ptrdiff_t)(WORDBYTES - 1)) { + + if (offset >= WORDBYTES) { + /* The source and destination words don't overlap. */ + + /* To improve branch prediction, one iteration of this + * loop is unrolled. Most matches are short and will + * fail the first check. But if that check passes, then + * it becomes increasingly likely that the match is long + * and we'll need to continue copying.
*/ + + copy_unaligned_word(src, dst); + src += WORDBYTES; + dst += WORDBYTES; + + if (dst < end) { + do { + copy_unaligned_word(src, dst); + src += WORDBYTES; + dst += WORDBYTES; + } while (dst < end); + } + return end; + } else if (offset == 1) { + + /* Offset 1 matches are equivalent to run-length + * encoding of the previous byte. This case is common + * if the data contains many repeated bytes. */ + + size_t v = repeat_byte(*(dst - 1)); + do { + put_unaligned_word(v, dst); + src += WORDBYTES; + dst += WORDBYTES; + } while (dst < end); + return end; + } + /* + * We don't bother with special cases for other 'offset < + * WORDBYTES', which are usually rarer than 'offset == 1'. Extra + * checks will just slow things down. Actually, it's possible + * to handle all the 'offset < WORDBYTES' cases using the same + * code, but it still becomes more complicated and doesn't seem any + * faster overall; it definitely slows down the more common + * 'offset == 1' case. + */ + } +#endif /* FAST_UNALIGNED_ACCESS */ + + /* Fall back to a bytewise copy. */ + + if (min_length >= 2) { + *dst++ = *src++; + length--; + } + if (min_length >= 3) { + *dst++ = *src++; + length--; + } + do { + *dst++ = *src++; + } while (--length); + + return dst; +} diff --git a/src/lzx_decompress.c b/src/lzx_decompress.c new file mode 100644 index 0000000..a6bed16 --- /dev/null +++ b/src/lzx_decompress.c @@ -0,0 +1,678 @@ +/* + * lzx_decompress.c - A decompressor for the LZX compression format, which can + * be used in "System Compressed" files. This is based on the code from wimlib. + * This code only supports a window size (dictionary size) of 32768 bytes, since + * this is the only size used in System Compression.
+ * + * Copyright (C) 2015 Eric Biggers + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +#include + +#include "decompress_common.h" +#include "system_compression.h" + +/* Number of literal byte values */ +#define LZX_NUM_CHARS 256 + +/* The smallest and largest allowed match lengths */ +#define LZX_MIN_MATCH_LEN 2 +#define LZX_MAX_MATCH_LEN 257 + +/* Number of distinct match lengths that can be represented */ +#define LZX_NUM_LENS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1) + +/* Number of match lengths for which no length symbol is required */ +#define LZX_NUM_PRIMARY_LENS 7 +#define LZX_NUM_LEN_HEADERS (LZX_NUM_PRIMARY_LENS + 1) + +/* Valid values of the 3-bit block type field */ +#define LZX_BLOCKTYPE_VERBATIM 1 +#define LZX_BLOCKTYPE_ALIGNED 2 +#define LZX_BLOCKTYPE_UNCOMPRESSED 3 + +/* Number of offset slots for a window size of 32768 */ +#define LZX_NUM_OFFSET_SLOTS 30 + +/* Number of symbols in the main code for a window size of 32768 */ +#define LZX_MAINCODE_NUM_SYMBOLS \ + (LZX_NUM_CHARS + (LZX_NUM_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS)) + +/* Number of symbols in the length code */ +#define LZX_LENCODE_NUM_SYMBOLS (LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS) + +/* Number of symbols in the precode */ +#define LZX_PRECODE_NUM_SYMBOLS 20 + +/* Number of bits in which each precode codeword length is 
represented */ +#define LZX_PRECODE_ELEMENT_SIZE 4 + +/* Number of low-order bits of each match offset that are entropy-encoded in + * aligned offset blocks */ +#define LZX_NUM_ALIGNED_OFFSET_BITS 3 + +/* Number of symbols in the aligned offset code */ +#define LZX_ALIGNEDCODE_NUM_SYMBOLS (1 << LZX_NUM_ALIGNED_OFFSET_BITS) + +/* Mask for the match offset bits that are entropy-encoded in aligned offset + * blocks */ +#define LZX_ALIGNED_OFFSET_BITMASK ((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1) + +/* Number of bits in which each aligned offset codeword length is represented */ +#define LZX_ALIGNEDCODE_ELEMENT_SIZE 3 + +/* Maximum lengths (in bits) of the codewords in each Huffman code */ +#define LZX_MAX_MAIN_CODEWORD_LEN 16 +#define LZX_MAX_LEN_CODEWORD_LEN 16 +#define LZX_MAX_PRE_CODEWORD_LEN ((1 << LZX_PRECODE_ELEMENT_SIZE) - 1) +#define LZX_MAX_ALIGNED_CODEWORD_LEN ((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1) + +/* The default "filesize" value used in pre/post-processing. In the LZX format + * used in cabinet files this value must be given to the decompressor, whereas + * in the LZX format used in WIM files and system-compressed files this value is + * fixed at 12000000. */ +#define LZX_DEFAULT_FILESIZE 12000000 + +/* Assumed block size when the encoded block size begins with a 0 bit. */ +#define LZX_DEFAULT_BLOCK_SIZE 32768 + +/* Number of offsets in the recent (or "repeat") offsets queue. */ +#define LZX_NUM_RECENT_OFFSETS 3 + +/* These values are chosen for fast decompression. */ +#define LZX_MAINCODE_TABLEBITS 11 +#define LZX_LENCODE_TABLEBITS 10 +#define LZX_PRECODE_TABLEBITS 6 +#define LZX_ALIGNEDCODE_TABLEBITS 7 + +#define LZX_READ_LENS_MAX_OVERRUN 50 + +/* Mapping: offset slot => first match offset that uses that offset slot. 
+ */ +static const u32 lzx_offset_slot_base[LZX_NUM_OFFSET_SLOTS + 1] = { + 0 , 1 , 2 , 3 , 4 , /* 0 --- 4 */ + 6 , 8 , 12 , 16 , 24 , /* 5 --- 9 */ + 32 , 48 , 64 , 96 , 128 , /* 10 --- 14 */ + 192 , 256 , 384 , 512 , 768 , /* 15 --- 19 */ + 1024 , 1536 , 2048 , 3072 , 4096 , /* 20 --- 24 */ + 6144 , 8192 , 12288 , 16384 , 24576 , /* 25 --- 29 */ + 32768 , /* extra */ +}; + +/* Mapping: offset slot => how many extra bits must be read and added to the + * corresponding offset slot base to decode the match offset. */ +static const u8 lzx_extra_offset_bits[LZX_NUM_OFFSET_SLOTS] = { + 0 , 0 , 0 , 0 , 1 , + 1 , 2 , 2 , 3 , 3 , + 4 , 4 , 5 , 5 , 6 , + 6 , 7 , 7 , 8 , 8 , + 9 , 9 , 10, 10, 11, + 11, 12, 12, 13, 13, +}; + +/* Reusable heap-allocated memory for LZX decompression */ +struct lzx_decompressor { + + /* Huffman decoding tables, and arrays that map symbols to codeword + * lengths */ + + u16 maincode_decode_table[(1 << LZX_MAINCODE_TABLEBITS) + + (LZX_MAINCODE_NUM_SYMBOLS * 2)]; + u8 maincode_lens[LZX_MAINCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN]; + + + u16 lencode_decode_table[(1 << LZX_LENCODE_TABLEBITS) + + (LZX_LENCODE_NUM_SYMBOLS * 2)]; + u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN]; + + + u16 alignedcode_decode_table[(1 << LZX_ALIGNEDCODE_TABLEBITS) + + (LZX_ALIGNEDCODE_NUM_SYMBOLS * 2)]; + u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS]; + + u16 precode_decode_table[(1 << LZX_PRECODE_TABLEBITS) + + (LZX_PRECODE_NUM_SYMBOLS * 2)]; + u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS]; + + /* Temporary space for make_huffman_decode_table() */ + u16 working_space[2 * (1 + LZX_MAX_MAIN_CODEWORD_LEN) + + LZX_MAINCODE_NUM_SYMBOLS]; +}; + +static void undo_e8_translation(void *target, s32 input_pos) +{ + s32 abs_offset, rel_offset; + + abs_offset = get_unaligned_le32(target); + if (abs_offset >= 0) { + if (abs_offset < LZX_DEFAULT_FILESIZE) { + /* "good translation" */ + rel_offset = abs_offset - input_pos; + put_unaligned_le32(rel_offset, 
target); + } + } else { + if (abs_offset >= -input_pos) { + /* "compensating translation" */ + rel_offset = abs_offset + LZX_DEFAULT_FILESIZE; + put_unaligned_le32(rel_offset, target); + } + } +} + +/* + * Undo the 'E8' preprocessing used in LZX. Before compression, the + * uncompressed data was preprocessed by changing the targets of suspected x86 + * CALL instructions from relative offsets to absolute offsets. After + * match/literal decoding, the decompressor must undo the translation. + */ +static void lzx_postprocess(u8 *data, u32 size) +{ + /* + * A worthwhile optimization is to push the end-of-buffer check into the + * relatively rare E8 case. This is possible if we replace the last six + * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte + * before reaching end-of-buffer. In addition, this scheme guarantees + * that no translation can begin following an E8 byte in the last 10 + * bytes because a 4-byte offset containing E8 as its high byte is a + * large negative number that is not valid for translation. That is + * exactly what we need. + */ + u8 *tail; + u8 saved_bytes[6]; + u8 *p; + + if (size <= 10) + return; + + tail = &data[size - 6]; + memcpy(saved_bytes, tail, 6); + memset(tail, 0xE8, 6); + p = data; + for (;;) { + while (*p != 0xE8) + p++; + if (p >= tail) + break; + undo_e8_translation(p + 1, p - data); + p += 5; + } + memcpy(tail, saved_bytes, 6); +} + +/* Read a Huffman-encoded symbol using the precode. */ +static forceinline unsigned read_presym(const struct lzx_decompressor *d, + struct input_bitstream *is) +{ + return read_huffsym(is, d->precode_decode_table, + LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN); +} + +/* Read a Huffman-encoded symbol using the main code. 
*/ +static forceinline unsigned read_mainsym(const struct lzx_decompressor *d, + struct input_bitstream *is) +{ + return read_huffsym(is, d->maincode_decode_table, + LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN); +} + +/* Read a Huffman-encoded symbol using the length code. */ +static forceinline unsigned read_lensym(const struct lzx_decompressor *d, + struct input_bitstream *is) +{ + return read_huffsym(is, d->lencode_decode_table, + LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN); +} + +/* Read a Huffman-encoded symbol using the aligned offset code. */ +static forceinline unsigned read_alignedsym(const struct lzx_decompressor *d, + struct input_bitstream *is) +{ + return read_huffsym(is, d->alignedcode_decode_table, + LZX_ALIGNEDCODE_TABLEBITS, + LZX_MAX_ALIGNED_CODEWORD_LEN); +} + +/* + * Read the precode from the compressed input bitstream, then use it to decode + * @num_lens codeword length values. + * + * @is: The input bitstream. + * + * @lens: An array that contains the length values from the previous time + * the codeword lengths for this Huffman code were read, or all 0's + * if this is the first time. This array must have at least + * (@num_lens + LZX_READ_LENS_MAX_OVERRUN) entries. + * + * @num_lens: Number of length values to decode. + * + * Returns 0 on success, or -1 if the data was invalid. + */ +static int lzx_read_codeword_lens(struct lzx_decompressor *d, + struct input_bitstream *is, + u8 *lens, unsigned num_lens) +{ + u8 *len_ptr = lens; + u8 *lens_end = lens + num_lens; + int i; + + /* Read the lengths of the precode codewords. These are given + * explicitly. */ + for (i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) { + d->precode_lens[i] = + bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE); + } + + /* Make the decoding table for the precode. 
*/ + if (make_huffman_decode_table(d->precode_decode_table, + LZX_PRECODE_NUM_SYMBOLS, + LZX_PRECODE_TABLEBITS, + d->precode_lens, + LZX_MAX_PRE_CODEWORD_LEN, + d->working_space)) + return -1; + + /* Decode the codeword lengths. */ + do { + unsigned presym; + u8 len; + + /* Read the next precode symbol. */ + presym = read_presym(d, is); + if (presym < 17) { + /* Difference from old length */ + len = *len_ptr - presym; + if ((s8)len < 0) + len += 17; + *len_ptr++ = len; + } else { + /* Special RLE values */ + + unsigned run_len; + + if (presym == 17) { + /* Run of 0's */ + run_len = 4 + bitstream_read_bits(is, 4); + len = 0; + } else if (presym == 18) { + /* Longer run of 0's */ + run_len = 20 + bitstream_read_bits(is, 5); + len = 0; + } else { + /* Run of identical lengths */ + run_len = 4 + bitstream_read_bits(is, 1); + presym = read_presym(d, is); + if (presym > 17) + return -1; + len = *len_ptr - presym; + if ((s8)len < 0) + len += 17; + } + + do { + *len_ptr++ = len; + } while (--run_len); + /* Worst case overrun is when presym == 18, + * run_len == 20 + 31, and only 1 length was remaining. + * So LZX_READ_LENS_MAX_OVERRUN == 50. + * + * Overrun while reading the first half of maincode_lens + * can corrupt the previous values in the second half. + * This doesn't really matter because the resulting + * lengths will still be in range, and data that + * generates overruns is invalid anyway. */ + } + } while (len_ptr < lens_end); + + return 0; +} + +/* + * Read the header of an LZX block and save the block type and (uncompressed) + * size in *block_type_ret and *block_size_ret, respectively. + * + * If the block is compressed, also update the Huffman decode @tables with the + * new Huffman codes. If the block is uncompressed, also update the match + * offset @queue with the new match offsets. + * + * Return 0 on success, or -1 if the data was invalid. 
+ */ +static int lzx_read_block_header(struct lzx_decompressor *d, + struct input_bitstream *is, + int *block_type_ret, + u32 *block_size_ret, + u32 recent_offsets[]) +{ + int block_type; + u32 block_size; + int i; + + bitstream_ensure_bits(is, 4); + + /* The first three bits tell us what kind of block it is, and should be + * one of the LZX_BLOCKTYPE_* values. */ + block_type = bitstream_pop_bits(is, 3); + + /* Read the block size. */ + if (bitstream_pop_bits(is, 1)) { + block_size = LZX_DEFAULT_BLOCK_SIZE; + } else { + block_size = 0; + block_size |= bitstream_read_bits(is, 8); + block_size <<= 8; + block_size |= bitstream_read_bits(is, 8); + } + + switch (block_type) { + + case LZX_BLOCKTYPE_ALIGNED: + + /* Read the aligned offset code and prepare its decode table. + */ + + for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) { + d->alignedcode_lens[i] = + bitstream_read_bits(is, + LZX_ALIGNEDCODE_ELEMENT_SIZE); + } + + if (make_huffman_decode_table(d->alignedcode_decode_table, + LZX_ALIGNEDCODE_NUM_SYMBOLS, + LZX_ALIGNEDCODE_TABLEBITS, + d->alignedcode_lens, + LZX_MAX_ALIGNED_CODEWORD_LEN, + d->working_space)) + return -1; + + /* Fall through, since the rest of the header for aligned offset + * blocks is the same as that for verbatim blocks. */ + + case LZX_BLOCKTYPE_VERBATIM: + + /* Read the main code and prepare its decode table. + * + * Note that the codeword lengths in the main code are encoded + * in two parts: one part for literal symbols, and one part for + * match symbols. */ + + if (lzx_read_codeword_lens(d, is, d->maincode_lens, + LZX_NUM_CHARS)) + return -1; + + if (lzx_read_codeword_lens(d, is, + d->maincode_lens + LZX_NUM_CHARS, + LZX_MAINCODE_NUM_SYMBOLS - LZX_NUM_CHARS)) + return -1; + + if (make_huffman_decode_table(d->maincode_decode_table, + LZX_MAINCODE_NUM_SYMBOLS, + LZX_MAINCODE_TABLEBITS, + d->maincode_lens, + LZX_MAX_MAIN_CODEWORD_LEN, + d->working_space)) + return -1; + + /* Read the length code and prepare its decode table.
*/ + + if (lzx_read_codeword_lens(d, is, d->lencode_lens, + LZX_LENCODE_NUM_SYMBOLS)) + return -1; + + if (make_huffman_decode_table(d->lencode_decode_table, + LZX_LENCODE_NUM_SYMBOLS, + LZX_LENCODE_TABLEBITS, + d->lencode_lens, + LZX_MAX_LEN_CODEWORD_LEN, + d->working_space)) + return -1; + + break; + + case LZX_BLOCKTYPE_UNCOMPRESSED: + + /* Before reading the three recent offsets from the uncompressed + * block header, the stream must be aligned on a 16-bit + * boundary. But if the stream is *already* aligned, then the + * next 16 bits must be discarded. */ + bitstream_ensure_bits(is, 1); + bitstream_align(is); + + recent_offsets[0] = bitstream_read_u32(is); + recent_offsets[1] = bitstream_read_u32(is); + recent_offsets[2] = bitstream_read_u32(is); + + /* Offsets of 0 are invalid. */ + if (recent_offsets[0] == 0 || recent_offsets[1] == 0 || + recent_offsets[2] == 0) + return -1; + break; + + default: + /* Unrecognized block type. */ + return -1; + } + + *block_type_ret = block_type; + *block_size_ret = block_size; + return 0; +} + +/* Decompress a block of LZX-compressed data. */ +static int lzx_decompress_block(const struct lzx_decompressor *d, + struct input_bitstream *is, + int block_type, u32 block_size, + u8 * const out_begin, u8 *out_next, + u32 recent_offsets[]) +{ + u8 * const block_end = out_next + block_size; + unsigned ones_if_aligned = 0U - (block_type == LZX_BLOCKTYPE_ALIGNED); + + do { + unsigned mainsym; + unsigned match_len; + u32 match_offset; + unsigned offset_slot; + unsigned num_extra_bits; + + mainsym = read_mainsym(d, is); + if (mainsym < LZX_NUM_CHARS) { + /* Literal */ + *out_next++ = mainsym; + continue; + } + + /* Match */ + + /* Decode the length header and offset slot. */ + mainsym -= LZX_NUM_CHARS; + match_len = mainsym % LZX_NUM_LEN_HEADERS; + offset_slot = mainsym / LZX_NUM_LEN_HEADERS; + + /* If needed, read a length symbol to decode the full length. 
*/ + if (match_len == LZX_NUM_PRIMARY_LENS) + match_len += read_lensym(d, is); + match_len += LZX_MIN_MATCH_LEN; + + if (offset_slot < LZX_NUM_RECENT_OFFSETS) { + /* Repeat offset */ + + /* Note: This isn't a real LRU queue, since using the R2 + * offset doesn't bump the R1 offset down to R2. This + * quirk allows all 3 recent offsets to be handled by + * the same code. (For R0, the swap is a no-op.) */ + match_offset = recent_offsets[offset_slot]; + recent_offsets[offset_slot] = recent_offsets[0]; + recent_offsets[0] = match_offset; + } else { + /* Explicit offset */ + + /* Look up the number of extra bits that need to be read + * to decode offsets with this offset slot. */ + num_extra_bits = lzx_extra_offset_bits[offset_slot]; + + /* Start with the offset slot base value. */ + match_offset = lzx_offset_slot_base[offset_slot]; + + /* In aligned offset blocks, the low-order 3 bits of + * each offset are encoded using the aligned offset + * code. Otherwise, all the extra bits are literal. */ + + if ((num_extra_bits & ones_if_aligned) >= LZX_NUM_ALIGNED_OFFSET_BITS) { + match_offset += + bitstream_read_bits(is, num_extra_bits - + LZX_NUM_ALIGNED_OFFSET_BITS) + << LZX_NUM_ALIGNED_OFFSET_BITS; + match_offset += read_alignedsym(d, is); + } else { + match_offset += bitstream_read_bits(is, num_extra_bits); + } + + /* Adjust the offset. */ + match_offset -= (LZX_NUM_RECENT_OFFSETS - 1); + + /* Update the recent offsets. */ + recent_offsets[2] = recent_offsets[1]; + recent_offsets[1] = recent_offsets[0]; + recent_offsets[0] = match_offset; + } + + /* Validate the match, then copy it to the current position. 
*/ + + if (match_len > (size_t)(block_end - out_next)) + return -1; + + if (match_offset > (size_t)(out_next - out_begin)) + return -1; + + out_next = lz_copy(out_next, match_len, match_offset, + block_end, LZX_MIN_MATCH_LEN); + + } while (out_next != block_end); + + return 0; +} + +/* + * lzx_allocate_decompressor - Allocate an LZX decompressor + * + * Return the pointer to the decompressor on success, or return NULL and set + * errno on failure. + */ +struct lzx_decompressor *lzx_allocate_decompressor(void) +{ + return ntfs_malloc(sizeof(struct lzx_decompressor)); +} + +/* + * lzx_decompress - Decompress a buffer of LZX-compressed data + * + * @decompressor: A decompressor allocated with lzx_allocate_decompressor() + * @compressed_data: The buffer of data to decompress + * @compressed_size: Number of bytes of compressed data + * @uncompressed_data: The buffer in which to store the decompressed data + * @uncompressed_size: The number of bytes the data decompresses into + * + * Return 0 on success, or return -1 and set errno on failure. + */ +int lzx_decompress(struct lzx_decompressor *decompressor, + const void *compressed_data, size_t compressed_size, + void *uncompressed_data, size_t uncompressed_size) +{ + struct lzx_decompressor *d = decompressor; + u8 * const out_begin = uncompressed_data; + u8 *out_next = out_begin; + u8 * const out_end = out_begin + uncompressed_size; + struct input_bitstream is; + u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1}; + int e8_status = 0; + + init_input_bitstream(&is, compressed_data, compressed_size); + + /* Codeword lengths begin as all 0's for delta encoding purposes. */ + memset(d->maincode_lens, 0, LZX_MAINCODE_NUM_SYMBOLS); + memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS); + + /* Decompress blocks until we have all the uncompressed data. 
*/ + + while (out_next != out_end) { + int block_type; + u32 block_size; + + if (lzx_read_block_header(d, &is, &block_type, &block_size, + recent_offsets)) + goto invalid; + + if (block_size < 1 || block_size > (size_t)(out_end - out_next)) + goto invalid; + + if (block_type != LZX_BLOCKTYPE_UNCOMPRESSED) { + + /* Compressed block */ + + if (lzx_decompress_block(d, + &is, + block_type, + block_size, + out_begin, + out_next, + recent_offsets)) + goto invalid; + + e8_status |= d->maincode_lens[0xe8]; + out_next += block_size; + } else { + /* Uncompressed block */ + + out_next = bitstream_read_bytes(&is, out_next, + block_size); + if (!out_next) + goto invalid; + + if (block_size & 1) + bitstream_read_byte(&is); + + e8_status = 1; + } + } + + /* Postprocess the data unless it cannot possibly contain 0xe8 bytes. */ + if (e8_status) + lzx_postprocess(uncompressed_data, uncompressed_size); + + return 0; + +invalid: + errno = EINVAL; + return -1; +} + +/* + * lzx_free_decompressor - Free an LZX decompressor + * + * @decompressor: A decompressor that was allocated with + * lzx_allocate_decompressor(), or NULL. + */ +void lzx_free_decompressor(struct lzx_decompressor *decompressor) +{ + free(decompressor); +} diff --git a/src/plugin.c b/src/plugin.c new file mode 100644 index 0000000..3c861ed --- /dev/null +++ b/src/plugin.c @@ -0,0 +1,102 @@ +/* + * plugin.c - NTFS-3G system compression plugin + * + * Copyright (C) 2015 Jean-Pierre Andre + * Copyright (C) 2015-2016 Eric Biggers + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +#include "config.h" + +#include + +#ifdef HAVE_ERRNO_H +#include +#endif + +#include +#include + +#include "system_compression.h" + +static int compressed_getattr(ntfs_inode *ni, const REPARSE_POINT *reparse, + struct stat *stbuf) +{ + s64 compressed_size = ntfs_get_system_compressed_file_size(ni, reparse); + + if (compressed_size >= 0) { + /* System-compressed file */ + stbuf->st_size = ni->data_size; + stbuf->st_blocks = (compressed_size + 511) >> 9; + stbuf->st_mode = S_IFREG | 0555; + return 0; + } + + /* Not a system compressed file, or another error occurred */ + return -errno; +} + +static int compressed_open(ntfs_inode *ni __attribute__((unused)), + const REPARSE_POINT *reparse __attribute__((unused)), + struct fuse_file_info *fi) +{ + if ((fi->flags & O_ACCMODE) != O_RDONLY) + return -EOPNOTSUPP; + return 0; +} + +static int compressed_release(ntfs_inode *ni __attribute__((unused)), + const REPARSE_POINT *reparse __attribute__((unused)), + struct fuse_file_info *fi __attribute__((unused))) +{ + return 0; +} + +static int compressed_read(ntfs_inode *ni, const REPARSE_POINT *reparse, + char *buf, size_t size, off_t offset, + struct fuse_file_info *fi __attribute__((unused))) +{ + struct ntfs_system_decompression_ctx *dctx; + ssize_t res; + + /* TODO: there needs to be more investigation into reusing decompression + * contexts for multiple reads. 
*/ + + dctx = ntfs_open_system_decompression_ctx(ni, reparse); + if (!dctx) + return -errno; + + res = ntfs_read_system_compressed_data(dctx, offset, size, buf); + + ntfs_close_system_decompression_ctx(dctx); + + if (res < 0) + return -errno; + return res; +} + +static const struct plugin_operations ops = { + .getattr = compressed_getattr, + .open = compressed_open, + .release = compressed_release, + .read = compressed_read, +}; + +const struct plugin_operations *init(le32 tag) +{ + if (tag == IO_REPARSE_TAG_WOF) + return &ops; + errno = EINVAL; + return NULL; +} diff --git a/src/system_compression.c b/src/system_compression.c new file mode 100644 index 0000000..287e6c3 --- /dev/null +++ b/src/system_compression.c @@ -0,0 +1,687 @@ +/* + * system_compression.c - Support for reading System Compressed files + * + * Copyright (C) 2015-2016 Eric Biggers + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +/* + * Windows 10 introduced a new filesystem compression feature: System + * Compression, also called "Compact OS". The feature allows rarely modified + * files to be compressed more heavily than is possible with regular NTFS + * compression (which uses the LZNT1 algorithm with 4096-byte chunks). 
+ * System-compressed files can only be read, not written; on Windows, if a + * program attempts to write to such a file, it is automatically decompressed + * and turned into an ordinary uncompressed file. + * + * Rather than building it directly into NTFS, Microsoft implemented this new + * compression mode using the Windows Overlay Filesystem (WOF) filter driver + * that was added in Windows 8.1. A system-compressed file contains the + * following NTFS attributes: + * + * - A reparse point attribute in the format WOF_FILE_PROVIDER_REPARSE_POINT_V1, + * documented below + * - A sparse unnamed data attribute, containing all zero bytes, with data size + * equal to the uncompressed file size + * - A data attribute named "WofCompressedData" containing the compressed data + * of the file. + * + * The compressed data contains a series of chunks, each of which decompresses + * to a known size determined by the compression format specified in the reparse + * point. The last chunk can be an exception, since it decompresses to whatever + * size remains. Chunks that did not compress to less than their original size + * are stored uncompressed. The compressed chunks are concatenated in order and + * are prefixed by a table of 4-byte (for files < 4 GiB in size uncompressed) or + * 8-byte (for files >= 4 GiB in size uncompressed) little endian numbers which + * give the offset of each compressed chunk from the end of the table. Since + * every chunk can be decompressed independently and its location can be + * discovered from the chunk offset table, "random access" reads are possible + * with chunk granularity. Writes are not possible, in general, without + * rewriting the entire file. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + /* NOTE(review): the bare #include lines below carry no header name; the names appear to have been stripped and need restoring. */ +#include +#include +#include +#include + +#include +#include +#include + +#include "system_compression.h" + +/******************************************************************************/ + +/* Known values of the WOF protocol / reparse point format */ +typedef enum { + WOF_CURRENT_VERSION = const_cpu_to_le32(1), +} WOF_VERSION; + +/* Known WOF providers */ +typedef enum { + /* WIM backing provider ("WIMBoot") */ + WOF_PROVIDER_WIM = const_cpu_to_le32(1), + + /* System compressed file provider */ + WOF_PROVIDER_FILE = const_cpu_to_le32(2), +} WOF_PROVIDER; + +/* Known versions of the compressed file provider */ +typedef enum { + WOF_FILE_PROVIDER_CURRENT_VERSION = const_cpu_to_le32(1), +} WOF_FILE_PROVIDER_VERSION; + +/* Information needed to specify a WOF provider */ +typedef struct { + le32 version; + le32 provider; +} WOF_EXTERNAL_INFO; + +/* Metadata for the compressed file provider --- indicates how the file + * is compressed */ +typedef struct { + le32 version; + le32 compression_format; +} WOF_FILE_PROVIDER_EXTERNAL_INFO_V1; + +/* Format of the reparse point attribute of system compressed files */ +typedef struct { + /* The reparse point header. This indicates that the reparse point is + * supposed to be interpreted by the WOF filter driver. */ + REPARSE_POINT reparse; + + /* The WOF provider specification. This indicates the "provider" that
+ * the WOF filter driver is supposed to hand control to. */ + WOF_EXTERNAL_INFO wof; + + /* The metadata specific to the compressed file "provider" */ + WOF_FILE_PROVIDER_EXTERNAL_INFO_V1 file; + +} WOF_FILE_PROVIDER_REPARSE_POINT_V1; + +/* The available compression formats for system compressed files. The chunk + * size of each format follows from get_chunk_order(). */ +typedef enum { + FORMAT_XPRESS4K = const_cpu_to_le32(0), /* XPRESS, 4096-byte chunks */ + FORMAT_LZX = const_cpu_to_le32(1), /* LZX, 32768-byte chunks */ + FORMAT_XPRESS8K = const_cpu_to_le32(2), /* XPRESS, 8192-byte chunks */ + FORMAT_XPRESS16K = const_cpu_to_le32(3), /* XPRESS, 16384-byte chunks */ +} WOF_FILE_PROVIDER_COMPRESSION_FORMAT; + +/* "WofCompressedData": the name of the named data stream which contains the + * compressed data of a system compressed file. The array holds no terminator; + * users compute its length as sizeof(array) / sizeof(array[0]). */ +static ntfschar compressed_stream_name[] = { + const_cpu_to_le16('W'), const_cpu_to_le16('o'), + const_cpu_to_le16('f'), const_cpu_to_le16('C'), + const_cpu_to_le16('o'), const_cpu_to_le16('m'), + const_cpu_to_le16('p'), const_cpu_to_le16('r'), + const_cpu_to_le16('e'), const_cpu_to_le16('s'), + const_cpu_to_le16('s'), const_cpu_to_le16('e'), + const_cpu_to_le16('d'), const_cpu_to_le16('D'), + const_cpu_to_le16('a'), const_cpu_to_le16('t'), + const_cpu_to_le16('a'), +}; + +/******************************************************************************/ + +/* The maximum number of chunk offsets that may be cached at any one time. This + * is purely an implementation detail, and this number can be changed. The + * minimum possible value is 2, and the maximum possible value is UINT32_MAX + * divided by the maximum chunk size.
*/ +#define NUM_CHUNK_OFFSETS 128 + +/* A special marker value not used by any chunk index */ +#define INVALID_CHUNK_INDEX UINT64_MAX + +/* The decompression context for a system compressed file */ +struct ntfs_system_decompression_ctx { + + /* The open compressed stream ("WofCompressedData") */ + ntfs_attr *na; + + /* The compression format of the file */ + WOF_FILE_PROVIDER_COMPRESSION_FORMAT format; + + /* The decompressor for the file */ + void *decompressor; + + /* The uncompressed size of the file in bytes */ + u64 uncompressed_size; + + /* The compressed size of the file in bytes */ + u64 compressed_size; + + /* The number of chunks into which the file is divided */ + u64 num_chunks; + + /* The base 2 logarithm of chunk_size */ + u32 chunk_order; + + /* The uncompressed chunk size in bytes. All chunks have this + * uncompressed size except possibly the last. */ + u32 chunk_size; + + /* + * The chunk offsets cache. If 'base_chunk_idx == INVALID_CHUNK_INDEX', + * then the cache is empty. Otherwise, 'base_chunk_idx' is the 0-based + * index of the chunk that has its offset cached in 'chunk_offsets[0]'. + * The offsets of the subsequent chunks follow until either the array is + * full or the offset of the file's last chunk has been cached. There + * is an extra entry at end-of-file which contains the end-of-file + * offset. All offsets are stored relative to 'base_chunk_offset'. + */ + u64 base_chunk_idx; + u64 base_chunk_offset; + u32 chunk_offsets[NUM_CHUNK_OFFSETS]; + + /* A temporary buffer used to hold the compressed chunk currently being + * decompressed or the chunk offset data currently being parsed. */ + void *temp_buffer; + + /* + * A cache for the most recently decompressed chunk. 'cached_chunk' is + * a buffer which, if 'cached_chunk_idx != INVALID_CHUNK_INDEX', + * contains the uncompressed data of the chunk with index + * 'cached_chunk_idx'.
+ * + * This cache is intended to prevent adjacent reads with lengths shorter + * than the chunk size from causing redundant chunk decompressions. + * It's not intended to be a general purpose data cache. + */ + void *cached_chunk; + u64 cached_chunk_idx; +}; + +/* Allocate the decompressor that matches ctx->format and store it in ctx->decompressor. Return 0 on success or -1 if the allocation failed. */ static int allocate_decompressor(struct ntfs_system_decompression_ctx *ctx) +{ + if (ctx->format == FORMAT_LZX) + ctx->decompressor = lzx_allocate_decompressor(); + else + ctx->decompressor = xpress_allocate_decompressor(); + if (!ctx->decompressor) + return -1; + return 0; +} + +/* Release ctx->decompressor with the free routine that matches ctx->format. */ static void free_decompressor(struct ntfs_system_decompression_ctx *ctx) +{ + if (ctx->format == FORMAT_LZX) + lzx_free_decompressor(ctx->decompressor); + else + xpress_free_decompressor(ctx->decompressor); +} + +/* Decompress one buffer with the LZX or XPRESS routine selected by ctx->format and return that routine's result. */ static int decompress(struct ntfs_system_decompression_ctx *ctx, + const void *compressed_data, size_t compressed_size, + void *uncompressed_data, size_t uncompressed_size) +{ + if (ctx->format == FORMAT_LZX) + return lzx_decompress(ctx->decompressor, + compressed_data, compressed_size, + uncompressed_data, uncompressed_size); + else + return xpress_decompress(ctx->decompressor, + compressed_data, compressed_size, + uncompressed_data, uncompressed_size); +} + +/* Check whether @ni is a system compressed file and, if so, store its compression format in *format_ret. @reparse may be NULL, in which case the reparse point attribute is read from the inode. Return 0 on success, or -1 with errno set (EINVAL for a NULL inode, EOPNOTSUPP if the file is not system compressed). */ static int get_compression_format(ntfs_inode *ni, const REPARSE_POINT *reparse, + WOF_FILE_PROVIDER_COMPRESSION_FORMAT *format_ret) +{ + WOF_FILE_PROVIDER_REPARSE_POINT_V1 *rp; + s64 rpbuflen; + int ret; + + if (!ni) { + errno = EINVAL; + return -1; + } + + /* Is this a reparse point file? */ + if (!(ni->flags & FILE_ATTR_REPARSE_POINT)) { + errno = EOPNOTSUPP; + return -1; + } + + /* Read the reparse point if not done already. */ + if (reparse) { + rp = (WOF_FILE_PROVIDER_REPARSE_POINT_V1 *)reparse; + rpbuflen = sizeof(REPARSE_POINT) + + le16_to_cpu(reparse->reparse_data_length); + } else { + rp = ntfs_attr_readall(ni, AT_REPARSE_POINT, AT_UNNAMED, 0, + &rpbuflen); + if (!rp) + return -1; + } + + /* Does the reparse point indicate a system compressed file?
*/ + if (rpbuflen >= (s64)sizeof(WOF_FILE_PROVIDER_REPARSE_POINT_V1) && + rp->reparse.reparse_tag == IO_REPARSE_TAG_WOF && + rp->wof.version == WOF_CURRENT_VERSION && + rp->wof.provider == WOF_PROVIDER_FILE && + rp->file.version == WOF_FILE_PROVIDER_CURRENT_VERSION && + (rp->file.compression_format == FORMAT_XPRESS4K || + rp->file.compression_format == FORMAT_XPRESS8K || + rp->file.compression_format == FORMAT_XPRESS16K || + rp->file.compression_format == FORMAT_LZX)) + { + /* Yes, it's a system compressed file. Save the compression + * format identifier. */ + *format_ret = rp->file.compression_format; + ret = 0; + } else { + /* No, it's not a system compressed file. */ + errno = EOPNOTSUPP; + ret = -1; + } + + /* Free the buffer only if it was read here rather than supplied by the caller. */ if ((const REPARSE_POINT *)rp != reparse) + free(rp); + return ret; +} + +/* Return the base 2 logarithm of the uncompressed chunk size used by @format. */ static u32 get_chunk_order(WOF_FILE_PROVIDER_COMPRESSION_FORMAT format) +{ + switch (format) { + case FORMAT_XPRESS4K: + return 12; + case FORMAT_XPRESS8K: + return 13; + case FORMAT_XPRESS16K: + return 14; + case FORMAT_LZX: + return 15; + } + /* Not reached */ + return 0; +} + +/* + * ntfs_get_system_compressed_file_size - Return the compressed size of a system + * compressed file + * + * @ni: The NTFS inode for the file + * @reparse: (Optional) the contents of the file's reparse point attribute + * + * On success, return the compressed size in bytes. On failure, return -1 and + * set errno. If the file is not a system compressed file, return -1 and set + * errno to EOPNOTSUPP. + */ +s64 ntfs_get_system_compressed_file_size(ntfs_inode *ni, + const REPARSE_POINT *reparse) +{ + WOF_FILE_PROVIDER_COMPRESSION_FORMAT format; + ntfs_attr_search_ctx *actx; + s64 ret; + + /* Verify this is a system compressed file. */ + if (get_compression_format(ni, reparse, &format)) + return -1; + + /* Get the size of the WofCompressedData named data stream.
*/ + + actx = ntfs_attr_get_search_ctx(ni, NULL); + if (!actx) + return -1; + + ret = ntfs_attr_lookup(AT_DATA, compressed_stream_name, + sizeof(compressed_stream_name) / + sizeof(compressed_stream_name[0]), + CASE_SENSITIVE, 0, NULL, 0, actx); + /* A nonzero lookup result is returned to the caller unchanged. */ if (!ret) + ret = ntfs_get_attribute_value_length(actx->attr); + + ntfs_attr_put_search_ctx(actx); + + return ret; +} + +/* + * ntfs_open_system_decompression_ctx - Open a system-compressed file + * + * @ni: The NTFS inode for the file + * @reparse: (Optional) the contents of the file's reparse point attribute + * + * On success, return a pointer to the decompression context. On failure, + * return NULL and set errno. If the file is not a system-compressed file, + * return NULL and set errno to EOPNOTSUPP. + */ +struct ntfs_system_decompression_ctx * +ntfs_open_system_decompression_ctx(ntfs_inode *ni, const REPARSE_POINT *reparse) +{ + WOF_FILE_PROVIDER_COMPRESSION_FORMAT format; + struct ntfs_system_decompression_ctx *ctx; + + /* Get the compression format. This also validates that the file really + * is a system-compressed file. */ + if (get_compression_format(ni, reparse, &format)) + goto err; + + /* Allocate the decompression context. */ + ctx = ntfs_malloc(sizeof(struct ntfs_system_decompression_ctx)); + if (!ctx) + goto err; + + /* Allocate the decompressor. */ + ctx->format = format; + if (allocate_decompressor(ctx)) + goto err_free_ctx; + + /* Open the WofCompressedData stream. */ + ctx->na = ntfs_attr_open(ni, AT_DATA, compressed_stream_name, + sizeof(compressed_stream_name) / + sizeof(compressed_stream_name[0])); + if (!ctx->na) + goto err_free_decompressor; + + /* The uncompressed size of a system-compressed file is the size of its + * unnamed data stream, which should be sparse so that it consumes no + * disk space (though we don't rely on it being sparse). */ + ctx->uncompressed_size = ni->data_size; + + /* Get the chunk size, which depends on the compression format.
*/ + ctx->chunk_order = get_chunk_order(ctx->format); + ctx->chunk_size = (u32)1 << ctx->chunk_order; + + /* Compute the number of chunks into which the file is divided. */ + ctx->num_chunks = (ctx->uncompressed_size + + ctx->chunk_size - 1) >> ctx->chunk_order; + + /* The compressed size of a system compressed file is the size of its + * WofCompressedData stream. */ + ctx->compressed_size = ctx->na->data_size; + + /* Initially, no chunk offsets are cached. */ + ctx->base_chunk_idx = INVALID_CHUNK_INDEX; + + /* Allocate buffers for chunk data. */ + ctx->temp_buffer = ntfs_malloc(max(ctx->chunk_size, + NUM_CHUNK_OFFSETS * sizeof(u64))); + ctx->cached_chunk = ntfs_malloc(ctx->chunk_size); + ctx->cached_chunk_idx = INVALID_CHUNK_INDEX; + if (!ctx->temp_buffer || !ctx->cached_chunk) + goto err_close_ctx; + + return ctx; + +err_close_ctx: /* free(NULL) is harmless, so this also covers the case where only one of the two buffers was allocated */ + free(ctx->cached_chunk); + free(ctx->temp_buffer); + ntfs_attr_close(ctx->na); +err_free_decompressor: + free_decompressor(ctx); +err_free_ctx: + free(ctx); +err: + return NULL; +} + +/* Retrieve the stored offset and size of a chunk stored in the compressed file + * stream. Return 0 on success, or -1 with errno set if the chunk table could + * not be read. */ +static int get_chunk_location(struct ntfs_system_decompression_ctx *ctx, + u64 chunk_idx, + u64 *offset_ret, u32 *stored_size_ret) +{ + size_t cache_idx; + + /* To get the stored size of the chunk, we need its offset and the next + * chunk's offset. Use the cached values if possible; otherwise load + * the needed offsets into the cache. To reduce the number of chunk + * table reads that may be required later, also load some extra. */ + if (chunk_idx < ctx->base_chunk_idx || + chunk_idx + 1 >= ctx->base_chunk_idx + NUM_CHUNK_OFFSETS) + { + const u64 start_chunk = chunk_idx; + const u64 end_chunk = + chunk_idx + min(NUM_CHUNK_OFFSETS - 1, + ctx->num_chunks - chunk_idx); + const int entry_shift = + (ctx->uncompressed_size <= UINT32_MAX) ?
2 : 3; + le32 * const offsets32 = ctx->temp_buffer; + le64 * const offsets64 = ctx->temp_buffer; + u64 first_entry_to_read; + size_t num_entries_to_read; + size_t i, j; + s64 res; + + num_entries_to_read = end_chunk - start_chunk; + + /* The first chunk has no explicit chunk table entry. */ + if (start_chunk == 0) { + num_entries_to_read--; + first_entry_to_read = 0; + } else { + first_entry_to_read = start_chunk - 1; + } + + if (end_chunk != ctx->num_chunks) + num_entries_to_read++; + + /* Read the chunk table entries into a temporary buffer. */ + res = ntfs_attr_pread(ctx->na, + first_entry_to_read << entry_shift, + num_entries_to_read << entry_shift, + ctx->temp_buffer); + + if ((u64)res != num_entries_to_read << entry_shift) { + if (res >= 0) + errno = EINVAL; + ctx->base_chunk_idx = INVALID_CHUNK_INDEX; + return -1; + } + + /* Prepare the cached chunk offsets. */ + + i = 0; + if (start_chunk == 0) { + /* Implicit first entry */ + ctx->chunk_offsets[i++] = 0; + ctx->base_chunk_offset = 0; + } else { + if (entry_shift == 3) { + ctx->base_chunk_offset = + le64_to_cpu(offsets64[0]); + } else { + ctx->base_chunk_offset = + le32_to_cpu(offsets32[0]); + } + } + + if (entry_shift == 3) { + /* 64-bit entries (huge file) */ + for (j = 0; j < num_entries_to_read; j++) { + ctx->chunk_offsets[i++] = + le64_to_cpu(offsets64[j]) - + ctx->base_chunk_offset; + } + } else { + /* 32-bit entries */ + for (j = 0; j < num_entries_to_read; j++) { + ctx->chunk_offsets[i++] = + le32_to_cpu(offsets32[j]) - + ctx->base_chunk_offset; + } + } + + /* Account for the chunk table itself.
*/ + ctx->base_chunk_offset += (ctx->num_chunks - 1) << entry_shift; + + if (end_chunk == ctx->num_chunks) { + /* Implicit last entry */ + ctx->chunk_offsets[i] = ctx->compressed_size - + ctx->base_chunk_offset; + } + + ctx->base_chunk_idx = start_chunk; + } + + cache_idx = chunk_idx - ctx->base_chunk_idx; + *offset_ret = ctx->base_chunk_offset + ctx->chunk_offsets[cache_idx]; + *stored_size_ret = ctx->chunk_offsets[cache_idx + 1] - + ctx->chunk_offsets[cache_idx]; + return 0; +} + +/* Retrieve into @buffer the uncompressed data of chunk @chunk_idx. Return 0 on + * success, or -1 with errno set on failure. */ +static int read_and_decompress_chunk(struct ntfs_system_decompression_ctx *ctx, + u64 chunk_idx, void *buffer) +{ + u64 offset; + u32 stored_size; + u32 uncompressed_size; + void *read_buffer; + s64 res; + + /* Get the location of the chunk data as stored in the file. */ + if (get_chunk_location(ctx, chunk_idx, &offset, &stored_size)) + return -1; + + /* All chunks decompress to 'chunk_size' bytes except possibly the last, + * which decompresses to whatever remains. */ + if (chunk_idx == ctx->num_chunks - 1) + uncompressed_size = ((ctx->uncompressed_size - 1) & + (ctx->chunk_size - 1)) + 1; + else + uncompressed_size = ctx->chunk_size; + + /* Forbid strange compressed sizes. */ + if (stored_size <= 0 || stored_size > uncompressed_size) { + errno = EINVAL; + return -1; + } + + /* Chunks that didn't compress to less than their original size are + * stored uncompressed. */ + if (stored_size == uncompressed_size) { + /* Chunk is stored uncompressed */ + read_buffer = buffer; + } else { + /* Chunk is stored compressed */ + read_buffer = ctx->temp_buffer; + } + + /* Read the stored chunk data. */ + res = ntfs_attr_pread(ctx->na, offset, stored_size, read_buffer); + if (res != stored_size) { + if (res >= 0) + errno = EINVAL; + return -1; + } + + /* If the chunk was stored uncompressed, then we're done. */ + if (read_buffer == buffer) + return 0; + + /* The chunk was stored compressed. Decompress its data.
*/ + return decompress(ctx, read_buffer, stored_size, + buffer, uncompressed_size); +} + +/* Retrieve a pointer to the uncompressed data of the specified chunk. On + * failure, return NULL and set errno. */ +static const void *get_chunk_data(struct ntfs_system_decompression_ctx *ctx, + u64 chunk_idx) +{ + if (chunk_idx != ctx->cached_chunk_idx) { + /* Invalidate the cache first: the read below writes into cached_chunk, so a failure must not leave the old index in place. */ ctx->cached_chunk_idx = INVALID_CHUNK_INDEX; + if (read_and_decompress_chunk(ctx, chunk_idx, ctx->cached_chunk)) + return NULL; + ctx->cached_chunk_idx = chunk_idx; + } + return ctx->cached_chunk; +} + +/* + * ntfs_read_system_compressed_data - Read data from a system-compressed file + * + * @ctx: The decompression context for the file + * @pos: The byte offset into the uncompressed data to read from + * @count: The number of bytes of uncompressed data to read + * @buf: The buffer into which to read the data + * + * On full or partial success, return the number of bytes read (0 indicates + * end-of-file). On complete failure, return -1 and set errno.
+ */ +ssize_t ntfs_read_system_compressed_data(struct ntfs_system_decompression_ctx *ctx, + s64 pos, size_t count, void *buf) +{ + u64 offset; + u8 *p; + u8 *end_p; + u64 chunk_idx; + u32 offset_in_chunk; + u32 chunk_size; + + if (!ctx || pos < 0) { + errno = EINVAL; + return -1; + } + + offset = (u64)pos; + if (offset >= ctx->uncompressed_size) + return 0; + + /* Clamp the request to the bytes remaining before end-of-file. */ count = min(count, ctx->uncompressed_size - offset); + if (!count) + return 0; + + p = buf; + end_p = p + count; + chunk_idx = offset >> ctx->chunk_order; + offset_in_chunk = offset & (ctx->chunk_size - 1); + chunk_size = ctx->chunk_size; + do { + u32 len_to_copy; + const u8 *chunk; + + /* The last chunk holds only the bytes that remain. */ if (chunk_idx == ctx->num_chunks - 1) + chunk_size = ((ctx->uncompressed_size - 1) & + (ctx->chunk_size - 1)) + 1; + + len_to_copy = min((size_t)(end_p - p), + chunk_size - offset_in_chunk); + + chunk = get_chunk_data(ctx, chunk_idx); + if (!chunk) + break; /* stop at the first unreadable chunk; bytes already copied are still reported */ + + memcpy(p, &chunk[offset_in_chunk], len_to_copy); + + p += len_to_copy; + chunk_idx++; + offset_in_chunk = 0; + } while (p != end_p); + + return (p == buf) ? -1 : p - (u8 *)buf; +} + +/* + * ntfs_close_system_decompression_ctx - Close a system-compressed file + * + * @ctx: The decompression context to release, or NULL (in which case nothing + * is done) + */ +void ntfs_close_system_decompression_ctx(struct ntfs_system_decompression_ctx *ctx) +{ + if (ctx) { + free(ctx->cached_chunk); + free(ctx->temp_buffer); + ntfs_attr_close(ctx->na); + free_decompressor(ctx); + free(ctx); + } +} diff --git a/src/system_compression.h b/src/system_compression.h new file mode 100644 index 0000000..9abc512 --- /dev/null +++ b/src/system_compression.h @@ -0,0 +1,71 @@ +/* + * system_compression.h - declarations for accessing System Compressed files + * + * Copyright (C) 2015 Eric Biggers + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 2 of the License, or (at your option) any later + * version.
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _NTFS_SYSTEM_COMPRESSION_H +#define _NTFS_SYSTEM_COMPRESSION_H + /* NOTE(review): the bare #include lines below carry no header name; the names appear to have been stripped and need restoring. */ +#include +#include + +#include +#include + +/* System compressed file access */ + +/* Opaque decompression context; the tag matches the one used by every prototype below. */ struct ntfs_system_decompression_ctx; + +/* Return the compressed size in bytes of a system compressed file. */ extern s64 ntfs_get_system_compressed_file_size(ntfs_inode *ni, + const REPARSE_POINT *reparse); + +/* Open a decompression context for a system compressed file. */ extern struct ntfs_system_decompression_ctx * +ntfs_open_system_decompression_ctx(ntfs_inode *ni, + const REPARSE_POINT *reparse); + +/* Read @count bytes of uncompressed data starting at byte offset @pos into @buf. */ extern ssize_t +ntfs_read_system_compressed_data(struct ntfs_system_decompression_ctx *ctx, + s64 pos, size_t count, void *buf); + +/* Release a context returned by ntfs_open_system_decompression_ctx(). */ extern void +ntfs_close_system_decompression_ctx(struct ntfs_system_decompression_ctx *ctx); + +/* XPRESS decompression */ + +struct xpress_decompressor; + +extern struct xpress_decompressor *xpress_allocate_decompressor(void); + +extern int xpress_decompress(struct xpress_decompressor *decompressor, + const void *compressed_data, size_t compressed_size, + void *uncompressed_data, size_t uncompressed_size); + +extern void xpress_free_decompressor(struct xpress_decompressor *decompressor); + +/* LZX decompression */ + +struct lzx_decompressor; + +extern struct lzx_decompressor *lzx_allocate_decompressor(void); + +extern int lzx_decompress(struct lzx_decompressor *decompressor, + const void *compressed_data, size_t compressed_size, + void *uncompressed_data, size_t uncompressed_size); + +extern void lzx_free_decompressor(struct lzx_decompressor *decompressor); + +#endif /* _NTFS_SYSTEM_COMPRESSION_H */ diff --git a/src/xpress_decompress.c b/src/xpress_decompress.c new file mode 100644 index 0000000..61aa42d --- /dev/null +++
b/src/xpress_decompress.c @@ -0,0 +1,164 @@ +/* + * xpress_decompress.c - A decompressor for the XPRESS compression format + * (Huffman variant), which can be used in "System Compressed" files. This is + * based on the code from wimlib. + * + * Copyright (C) 2015 Eric Biggers + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + /* NOTE(review): the bare #include lines below carry no header name; the names appear to have been stripped and need restoring. */ +#include +#include + +#include + +#include "decompress_common.h" +#include "system_compression.h" + +#define XPRESS_NUM_SYMBOLS 512 /* symbols below 256 decode as literals, the remaining ones as matches */ +#define XPRESS_MAX_CODEWORD_LEN 15 /* codeword lengths are stored as 4-bit values */ +#define XPRESS_MIN_MATCH_LEN 3 /* added to every decoded match length */ + +/* This value is chosen for fast decompression. */ +#define XPRESS_TABLEBITS 12 + +/* Reusable heap-allocated memory for XPRESS decompression */ +struct xpress_decompressor { + + /* The Huffman decoding table */ + u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS]; + + /* An array that maps symbols to codeword lengths */ + u8 lens[XPRESS_NUM_SYMBOLS]; + + /* Temporary space for make_huffman_decode_table() */ + u16 working_space[2 * (1 + XPRESS_MAX_CODEWORD_LEN) + + XPRESS_NUM_SYMBOLS]; +}; + +/* + * xpress_allocate_decompressor - Allocate an XPRESS decompressor + * + * Return the pointer to the decompressor on success, or return NULL and set + * errno on failure.
+ */ +struct xpress_decompressor *xpress_allocate_decompressor(void) +{ + return ntfs_malloc(sizeof(struct xpress_decompressor)); +} + +/* + * xpress_decompress - Decompress a buffer of XPRESS-compressed data + * + * @decompressor: A decompressor that was allocated with + * xpress_allocate_decompressor() + * @compressed_data: The buffer of data to decompress + * @compressed_size: Number of bytes of compressed data + * @uncompressed_data: The buffer in which to store the decompressed data + * @uncompressed_size: The number of bytes the data decompresses into + * + * Return 0 on success, or return -1 with errno set to EINVAL on failure. + */ +int xpress_decompress(struct xpress_decompressor *decompressor, + const void *compressed_data, size_t compressed_size, + void *uncompressed_data, size_t uncompressed_size) +{ + struct xpress_decompressor *d = decompressor; + const u8 * const in_begin = compressed_data; + u8 * const out_begin = uncompressed_data; + u8 *out_next = out_begin; + u8 * const out_end = out_begin + uncompressed_size; + struct input_bitstream is; + unsigned i; + + /* Read the Huffman codeword lengths. Each input byte packs two 4-bit + * lengths, low nibble first, so the table occupies + * XPRESS_NUM_SYMBOLS / 2 bytes. */ + if (compressed_size < XPRESS_NUM_SYMBOLS / 2) + goto invalid; + for (i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) { + d->lens[i*2 + 0] = in_begin[i] & 0xF; + d->lens[i*2 + 1] = in_begin[i] >> 4; + } + + /* Build a decoding table for the Huffman code. */ + if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS, + XPRESS_TABLEBITS, d->lens, + XPRESS_MAX_CODEWORD_LEN, + d->working_space)) + goto invalid; + + /* Decode the matches and literals.
*/ + + init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2, + compressed_size - XPRESS_NUM_SYMBOLS / 2); + + while (out_next != out_end) { + unsigned sym; + unsigned log2_offset; + u32 length; + u32 offset; + + sym = read_huffsym(&is, d->decode_table, + XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN); + if (sym < 256) { + /* Literal */ + *out_next++ = sym; + } else { + /* Match: the low 4 bits of the symbol hold the length + * header and the next 4 bits the log2 of the offset. */ + length = sym & 0xf; + log2_offset = (sym >> 4) & 0xf; + + bitstream_ensure_bits(&is, 16); + + offset = ((u32)1 << log2_offset) | + bitstream_pop_bits(&is, log2_offset); + + /* A length header of 0xf is extended by one more byte, and if that byte is 0xff the length is replaced by a 16-bit value. */ if (length == 0xf) { + length += bitstream_read_byte(&is); + if (length == 0xf + 0xff) + length = bitstream_read_u16(&is); + } + length += XPRESS_MIN_MATCH_LEN; + + /* The match source must not start before the output buffer. */ if (offset > (size_t)(out_next - out_begin)) + goto invalid; + + /* The match must not run past the end of the output buffer. */ if (length > (size_t)(out_end - out_next)) + goto invalid; + + out_next = lz_copy(out_next, length, offset, out_end, + XPRESS_MIN_MATCH_LEN); + } + } + return 0; + +invalid: + errno = EINVAL; + return -1; +} + +/* + * xpress_free_decompressor - Free an XPRESS decompressor + * + * @decompressor: A decompressor that was allocated with + * xpress_allocate_decompressor(), or NULL. + */ +void xpress_free_decompressor(struct xpress_decompressor *decompressor) +{ + free(decompressor); +}