From e13713fd276446607954e9e6ff2411b6894fd739 Mon Sep 17 00:00:00 2001 From: "cantab.net!aia21" Date: Tue, 13 Jan 2004 11:05:18 +0000 Subject: [PATCH] Finish attribute decompression code. (Completely untested!) (Logical change 1.261) --- TODO.libntfs | 2 - include/compress.h | 2 +- libntfs/compress.c | 458 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 459 insertions(+), 3 deletions(-) diff --git a/TODO.libntfs b/TODO.libntfs index 2f775696..9e67e62c 100644 --- a/TODO.libntfs +++ b/TODO.libntfs @@ -18,8 +18,6 @@ - write ntfs_mft_record_allocate() (see ntfs 2.4 driver in CVS) -- add read of compressed attributes - ******************* * MEDIUM priority * ******************* diff --git a/include/compress.h b/include/compress.h index c21a3cf5..93df37af 100644 --- a/include/compress.h +++ b/include/compress.h @@ -26,7 +26,7 @@ #include "types.h" #include "attrib.h" -extern s64 ntfs_compressed_attr_pread(ntfs_attr *na, const s64 pos, s64 count, +extern s64 ntfs_compressed_attr_pread(ntfs_attr *na, s64 pos, s64 count, void *b); #endif /* defined _NTFS_COMPRESS_H */ diff --git a/libntfs/compress.c b/libntfs/compress.c index e69de29b..312351d6 100644 --- a/libntfs/compress.c +++ b/libntfs/compress.c @@ -0,0 +1,458 @@ +/* + * compress.c - Compressed attribute handling code. Part of the Linux-NTFS + * project. + * + * Copyright (c) 2004 Anton Altaparmakov + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include + +#include "attrib.h" +#include "debug.h" +#include "volume.h" +#include "types.h" +#include "layout.h" +#include "runlist.h" + +/** + * ntfs_compression_constants - enum of constants used in the compression code + */ +typedef enum { + /* Token types and access mask. */ + NTFS_SYMBOL_TOKEN = 0, + NTFS_PHRASE_TOKEN = 1, + NTFS_TOKEN_MASK = 1, + + /* Compression sub-block constants. */ + NTFS_SB_SIZE_MASK = 0x0fff, + NTFS_SB_SIZE = 0x1000, + NTFS_SB_IS_COMPRESSED = 0x8000, +} ntfs_compression_constants; + +/** + * ntfs_decompress - decompress a compression block into an array of pages + * @dest: buffer to which to write the decompressed data + * @dest_size: size of buffer @dest in bytes + * @cb_start: compression block to decompress + * @cb_size: size of compression block @cb_start in bytes + * + * This decompresses the compression block @cb_start into the destination + * buffer @dest. + * + * @cb_start is a pointer to the compression block which needs decompressing + * and @cb_size is the size of @cb_start in bytes (8-64kiB). + * + * Return 0 if success or -EOVERFLOW on error in the compressed stream. + */ +static int ntfs_decompress(u8 *dest, const u32 dest_size, + u8 *const cb_start, const u32 cb_size) +{ + /* + * Pointers into the compressed data, i.e. the compression block (cb), + * and the therein contained sub-blocks (sb). + */ + u8 *cb_end = cb_start + cb_size; /* End of cb. */ + u8 *cb = cb_start; /* Current position in cb. */ + u8 *cb_sb_start = cb; /* Beginning of the current sb in the cb. */ + u8 *cb_sb_end; /* End of current sb / beginning of next sb. */ + /* Variables for uncompressed data / destination. */ + u8 *dest_end = dest + dest_size; /* End of dest buffer. */ + u8 *dest_sb_start; /* Start of current sub-block in dest. */ + u8 *dest_sb_end; /* End of current sb in dest. */ + /* Variables for tag and token parsing. */ + u8 tag; /* Current tag. */ + int token; /* Loop counter for the eight tokens in tag. */ + + Dprintf("Entering, cb_size = 0x%x.\n", cb_size); +do_next_sb: + Dprintf("Beginning sub-block at offset = 0x%x in the cb.\n", + cb - cb_start); + /* Have we reached the end of the compression block? */ + if (cb == cb_end || !le16_to_cpup((u16*)cb)) { + Dprintf("Completed. Returning success (0).\n"); + return 0; + } + /* Setup offset for the current sub-block destination. */ + dest_sb_start = dest; + dest_sb_end = dest + NTFS_SB_SIZE; + /* Check that we are still within allowed boundaries. */ + if (dest_sb_end > dest_end) + goto return_overflow; + /* Does the minimum size of a compressed sb overflow valid range? */ + if (cb + 6 > cb_end) + goto return_overflow; + /* Setup the current sub-block source pointers and validate range. */ + cb_sb_start = cb; + cb_sb_end = cb_sb_start + (le16_to_cpup((u16*)cb) & NTFS_SB_SIZE_MASK) + + 3; + if (cb_sb_end > cb_end) + goto return_overflow; + /* Now, we are ready to process the current sub-block (sb). */ + if (!(le16_to_cpup((u16*)cb) & NTFS_SB_IS_COMPRESSED)) { + Dprintf("Found uncompressed sub-block.\n"); + /* This sb is not compressed, just copy it into destination. */ + /* Advance source position to first data byte. */ + cb += 2; + /* An uncompressed sb must be full size. */ + if (cb_sb_end - cb != NTFS_SB_SIZE) + goto return_overflow; + /* Copy the block and advance the source position. */ + memcpy(dest, cb, NTFS_SB_SIZE); + cb += NTFS_SB_SIZE; + /* Advance destination position to next sub-block. */ + dest += NTFS_SB_SIZE; + goto do_next_sb; + } + Dprintf("Found compressed sub-block.\n"); + /* This sb is compressed, decompress it into destination. */ + /* Forward to the first tag in the sub-block. */ + cb += 2; +do_next_tag: + if (cb == cb_sb_end) { + /* Check if the decompressed sub-block was not full-length. */ + if (dest < dest_sb_end) { + int nr_bytes = dest_sb_end - dest; + + Dprintf("Filling incomplete sub-block with zeroes.\n"); + /* Zero remainder and update destination position. */ + memset(dest, 0, nr_bytes); + dest += nr_bytes; + } + /* We have finished the current sub-block. */ + goto do_next_sb; + } + /* Check we are still in range. */ + if (cb > cb_sb_end || dest > dest_sb_end) + goto return_overflow; + /* Get the next tag and advance to first token. */ + tag = *cb++; + /* Parse the eight tokens described by the tag. */ + for (token = 0; token < 8; token++, tag >>= 1) { + u16 lg, pt, length, max_non_overlap; + register u16 i; + u8 *dest_back_addr; + + /* Check if we are done / still in range. */ + if (cb >= cb_sb_end || dest > dest_sb_end) + break; + /* Determine token type and parse appropriately.*/ + if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) { + /* + * We have a symbol token, copy the symbol across, and + * advance the source and destination positions. + */ + *dest++ = *cb++; + /* Continue with the next token. */ + continue; + } + /* + * We have a phrase token. Make sure it is not the first tag in + * the sb as this is illegal and would confuse the code below. + */ + if (dest == dest_sb_start) + goto return_overflow; + /* + * Determine the number of bytes to go back (p) and the number + * of bytes to copy (l). We use an optimized algorithm in which + * we first calculate log2(current destination position in sb), + * which allows determination of l and p in O(1) rather than + * O(n). We just need an arch-optimized log2() function now. + */ + lg = 0; + for (i = dest - dest_sb_start - 1; i >= 0x10; i >>= 1) + lg++; + /* Get the phrase token into i. */ + pt = le16_to_cpup((u16*)cb); + /* + * Calculate starting position of the byte sequence in + * the destination using the fact that p = (pt >> (12 - lg)) + 1 + * and make sure we don't go too far back. + */ + dest_back_addr = dest - (pt >> (12 - lg)) - 1; + if (dest_back_addr < dest_sb_start) + goto return_overflow; + /* Now calculate the length of the byte sequence. */ + length = (pt & (0xfff >> lg)) + 3; + /* Advance destination position and verify it is in range. */ + dest += length; + if (dest > dest_sb_end) + goto return_overflow; + /* The number of non-overlapping bytes. */ + max_non_overlap = dest - dest_back_addr; + if (length <= max_non_overlap) { + /* The byte sequence doesn't overlap, just copy it. */ + memcpy(dest, dest_back_addr, length); + /* Advance destination pointer. */ + dest += length; + } else { + /* + * The byte sequence does overlap, copy non-overlapping + * part and then do a slow byte by byte copy for the + * overlapping part. Also, advance the destination + * pointer. + */ + memcpy(dest, dest_back_addr, max_non_overlap); + dest += max_non_overlap; + dest_back_addr += max_non_overlap; + length -= max_non_overlap; + while (length--) + *dest++ = *dest_back_addr++; + } + /* Advance source position and continue with the next token. */ + cb += 2; + } + /* No tokens left in the current tag. Continue with the next tag. */ + goto do_next_tag; +return_overflow: + Dprintf("Failed. Returning -EOVERFLOW.\n"); + errno = EOVERFLOW; + return -1; +} + +/** + * ntfs_compressed_attr_pread - read from a compressed attribute + * @na: ntfs attribute to read from + * @pos: byte position in the attribute to begin reading from + * @count: number of bytes to read + * @b: output data buffer + * + * NOTE: You probably want to be using attrib.c::ntfs_attr_pread() instead. + * + * This function will read @count bytes starting at offset @pos from the + * compressed ntfs attribute @na into the data buffer @b. + * + * On success, return the number of successfully read bytes. If this number + * is lower than @count this means that the read reached end of file or that + * an error was encountered during the read so that the read is partial. + * 0 means end of file or nothing was read (also return 0 when @count is 0). + * + * On error and nothing has been read, return -1 with errno set appropriately + * to the return code of ntfs_pread(), or to EINVAL in case of invalid + * arguments. + */ +s64 ntfs_compressed_attr_pread(ntfs_attr *na, s64 pos, s64 count, void *b) +{ + s64 br, to_read, ofs, total, total2; + u64 cb_size_mask; + VCN start_vcn, vcn, end_vcn; + ntfs_volume *vol; + runlist_element *rl; + u8 *dest, *cb, *cb_pos, *cb_end; + u32 cb_size; + int err; + unsigned int nr_cbs, cb_clusters; + + Dprintf("%s(): Entering for inode 0x%Lx, attr 0x%x, pos 0x%Lx, " + "count 0x%Lx.\n", __FUNCTION__, + (unsigned long long)na->ni->mft_no, na->type, + (long long)pos, (long long)count); + if (!na || !NAttrCompressed(na) || !na->ni || !na->ni->vol || !b || + pos < 0 || count < 0) { + errno = EINVAL; + return -1; + } + /* + * Encrypted attributes are not supported. We return access denied, + * which is what Windows NT4 does, too. + */ + if (NAttrEncrypted(na)) { + errno = EACCES; + return -1; + } + if (!count) + return 0; + /* Truncate reads beyond end of attribute. */ + if (pos + count > na->data_size) { + if (pos >= na->data_size) + return 0; + count = na->data_size - pos; + } + /* If it is a resident attribute, simply use ntfs_attr_pread(). */ + if (!NAttrNonResident(na)) + return ntfs_attr_pread(na, pos, count, b); + total = total2 = 0; + /* Zero out reads beyond initialized size. */ + if (pos + count > na->initialized_size) { + if (pos >= na->initialized_size) { + memset(b, 0, count); + return count; + } + total2 = pos + count - na->initialized_size; + count -= total2; + memset((u8*)b + count, 0, total2); + } + vol = na->ni->vol; + cb_size = na->compression_block_size; + cb_size_mask = cb_size - 1UL; + cb_clusters = na->compression_block_clusters; + /* Need a temporary buffer for each loaded compression block. */ + cb = malloc(cb_size); + if (!cb) + return -1; + /* Need a temporary buffer for each uncompressed block. */ + dest = malloc(cb_size); + if (!dest) { + err = errno; + free(cb); + errno = err; + return -1; + } + /* + * The first vcn in the first compression block (cb) which we need to + * decompress. + */ + start_vcn = (pos & ~cb_size_mask) >> vol->cluster_size_bits; + /* Offset in the uncompressed cb at which to start reading data. */ + ofs = pos & cb_size_mask; + /* + * The first vcn in the cb after the last cb which we need to + * decompress. + */ + end_vcn = ((pos + count + cb_size - 1) & ~cb_size_mask) >> + vol->cluster_size_bits; + /* Number of compression blocks (cbs) in the wanted vcn range. */ + nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits >> + na->compression_block_size_bits; + cb_end = cb + cb_size; +do_next_cb: + nr_cbs--; + cb_pos = cb; + vcn = start_vcn; + start_vcn += cb_clusters; + + /* Check whether the compression block is sparse. */ + rl = ntfs_attr_find_vcn(na, vcn); + if (!rl || rl->lcn < LCN_HOLE) { + free(cb); + free(dest); + if (total) + return total; + /* FIXME: Do we want EIO or the error code? (AIA) */ + errno = EIO; + return -1; + } + if (rl->lcn == LCN_HOLE) { + /* Sparse cb, zero out destination range overlapping the cb. */ + Dprintf("Found sparse compression block.\n"); + to_read = min(count, cb_size - ofs); + memset(b, 0, to_read); + ofs = 0; + total += to_read; + count -= to_read; + (u8*)b += to_read; + } else if (rl->length - (vcn - rl->vcn) >= cb_clusters) { + /* + * Uncompressed cb, read it straight into the destination range + * overlapping the cb. + */ + Dprintf("Found uncompressed compression block.\n"); + /* + * Read the uncompressed data into the destination buffer. + * NOTE: We cheat a little bit here by marking the attribute as + * not compressed in the ntfs_attr structure so that we can + * read the data by simply using ntfs_attr_pread(). (-8 + */ + to_read = min(count, cb_size - ofs); + ofs += vcn << vol->cluster_size_bits; + NAttrClearCompressed(na); + do { + br = ntfs_attr_pread(na, ofs, to_read, b); + if (br < 0) { + err = errno; + NAttrSetCompressed(na); + free(cb); + free(dest); + if (total) + return total; + errno = err; + return br; + } + total += br; + count -= br; + (u8*)b += br; + to_read -= br; + ofs += br; + } while (to_read > 0); + NAttrSetCompressed(na); + ofs = 0; + } else { + /* + * Compressed cb, decompress it into the temporary buffer, then + * copy the data to the destination range overlapping the cb. + */ + Dprintf("Found compressed compression block.\n"); + /* + * Read the compressed data into the temporary buffer. + * NOTE: We cheat a little bit here by marking the attribute as + * not compressed in the ntfs_attr structure so that we can + * read the raw, compressed data by simply using + * ntfs_attr_pread(). (-8 + */ + to_read = cb_size; + NAttrClearCompressed(na); + do { + br = ntfs_attr_pread(na, + (vcn << vol->cluster_size_bits) + + (cb_pos - cb), to_read, cb_pos); + if (br < 0) { + err = errno; + NAttrSetCompressed(na); + free(cb); + free(dest); + if (total) + return total; + errno = err; + return br; + } + cb_pos += br; + to_read -= br; + } while (to_read > 0); + NAttrSetCompressed(na); + /* Just a precaution. */ + if (cb_pos + 2 <= cb_end) + *(u16*)cb_pos = 0; + Dprintf("Successfully read the compression block.\n"); + if (ntfs_decompress(dest, cb_size, cb, cb_size) < 0) { + err = errno; + free(cb); + free(dest); + if (total) + return total; + errno = err; + return -1; + } + to_read = min(count, cb_size - ofs); + memcpy(b, dest + ofs, to_read); + total += to_read; + count -= to_read; + (u8*)b += to_read; + ofs = 0; + } + /* Do we have more work to do? */ + if (nr_cbs) + goto do_next_cb; + /* We no longer need the buffers. */ + free(cb); + free(dest); + /* Return number of bytes read. */ + return total + total2; +} +