ntfs-3g/libntfs/mft.c

534 lines
18 KiB
C

/*
* mft.c - Mft record handling code. Part of the Linux-NTFS project.
*
* Copyright (c) 2000-2004 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
* by the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program/include file is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program (in the main directory of the Linux-NTFS
* distribution in the file COPYING); if not, write to the Free Software
* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include "compat.h"
#include "types.h"
#include "device.h"
#include "debug.h"
#include "bitmap.h"
#include "attrib.h"
#include "inode.h"
#include "volume.h"
#include "layout.h"
#include "mft.h"
/**
 * ntfs_mft_records_read - read records from the mft from disk
 * @vol:	volume to read from
 * @mref:	starting mft record number to read
 * @count:	number of mft records to read
 * @b:		output data buffer
 *
 * Fetch @count consecutive mft records, the first one being the record
 * referenced by @mref, from the $MFT/$DATA attribute of @vol and store them
 * in @b. The caller must supply a buffer of at least
 * @count * @vol->mft_record_size bytes.
 *
 * Return 0 on success or -1 on error with errno set as follows:
 *	EINVAL	- @vol, @vol->mft_na or @b is NULL, or @count is negative.
 *	ESPIPE	- The requested range reaches beyond the initialized size of
 *		  $MFT/$DATA, i.e. the records cannot possibly be allocated
 *		  mft records. This is assumed to be a bug in the caller.
 *	EIO	- ntfs_attr_mst_pread() returned a value that is neither
 *		  @count nor -1 (e.g. a short read).
 *	other	- Left as set by ntfs_attr_mst_pread() when it returned -1.
 *
 * The read mft records are mst deprotected and are hence ready to use. The
 * caller should check each record with is_baad_record() in case mst
 * deprotection failed.
 */
int ntfs_mft_records_read(const ntfs_volume *vol, const MFT_REF mref,
		const s64 count, MFT_RECORD *b)
{
	VCN first_rec;
	s64 nr_read;

	Dprintf("%s(): Entering for inode 0x%llx.\n", __FUNCTION__, MREF(mref));
	if (!vol || !vol->mft_na || !b || count < 0) {
		errno = EINVAL;
		return -1;
	}
	first_rec = MREF(mref);
	/* Records past the initialized size cannot exist; refuse them. */
	if (first_rec + count > (vol->mft_na->initialized_size >>
			vol->mft_record_size_bits)) {
		errno = ESPIPE;
		return -1;
	}
	nr_read = ntfs_attr_mst_pread(vol->mft_na,
			first_rec << vol->mft_record_size_bits, count,
			vol->mft_record_size, b);
	if (nr_read == count)
		return 0;
	/* Anything but a plain -1 did not come with a usable errno. */
	if (nr_read != -1)
		errno = EIO;
	if (nr_read < 0)
		Dperror("Error reading $Mft record(s)");
	else
		Dputs("Error: partition is smaller than it should be!");
	return -1;
}
/**
 * ntfs_mft_records_write - write mft records to disk
 * @vol:	volume to write to
 * @mref:	starting mft record number to write
 * @count:	number of mft records to write
 * @b:		data buffer containing the mft records to write
 *
 * Write @count mft records starting at @mref from data buffer @b to volume
 * @vol. Return 0 on success or -1 on error, with errno set to the error code:
 *	EINVAL	- @vol, @vol->mft_na or @b is NULL, @count is negative,
 *		  @vol->mftmirr_size is not positive, or records inside the
 *		  mirrored range are written while @vol->mftmirr_na is NULL.
 *	ESPIPE	- The requested range reaches beyond the initialized size of
 *		  $MFT/$DATA, i.e. the records cannot possibly be allocated
 *		  mft records. This is assumed to be a bug in the caller.
 *	ENOMEM	- The temporary buffer for the mirror copy could not be
 *		  allocated. Nothing has been written in this case.
 *	EIO	- A write to $MFT or $MFTMirr returned a record count that is
 *		  neither the requested one nor -1 (e.g. a partial write).
 *	other	- Left as set by ntfs_attr_mst_pwrite() when it returned -1.
 *
 * Before the mft records are written, they are mst protected. After the write,
 * they are deprotected again, thus resulting in an increase in the update
 * sequence number inside the data buffer @b.
 *
 * If any mft records are written which are also represented in the mft mirror
 * $MFTMirr, we make a copy of the relevant parts of the data buffer @b into a
 * temporary buffer before we do the actual write. Then if at least one mft
 * record was successfully written, we write the appropriate mft records from
 * the copied buffer to the mft mirror, too. If both writes fail, the error
 * of the mirror write is the one reported.
 *
 * A @count of zero is valid: no mirror copy is made in that case.
 */
int ntfs_mft_records_write(const ntfs_volume *vol, const MFT_REF mref,
		const s64 count, MFT_RECORD *b)
{
	s64 bw;
	VCN m;
	void *bmirr = NULL;
	int cnt = 0, res = 0;

	Dprintf("%s(): Entering for inode 0x%llx.\n", __FUNCTION__, MREF(mref));
	if (!vol || !vol->mft_na || vol->mftmirr_size <= 0 || !b || count < 0) {
		errno = EINVAL;
		return -1;
	}
	m = MREF(mref);
	/* Refuse to write non-allocated mft records. */
	if (m + count > vol->mft_na->initialized_size >>
			vol->mft_record_size_bits) {
		errno = ESPIPE;
		return -1;
	}
	if (m < vol->mftmirr_size) {
		if (!vol->mftmirr_na) {
			errno = EINVAL;
			return -1;
		}
		/* Number of the records being written that are mirrored. */
		cnt = vol->mftmirr_size - m;
		if (cnt > count)
			cnt = count;
		/*
		 * Only snapshot when there is something to mirror. With
		 * @count == 0 the size would be zero and malloc(0) is allowed
		 * to return NULL, which must not be mistaken for a failure.
		 */
		if (cnt > 0) {
			size_t mirr_bytes = (size_t)cnt * vol->mft_record_size;

			bmirr = malloc(mirr_bytes);
			if (!bmirr) {
				errno = ENOMEM;
				return -1;
			}
			/*
			 * Copy before the write: the write below changes the
			 * update sequence number inside @b.
			 */
			memcpy(bmirr, b, mirr_bytes);
		}
	}
	bw = ntfs_attr_mst_pwrite(vol->mft_na, m << vol->mft_record_size_bits,
			count, vol->mft_record_size, b);
	if (bw != count) {
		if (bw != -1)
			errno = EIO;
		if (bw >= 0)
			Dputs("Error: partial write while writing $Mft "
					"record(s)!\n");
		else
			Dperror("Error writing $Mft record(s)");
		res = errno;
	}
	if (bmirr && bw > 0) {
		/* Mirror only as many records as actually reached $MFT. */
		if (bw < cnt)
			cnt = bw;
		bw = ntfs_attr_mst_pwrite(vol->mftmirr_na,
				m << vol->mft_record_size_bits, cnt,
				vol->mft_record_size, bmirr);
		if (bw != cnt) {
			if (bw != -1)
				errno = EIO;
			Dputs("Error: failed to sync $MFTMirr! Run chkdsk.");
			res = errno;
		}
	}
	/* free(NULL) is a no-op, so no guard is needed. */
	free(bmirr);
	if (!res)
		return 0;
	errno = res;
	return -1;
}
/**
 * ntfs_file_record_read - read a FILE record from the mft from disk
 * @vol:	volume to read from
 * @mref:	mft reference specifying mft record to read
 * @mrec:	address of pointer in which to return the mft record
 * @attr:	address of pointer in which to return the first attribute
 *
 * Read a FILE record from the mft of @vol from the storage medium. @mref
 * specifies the mft record to read, including the sequence number, which can
 * be 0 if no sequence number checking is to be performed.
 *
 * *@mrec must be initialized by the caller. If it is NULL, the function
 * allocates a buffer of @vol->mft_record_size bytes, reads the record into it
 * and sets *@mrec to point to it on success. If it is not NULL, it is used as
 * the destination buffer and therefore has to be at least
 * @vol->mft_record_size bytes large.
 *
 * If @attr is not NULL, *@attr is set to point to the first attribute in the
 * mft record, i.e. *@attr is a pointer into *@mrec.
 *
 * Return 0 on success, or -1 on error, with errno set to the error code:
 *	EINVAL	- @vol or @mrec is NULL.
 *	ENOMEM	- The record buffer could not be allocated.
 *	EIO	- The record does not have the magic FILE, its sequence number
 *		  does not match MSEQNO(@mref) (if that is not 0), or its
 *		  attributes offset leaves no room for an attribute inside the
 *		  record.
 *	other	- Left as set by ntfs_mft_record_read() when it failed.
 * If you get EIO, but you still want to read the mft record (e.g. in order to
 * correct it), use ntfs_mft_record_read() directly.
 *
 * On error a buffer allocated by this function is freed again and *@mrec and
 * *@attr are left untouched. A caller supplied buffer may have been
 * overwritten.
 *
 * Note: Caller has to free *@mrec when finished.
 *
 * Note: We do not check if the mft record is flagged in use. The caller can
 * check if desired.
 */
int ntfs_file_record_read(const ntfs_volume *vol, const MFT_REF mref,
		MFT_RECORD **mrec, ATTR_RECORD **attr)
{
	MFT_RECORD *m;
	ATTR_RECORD *a;
	unsigned int attrs_ofs;
	int err;

	if (!vol || !mrec) {
		errno = EINVAL;
		return -1;
	}
	/* Use the caller's buffer if there is one, otherwise allocate. */
	m = *mrec;
	if (!m) {
		m = malloc(vol->mft_record_size);
		if (!m) {
			errno = ENOMEM;
			return -1;
		}
	}
	if (ntfs_mft_record_read(vol, mref, m)) {
		err = errno;
		goto read_failed;
	}
	if (!ntfs_is_file_record(m->magic))
		goto file_corrupt;
	if (MSEQNO(mref) && MSEQNO(mref) != le16_to_cpu(m->sequence_number))
		goto file_corrupt;
	/*
	 * The first attribute must lie inside the record. Requiring room for
	 * at least the attribute type field also rejects an offset equal to
	 * the record size, which would make *@attr point past the buffer.
	 * Comparing offsets rather than pointers avoids forming an
	 * out-of-bounds pointer for the check itself.
	 */
	attrs_ofs = le16_to_cpu(m->attrs_offset);
	if (attrs_ofs + sizeof(a->type) > vol->mft_record_size)
		goto file_corrupt;
	a = (ATTR_RECORD*)((char*)m + attrs_ofs);
	*mrec = m;
	if (attr)
		*attr = a;
	return 0;
file_corrupt:
	Dputs("ntfs_file_record_read(): file is corrupt.");
	err = EIO;
read_failed:
	/* Only release the buffer if it was allocated above. */
	if (m != *mrec)
		free(m);
	errno = err;
	return -1;
}
/**
 * ntfs_mft_record_layout - layout an mft record into a memory buffer
 * @vol:	volume to which the mft record will belong
 * @mref:	mft reference specifying the mft record number
 * @m:		destination buffer of size >= @vol->mft_record_size bytes
 *
 * Fill the buffer @m with an empty, unused mft record carrying the mft
 * reference @mref: the record header, an update sequence number of 1, a
 * single AT_END attribute and zeroes up to @vol->mft_record_size.
 *
 * The volume @vol is needed because the mft record structure was modified in
 * NTFS 3.1, so the volume version decides where the update sequence array
 * starts and whether the NTFS 3.1+ specific header fields are set.
 *
 * On success return 0 and on error return -1 with errno set to the error code:
 *	EINVAL	- @vol or @m is NULL.
 *	ERANGE	- The volume is NTFS 3.1+ and the record number in @mref does
 *		  not fit into 32 bits. @m is not modified in this case.
 */
int ntfs_mft_record_layout(const ntfs_volume *vol, const MFT_REF mref,
		MFT_RECORD *m)
{
	ATTR_RECORD *end_attr;
	u8 *tail;
	u16 usa_ofs, usa_count, attrs_ofs;

	if (!vol || !m) {
		errno = EINVAL;
		return -1;
	}
	/* The update sequence array offset is aligned to a 2-byte boundary. */
	if (vol->major_ver > 3 || (vol->major_ver == 3 && vol->minor_ver)) {
		/* NTFS 3.1+: the record number is stored in 32 bits. */
		if (MREF(mref) & 0x0000ffff00000000ull) {
			Dputs("Mft reference exceeds 32 bits!");
			errno = ERANGE;
			return -1;
		}
		usa_ofs = (u16)((sizeof(MFT_RECORD) + 1) & ~1);
		/* These fields only exist in the NTFS 3.1+ record header. */
		m->reserved = cpu_to_le16(0);
		m->mft_record_number = cpu_to_le32(MREF(mref));
	} else
		usa_ofs = (u16)((sizeof(MFT_RECORD_OLD) + 1) & ~1);
	m->usa_ofs = cpu_to_le16(usa_ofs);
	m->magic = magic_FILE;
	/* One entry per sector plus one for the sequence number itself. */
	if (vol->mft_record_size < NTFS_SECTOR_SIZE) {
		usa_count = 1;
		Dprintf("Sector size is bigger than MFT record size. "
				"Setting usa_count to 1. If Windows\nchkdsk "
				"reports this as corruption, please email "
				"linux-ntfs-dev@lists.sf.net\nstating that "
				"you saw this message and that the file "
				"system created was corrupt.\nThank you.\n");
	} else
		usa_count = (u16)(vol->mft_record_size / NTFS_SECTOR_SIZE + 1);
	m->usa_count = cpu_to_le16(usa_count);
	/* Set the update sequence number to 1. */
	*(u16*)((u8*)m + usa_ofs) = cpu_to_le16(1);
	m->lsn = cpu_to_le64(0ull);
	m->sequence_number = cpu_to_le16(1);
	m->link_count = cpu_to_le16(0);
	/*
	 * The attributes follow the update sequence array (two bytes per
	 * entry), aligned to an 8-byte boundary.
	 */
	attrs_ofs = (u16)((usa_ofs + (usa_count << 1) + 7) & ~7);
	m->attrs_offset = cpu_to_le16(attrs_ofs);
	m->flags = cpu_to_le16(0);
	/*
	 * Bytes in use: attributes offset plus eight bytes for the termination
	 * attribute, aligned to an 8-byte boundary.
	 */
	m->bytes_in_use = cpu_to_le32((attrs_ofs + 8 + 7) & ~7);
	m->bytes_allocated = cpu_to_le32(vol->mft_record_size);
	m->base_mft_record = cpu_to_le64((MFT_REF)0);
	m->next_attr_instance = cpu_to_le16(0);
	/* The only attribute is the terminator. */
	end_attr = (ATTR_RECORD*)((u8*)m + attrs_ofs);
	end_attr->type = AT_END;
	end_attr->length = cpu_to_le32(0);
	/* Finally, clear everything behind the terminator. */
	tail = (u8*)end_attr + 8;
	memset(tail, 0, vol->mft_record_size - (tail - (u8*)m));
	return 0;
}
/**
 * ntfs_mft_record_format - format an mft record on an ntfs volume
 * @vol:	volume on which to format the mft record
 * @mref:	mft reference specifying mft record to format
 *
 * Format the mft record with the mft reference @mref in $MFT/$DATA: an empty,
 * unused mft record is laid out in a temporary buffer with
 * ntfs_mft_record_layout() and then written to the volume @vol with
 * ntfs_mft_record_write(). The temporary buffer is always released before
 * returning.
 *
 * On success return 0 and on error return -1 with errno set to the error
 * code. EINVAL is returned if @vol or @vol->mft_na is NULL; otherwise errno
 * is the one left by the failing allocation, layout or write.
 */
int ntfs_mft_record_format(const ntfs_volume *vol, const MFT_REF mref)
{
	MFT_RECORD *rec;
	int saved_errno;

	if (!vol || !vol->mft_na) {
		errno = EINVAL;
		return -1;
	}
	rec = malloc(vol->mft_record_size);
	if (!rec)
		return -1;
	/* The write is only attempted if the layout succeeded. */
	if (ntfs_mft_record_layout(vol, mref, rec) ||
			ntfs_mft_record_write(vol, mref, rec)) {
		/* Keep the failure cause across the call to free(). */
		saved_errno = errno;
		free(rec);
		errno = saved_errno;
		return -1;
	}
	free(rec);
	return 0;
}
/**
* ntfs_mft_record_alloc - allocate an mft record on an ntfs volume
* @vol: volume on which to allocate the mft record
* @start: starting mft record at which to allocate (or -1 if none)
*
* Allocate an mft record in $MFT/$DATA starting to search for a free record
* at mft record number @start or at the current allocator position if @start
* is -1, on the mounted ntfs volume @vol.
*
* On success return the now opened ntfs inode of the mft record.
*
* On error return NULL with errno set to the error code.
*
* To find a free mft record, we scan the mft bitmap for a zero bit. To
* optimize this we start scanning at the place specified by @start or if
* @start is -1 we start where we last stopped and we perform wrap around when
* we reach the end. Note, we do not try to allocate mft records below number
* 24 because numbers 0 to 15 are the defined system files anyway and 16 to 24
* are special in that they are used for storing extension mft records for the
* $DATA attribute of $MFT. This is required to avoid the possibility of
* creating a run list with a circular dependence which once written to disk
* can never be read in again. Windows will only use records 16 to 24 for
* normal files if the volume is completely out of space. We never use them
* which means that when the volume is really out of space we cannot create any
* more files while Windows can still create up to 8 small files. We can start
* doing this at some later time, it does not matter much for now.
*
* When scanning the mft bitmap, we only search up to the last allocated mft
* record. If there are no free records left in the range 24 to number of
* allocated mft records, then we extend the $MFT/$DATA attribute in order to
* create free mft records. We extend the allocated size of $MFT/$DATA by 16
* records at a time or one cluster, if cluster size is above 16kiB. If there
* is not sufficient space to do this, we try to extend by a single mft record
* or one cluster, if cluster size is above the mft record size, but we only do
* this if there is enough free space, which we know from the values returned
* by the failed cluster allocation function when we tried to do the first
* allocation.
*
* No matter how many mft records we allocate, we initialize only the first
* allocated mft record, incrementing mft data size and initialized size
* accordingly, open an ntfs_inode for it and return it to the caller, unless
* there are less than 24 mft records, in which case we allocate and initialize
* mft records until we reach record 24 which we consider as the first free mft
* record for use by normal files.
*
* If during any stage we overflow the initialized data in the mft bitmap, we
* extend the initialized size (and data size) by 8 bytes, allocating another
* cluster if required. The bitmap data size has to be at least equal to the
* number of mft records in the mft, but it can be bigger, in which case the
* superfluous bits are padded with zeroes.
*
* Thus, when we return successfully (return value non-zero), we will have:
* - initialized / extended the mft bitmap if necessary,
* - initialized / extended the mft data if necessary,
* - set the bit corresponding to the mft record being allocated in the
* mft bitmap,
* - open an ntfs_inode for the allocated mft record, and we will
* - return the ntfs_inode.
*
* On error (return value zero), nothing will have changed. If we had changed
* anything before the error occurred, we will have reverted back to the
* starting state before returning to the caller. Thus, except for bugs, we
* should always leave the volume in a consistent state when returning from
* this function.
*
* Note, this function cannot make use of most of the normal functions, like
* for example for attribute resizing, etc, because when the run list overflows
* the base mft record and an attribute list is used, it is very important that
* the extension mft records used to store the $DATA attribute of $MFT can be
* reached without having to read the information contained inside them, as
* this would make it impossible to find them in the first place after the
* volume is dismounted. $MFT/$BITMAP probably does not need to follow this
* rule because the bitmap is not essential for finding the mft records, but on
* the other hand, handling the bitmap in this special way would make life
* easier because otherwise there might be circular invocations of functions
* when reading the bitmap but if we are careful, we should be able to avoid
* all problems.
*/
#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif
/*
 * NOTE: The body below only validates its arguments. It never allocates a
 * record and always returns NULL, with errno set to EINVAL if @vol,
 * @vol->mft_na or @vol->mftbmp_na is NULL or @start is below -1, and to
 * ENOTSUP otherwise.
 */
ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, s64 start)
{
	int err = ENOTSUP;

	ntfs_debug("Entering (start 0x%llx).", (long long)start);
	if (!vol || !vol->mft_na || !vol->mftbmp_na || start < -1)
		err = EINVAL;
	errno = err;
	return NULL;
}
/**
 * ntfs_mft_record_free - free an mft record on an ntfs volume
 * @vol:	volume on which to free the mft record
 * @ni:		open ntfs inode of the mft record to free
 *
 * Free the mft record of the open inode @ni on the mounted ntfs volume @vol.
 * The record is marked as not in use, its sequence number is advanced
 * (a sequence number of zero is left alone and zero is skipped on wrap
 * around), the inode is synced, the corresponding bit in $MFT/$BITMAP is
 * cleared and finally the inode is closed.
 *
 * Note that this function calls ntfs_inode_close() internally and hence you
 * cannot use the pointer @ni any more after this function returns success.
 *
 * On success return 0 and on error return -1 with errno set to the error
 * code. EINVAL is returned if @vol, @vol->mftbmp_na or @ni is NULL; otherwise
 * errno is the one left by the step that failed.
 *
 * On error the in-memory mft record gets its in-use flag and old sequence
 * number back and the inode is marked dirty again. If the failure happened
 * at or after the bitmap update, the bit in $MFT/$BITMAP is set again as
 * well.
 */
int ntfs_mft_record_free(ntfs_volume *vol, ntfs_inode *ni)
{
	u64 rec_no;
	u16 orig_seq, next_seq;
	int saved_errno;

	if (!vol || !vol->mftbmp_na || !ni) {
		errno = EINVAL;
		return -1;
	}
	/* Remember the record number; @ni is gone once it has been closed. */
	rec_no = ni->mft_no;
	/* Mark the mft record as not in use. */
	ni->mrec->flags &= ~MFT_RECORD_IN_USE;
	/* Advance the sequence number unless it is zero; never produce zero. */
	orig_seq = le16_to_cpu(ni->mrec->sequence_number);
	next_seq = orig_seq;
	if (orig_seq)
		next_seq = (orig_seq == 0xffff) ? 1 : orig_seq + 1;
	ni->mrec->sequence_number = cpu_to_le16(next_seq);
	/* Push the modified record out. */
	ntfs_inode_mark_dirty(ni);
	if (ntfs_inode_sync(ni)) {
		saved_errno = errno;
		goto undo_record;
	}
	/* Clear the bit in the $MFT/$BITMAP corresponding to this record. */
	if (ntfs_bitmap_clear_run(vol->mftbmp_na, rec_no, 1)) {
		saved_errno = errno;
		/*
		 * FIXME: If ntfs_bitmap_clear_run() guarantees atomicity on
		 * error, this could jump to undo_record instead.
		 */
		goto undo_bitmap;
	}
	/* Throw away the now freed inode. */
	if (ntfs_inode_close(ni)) {
		saved_errno = errno;
		goto undo_bitmap;
	}
	return 0;
	/* Rollback, undoing the steps in reverse order. */
undo_bitmap:
	if (ntfs_bitmap_set_run(vol->mftbmp_na, rec_no, 1))
		Dputs("Eeek! Rollback failed in ntfs_mft_record_free(). "
				"Leaving inconsistent metadata!");
undo_record:
	ni->mrec->flags |= MFT_RECORD_IN_USE;
	ni->mrec->sequence_number = cpu_to_le16(orig_seq);
	ntfs_inode_mark_dirty(ni);
	errno = saved_errno;
	return -1;
}