/*
 * mft.c - Mft record handling code. Part of the Linux-NTFS project.
 *
 * Copyright (c) 2000-2004 Anton Altaparmakov
 *
 * This program/include file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program/include file is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program (in the main directory of the Linux-NTFS
 * distribution in the file COPYING); if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
|
|
|
|
#include "config.h"
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <errno.h>
|
|
#include <string.h>
|
|
|
|
#include "compat.h"
|
|
|
|
#include "types.h"
|
|
#include "device.h"
|
|
#include "debug.h"
|
|
#include "bitmap.h"
|
|
#include "attrib.h"
|
|
#include "inode.h"
|
|
#include "volume.h"
|
|
#include "layout.h"
|
|
#include "mft.h"
|
|
|
|
/**
|
|
* ntfs_mft_records_read - read records from the mft from disk
|
|
* @vol: volume to read from
|
|
* @mref: starting mft record number to read
|
|
* @count: number of mft records to read
|
|
* @b: output data buffer
|
|
*
|
|
* Read @count mft records starting at @mref from volume @vol into buffer
|
|
* @b. Return 0 on success or -1 on error, with errno set to the error
|
|
* code.
|
|
*
|
|
* If any of the records exceed the initialized size of the $MFT/$DATA
|
|
* attribute, i.e. they cannot possibly be allocated mft records, assume this
|
|
* is a bug and return error code ESPIPE.
|
|
*
|
|
* The read mft records are mst deprotected and are hence ready to use. The
|
|
* caller should check each record with is_baad_record() in case mst
|
|
* deprotection failed.
|
|
*
|
|
* NOTE: @b has to be at least of size @count * vol->mft_record_size.
|
|
*/
|
|
int ntfs_mft_records_read(const ntfs_volume *vol, const MFT_REF mref,
|
|
const s64 count, MFT_RECORD *b)
|
|
{
|
|
s64 br;
|
|
VCN m;
|
|
|
|
Dprintf("%s(): Entering for inode 0x%llx.\n", __FUNCTION__, MREF(mref));
|
|
if (!vol || !vol->mft_na || !b || count < 0) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
m = MREF(mref);
|
|
/* Refuse to read non-allocated mft records. */
|
|
if (m + count > vol->mft_na->initialized_size >>
|
|
vol->mft_record_size_bits) {
|
|
errno = ESPIPE;
|
|
return -1;
|
|
}
|
|
br = ntfs_attr_mst_pread(vol->mft_na, m << vol->mft_record_size_bits,
|
|
count, vol->mft_record_size, b);
|
|
if (br != count) {
|
|
if (br != -1)
|
|
errno = EIO;
|
|
if (br >= 0)
|
|
Dputs("Error: partition is smaller than it should be!");
|
|
else
|
|
Dperror("Error reading $Mft record(s)");
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* ntfs_mft_records_write - write mft records to disk
|
|
* @vol: volume to write to
|
|
* @mref: starting mft record number to write
|
|
* @count: number of mft records to write
|
|
* @b: data buffer containing the mft records to write
|
|
*
|
|
* Write @count mft records starting at @mref from data buffer @b to volume
|
|
* @vol. Return 0 on success or -1 on error, with errno set to the error code.
|
|
*
|
|
* If any of the records exceed the initialized size of the $MFT/$DATA
|
|
* attribute, i.e. they cannot possibly be allocated mft records, assume this
|
|
* is a bug and return error code ESPIPE.
|
|
*
|
|
* Before the mft records are written, they are mst protected. After the write,
|
|
* they are deprotected again, thus resulting in an increase in the update
|
|
* sequence number inside the data buffer @b.
|
|
*
|
|
* If any mft records are written which are also represented in the mft mirror
|
|
* $MFTMirr, we make a copy of the relevant parts of the data buffer @b into a
|
|
* temporary buffer before we do the actual write. Then if at least one mft
|
|
* record was successfully written, we write the appropriate mft records from
|
|
* the copied buffer to the mft mirror, too.
|
|
*/
|
|
int ntfs_mft_records_write(const ntfs_volume *vol, const MFT_REF mref,
|
|
const s64 count, MFT_RECORD *b)
|
|
{
|
|
s64 bw;
|
|
VCN m;
|
|
void *bmirr = NULL;
|
|
int cnt = 0, res = 0;
|
|
|
|
Dprintf("%s(): Entering for inode 0x%llx.\n", __FUNCTION__, MREF(mref));
|
|
if (!vol || !vol->mft_na || vol->mftmirr_size <= 0 || !b || count < 0) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
m = MREF(mref);
|
|
/* Refuse to write non-allocated mft records. */
|
|
if (m + count > vol->mft_na->initialized_size >>
|
|
vol->mft_record_size_bits) {
|
|
errno = ESPIPE;
|
|
return -1;
|
|
}
|
|
if (m < vol->mftmirr_size) {
|
|
if (!vol->mftmirr_na) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
cnt = vol->mftmirr_size - m;
|
|
if (cnt > count)
|
|
cnt = count;
|
|
bmirr = malloc(cnt * vol->mft_record_size);
|
|
if (!bmirr)
|
|
return -1;
|
|
memcpy(bmirr, b, cnt * vol->mft_record_size);
|
|
}
|
|
bw = ntfs_attr_mst_pwrite(vol->mft_na, m << vol->mft_record_size_bits,
|
|
count, vol->mft_record_size, b);
|
|
if (bw != count) {
|
|
if (bw != -1)
|
|
errno = EIO;
|
|
if (bw >= 0)
|
|
Dputs("Error: partial write while writing $Mft "
|
|
"record(s)!\n");
|
|
else
|
|
Dperror("Error writing $Mft record(s)");
|
|
res = errno;
|
|
}
|
|
if (bmirr && bw > 0) {
|
|
if (bw < cnt)
|
|
cnt = bw;
|
|
bw = ntfs_attr_mst_pwrite(vol->mftmirr_na,
|
|
m << vol->mft_record_size_bits, cnt,
|
|
vol->mft_record_size, bmirr);
|
|
if (bw != cnt) {
|
|
if (bw != -1)
|
|
errno = EIO;
|
|
Dputs("Error: failed to sync $MFTMirr! Run chkdsk.");
|
|
res = errno;
|
|
}
|
|
}
|
|
if (bmirr)
|
|
free(bmirr);
|
|
if (!res)
|
|
return res;
|
|
errno = res;
|
|
return -1;
|
|
}
|
|
|
|
/**
|
|
* ntfs_file_record_read - read a FILE record from the mft from disk
|
|
* @vol: volume to read from
|
|
* @mref: mft reference specifying mft record to read
|
|
* @mrec: address of pointer in which to return the mft record
|
|
* @attr: address of pointer in which to return the first attribute
|
|
*
|
|
* Read a FILE record from the mft of @vol from the storage medium. @mref
|
|
* specifies the mft record to read, including the sequence number, which can
|
|
* be 0 if no sequence number checking is to be performed.
|
|
*
|
|
* The function allocates a buffer large enough to hold the mft record and
|
|
* reads the record into the buffer (mst deprotecting it in the process).
|
|
* *@mrec is then set to point to the buffer.
|
|
*
|
|
* If @attr is not NULL, *@attr is set to point to the first attribute in the
|
|
* mft record, i.e. *@attr is a pointer into *@mrec.
|
|
*
|
|
* Return 0 on success, or -1 on error, with errno set to the error code.
|
|
*
|
|
* The read mft record is checked for having the magic FILE,
|
|
* and for having a matching sequence number (if MSEQNO(*@mref) != 0).
|
|
* If either of these fails, -1 is returned and errno is set to EIO. If you get
|
|
* this, but you still want to read the mft record (e.g. in order to correct
|
|
* it), use ntfs_mft_record_read() directly.
|
|
*
|
|
* Note: Caller has to free *@mrec when finished.
|
|
*
|
|
* Note: We do not check if the mft record is flagged in use. The caller can
|
|
* check if desired.
|
|
*/
|
|
int ntfs_file_record_read(const ntfs_volume *vol, const MFT_REF mref,
|
|
MFT_RECORD **mrec, ATTR_RECORD **attr)
|
|
{
|
|
MFT_RECORD *m;
|
|
ATTR_RECORD *a;
|
|
int err;
|
|
|
|
if (!vol || !mrec) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
m = *mrec;
|
|
if (!m) {
|
|
m = (MFT_RECORD*)malloc(vol->mft_record_size);
|
|
if (!m)
|
|
return -1;
|
|
}
|
|
if (ntfs_mft_record_read(vol, mref, m)) {
|
|
err = errno;
|
|
goto read_failed;
|
|
}
|
|
if (!ntfs_is_file_record(m->magic))
|
|
goto file_corrupt;
|
|
if (MSEQNO(mref) && MSEQNO(mref) != le16_to_cpu(m->sequence_number))
|
|
goto file_corrupt;
|
|
a = (ATTR_RECORD*)((char*)m + le16_to_cpu(m->attrs_offset));
|
|
if (p2n(a) < p2n(m) || (char*)a > (char*)m + vol->mft_record_size)
|
|
goto file_corrupt;
|
|
*mrec = m;
|
|
if (attr)
|
|
*attr = a;
|
|
return 0;
|
|
file_corrupt:
|
|
Dputs("ntfs_file_record_read(): file is corrupt.");
|
|
err = EIO;
|
|
read_failed:
|
|
if (m != *mrec)
|
|
free(m);
|
|
errno = err;
|
|
return -1;
|
|
}
|
|
|
|
/**
|
|
* ntfs_mft_record_layout - layout an mft record into a memory buffer
|
|
* @vol: volume to which the mft record will belong
|
|
* @mref: mft reference specifying the mft record number
|
|
* @m: destination buffer of size >= @vol->mft_record_size bytes
|
|
*
|
|
* Layout an empty, unused mft record with the mft reference @mref into the
|
|
* buffer @m. The volume @vol is needed because the mft record structure was
|
|
* modified in NTFS 3.1 so we need to know which volume version this mft record
|
|
* will be used on.
|
|
*
|
|
* On success return 0 and on error return -1 with errno set to the error code.
|
|
*/
|
|
int ntfs_mft_record_layout(const ntfs_volume *vol, const MFT_REF mref,
|
|
MFT_RECORD *m)
|
|
{
|
|
ATTR_RECORD *a;
|
|
|
|
if (!vol || !m) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
/* Aligned to 2-byte boundary. */
|
|
if (vol->major_ver < 3 || (vol->major_ver == 3 && !vol->minor_ver))
|
|
m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD_OLD) + 1) & ~1);
|
|
else {
|
|
/* Abort if mref is > 32 bits. */
|
|
if (MREF(mref) & 0x0000ffff00000000ull) {
|
|
Dputs("Mft reference exceeds 32 bits!");
|
|
errno = ERANGE;
|
|
return -1;
|
|
}
|
|
m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD) + 1) & ~1);
|
|
/*
|
|
* Set the NTFS 3.1+ specific fields while we know that the
|
|
* volume version is 3.1+.
|
|
*/
|
|
m->reserved = cpu_to_le16(0);
|
|
m->mft_record_number = cpu_to_le32(MREF(mref));
|
|
}
|
|
m->magic = magic_FILE;
|
|
if (vol->mft_record_size >= NTFS_SECTOR_SIZE)
|
|
m->usa_count = cpu_to_le16(vol->mft_record_size /
|
|
NTFS_SECTOR_SIZE + 1);
|
|
else {
|
|
m->usa_count = cpu_to_le16(1);
|
|
Dprintf("Sector size is bigger than MFT record size. "
|
|
"Setting usa_count to 1. If Windows\nchkdsk "
|
|
"reports this as corruption, please email "
|
|
"linux-ntfs-dev@lists.sf.net\nstating that "
|
|
"you saw this message and that the file "
|
|
"system created was corrupt.\nThank you.\n");
|
|
}
|
|
/* Set the update sequence number to 1. */
|
|
*(u16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = cpu_to_le16(1);
|
|
m->lsn = cpu_to_le64(0ull);
|
|
m->sequence_number = cpu_to_le16(1);
|
|
m->link_count = cpu_to_le16(0);
|
|
/* Aligned to 8-byte boundary. */
|
|
m->attrs_offset = cpu_to_le16((le16_to_cpu(m->usa_ofs) +
|
|
(le16_to_cpu(m->usa_count) << 1) + 7) & ~7);
|
|
m->flags = cpu_to_le16(0);
|
|
/*
|
|
* Using attrs_offset plus eight bytes (for the termination attribute),
|
|
* aligned to 8-byte boundary.
|
|
*/
|
|
m->bytes_in_use = cpu_to_le32((le16_to_cpu(m->attrs_offset) + 8 + 7) &
|
|
~7);
|
|
m->bytes_allocated = cpu_to_le32(vol->mft_record_size);
|
|
m->base_mft_record = cpu_to_le64((MFT_REF)0);
|
|
m->next_attr_instance = cpu_to_le16(0);
|
|
a = (ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset));
|
|
a->type = AT_END;
|
|
a->length = cpu_to_le32(0);
|
|
/* Finally, clear the unused part of the mft record. */
|
|
memset((u8*)a + 8, 0, vol->mft_record_size - ((u8*)a + 8 - (u8*)m));
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* ntfs_mft_record_format - format an mft record on an ntfs volume
|
|
* @vol: volume on which to format the mft record
|
|
* @mref: mft reference specifying mft record to format
|
|
*
|
|
* Format the mft record with the mft reference @mref in $MFT/$DATA, i.e. lay
|
|
* out an empty, unused mft record in memory and write it to the volume @vol.
|
|
*
|
|
* On success return 0 and on error return -1 with errno set to the error code.
|
|
*/
|
|
int ntfs_mft_record_format(const ntfs_volume *vol, const MFT_REF mref)
|
|
{
|
|
MFT_RECORD *m;
|
|
int err;
|
|
|
|
if (!vol || !vol->mft_na) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
m = malloc(vol->mft_record_size);
|
|
if (!m)
|
|
return -1;
|
|
if (ntfs_mft_record_layout(vol, mref, m)) {
|
|
err = errno;
|
|
free(m);
|
|
errno = err;
|
|
return -1;
|
|
}
|
|
if (ntfs_mft_record_write(vol, mref, m)) {
|
|
err = errno;
|
|
free(m);
|
|
errno = err;
|
|
return -1;
|
|
}
|
|
free(m);
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* ntfs_mft_record_alloc - allocate an mft record on an ntfs volume
|
|
* @vol: volume on which to allocate the mft record
|
|
* @start: starting mft record at which to allocate (or -1 if none)
|
|
*
|
|
* Allocate an mft record in $MFT/$DATA starting to search for a free record
|
|
* at mft record number @start or at the current allocator position if @start
|
|
* is -1, on the mounted ntfs volume @vol.
|
|
*
|
|
* On success return the now opened ntfs inode of the mft record.
|
|
*
|
|
* On error return NULL with errno set to the error code.
|
|
*
|
|
* To find a free mft record, we scan the mft bitmap for a zero bit. To
|
|
* optimize this we start scanning at the place specified by @start or if
|
|
* @start is -1 we start where we last stopped and we perform wrap around when
|
|
* we reach the end. Note, we do not try to allocate mft records below number
|
|
* 24 because numbers 0 to 15 are the defined system files anyway and 16 to 24
|
|
* are special in that they are used for storing extension mft records for the
|
|
* $DATA attribute of $MFT. This is required to avoid the possibility of
|
|
* creating a run list with a circular dependence which once written to disk
|
|
* can never be read in again. Windows will only use records 16 to 24 for
|
|
* normal files if the volume is completely out of space. We never use them
|
|
* which means that when the volume is really out of space we cannot create any
|
|
* more files while Windows can still create up to 8 small files. We can start
|
|
* doing this at some later time, it does not matter much for now.
|
|
*
|
|
* When scanning the mft bitmap, we only search up to the last allocated mft
|
|
* record. If there are no free records left in the range 24 to number of
|
|
* allocated mft records, then we extend the $MFT/$DATA attribute in order to
|
|
* create free mft records. We extend the allocated size of $MFT/$DATA by 16
|
|
* records at a time or one cluster, if cluster size is above 16kiB. If there
|
|
* is not sufficient space to do this, we try to extend by a single mft record
|
|
* or one cluster, if cluster size is above the mft record size, but we only do
|
|
* this if there is enough free space, which we know from the values returned
|
|
* by the failed cluster allocation function when we tried to do the first
|
|
* allocation.
|
|
*
|
|
* No matter how many mft records we allocate, we initialize only the first
|
|
* allocated mft record, incrementing mft data size and initialized size
|
|
* accordingly, open an ntfs_inode for it and return it to the caller, unless
|
|
* there are less than 24 mft records, in which case we allocate and initialize
|
|
* mft records until we reach record 24 which we consider as the first free mft
|
|
* record for use by normal files.
|
|
*
|
|
* If during any stage we overflow the initialized data in the mft bitmap, we
|
|
* extend the initialized size (and data size) by 8 bytes, allocating another
|
|
* cluster if required. The bitmap data size has to be at least equal to the
|
|
* number of mft records in the mft, but it can be bigger, in which case the
|
|
* superflous bits are padded with zeroes.
|
|
*
|
|
* Thus, when we return successfully (return value non-zero), we will have:
|
|
* - initialized / extended the mft bitmap if necessary,
|
|
* - initialized / extended the mft data if necessary,
|
|
* - set the bit corresponding to the mft record being allocated in the
|
|
* mft bitmap,
|
|
* - open an ntfs_inode for the allocated mft record, and we will
|
|
* - return the ntfs_inode.
|
|
*
|
|
* On error (return value zero), nothing will have changed. If we had changed
|
|
* anything before the error occured, we will have reverted back to the
|
|
* starting state before returning to the caller. Thus, except for bugs, we
|
|
* should always leave the volume in a consistent state when returning from
|
|
* this function.
|
|
*
|
|
* Note, this function cannot make use of most of the normal functions, like
|
|
* for example for attribute resizing, etc, because when the run list overflows
|
|
* the base mft record and an attribute list is used, it is very important that
|
|
* the extension mft records used to store the $DATA attribute of $MFT can be
|
|
* reached without having to read the information contained inside them, as
|
|
* this would make it impossible to find them in the first place after the
|
|
* volume is dismounted. $MFT/$BITMAP probably does not need to follow this
|
|
* rule because the bitmap is not essential for finding the mft records, but on
|
|
* the other hand, handling the bitmap in this special way would make life
|
|
* easier because otherwise there might be circular invocations of functions
|
|
* when reading the bitmap but if we are careful, we should be able to avoid
|
|
* all problems.
|
|
*/
|
|
#ifndef PAGE_SIZE
|
|
#define PAGE_SIZE 4096
|
|
#endif
|
|
ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, s64 start)
|
|
{
|
|
ntfs_debug("Entering (start 0x%llx).", (long long)start);
|
|
if (!vol || !vol->mft_na || !vol->mftbmp_na || start < -1) {
|
|
errno = EINVAL;
|
|
return NULL;
|
|
}
|
|
errno = ENOTSUP;
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* ntfs_mft_record_free - free an mft record on an ntfs volume
|
|
* @vol: volume on which to free the mft record
|
|
* @ni: open ntfs inode of the mft record to free
|
|
*
|
|
* Free the mft record of the open inode @ni on the mounted ntfs volume @vol.
|
|
* Note that this function calls ntfs_inode_close() internally and hence you
|
|
* cannot use the pointer @ni any more after this function returns success.
|
|
*
|
|
* On success return 0 and on error return -1 with errno set to the error code.
|
|
*/
|
|
int ntfs_mft_record_free(ntfs_volume *vol, ntfs_inode *ni)
|
|
{
|
|
u64 mft_no;
|
|
int err;
|
|
u16 seq_no, old_seq_no;
|
|
|
|
if (!vol || !vol->mftbmp_na || !ni) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
/* Cache the mft reference for later. */
|
|
mft_no = ni->mft_no;
|
|
|
|
/* Mark the mft record as not in use. */
|
|
ni->mrec->flags &= ~MFT_RECORD_IN_USE;
|
|
|
|
/* Increment the sequence number, skipping zero, if it is not zero. */
|
|
old_seq_no = seq_no = le16_to_cpu(ni->mrec->sequence_number);
|
|
if (seq_no == 0xffff)
|
|
seq_no = 1;
|
|
else if (seq_no)
|
|
seq_no++;
|
|
ni->mrec->sequence_number = cpu_to_le16(seq_no);
|
|
|
|
/* Set the inode dirty and write it out. */
|
|
ntfs_inode_mark_dirty(ni);
|
|
if (ntfs_inode_sync(ni)) {
|
|
err = errno;
|
|
goto sync_rollback;
|
|
}
|
|
|
|
/* Clear the bit in the $MFT/$BITMAP corresponding to this record. */
|
|
if (ntfs_bitmap_clear_run(vol->mftbmp_na, mft_no, 1)) {
|
|
err = errno;
|
|
// FIXME: If ntfs_bitmap_clear_run() guarantees atomicity on
|
|
// error, this could be changed to goto sync_rollback;
|
|
goto bitmap_rollback;
|
|
}
|
|
|
|
/* Throw away the now freed inode. */
|
|
if (!ntfs_inode_close(ni))
|
|
return 0;
|
|
err = errno;
|
|
|
|
/* Rollback what we did... */
|
|
bitmap_rollback:
|
|
if (ntfs_bitmap_set_run(vol->mftbmp_na, mft_no, 1))
|
|
Dputs("Eeek! Rollback failed in ntfs_mft_record_free(). "
|
|
"Leaving inconsistent metadata!");
|
|
sync_rollback:
|
|
ni->mrec->flags |= MFT_RECORD_IN_USE;
|
|
ni->mrec->sequence_number = cpu_to_le16(old_seq_no);
|
|
ntfs_inode_mark_dirty(ni);
|
|
errno = err;
|
|
return -1;
|
|
}
|