/*
 * $Id: disk_io.c,v 1.14 2001/06/01 02:07:26 antona Exp $
 *
 * disk_io.c - Disk io functions. Part of the Linux-NTFS project.
 *
 * Copyright (c) 2000,2001 Anton Altaparmakov.
 *
 * This program/include file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program/include file is distributed in the hope that it will be 
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program (in the main directory of the Linux-NTFS 
 * distribution in the file COPYING); if not, write to the Free Software
 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>

#include "types.h"
#include "disk_io.h"

/**
 * ntfs_pwrite - write a buffer to a file descriptor at a given position
 * @fd:		file descriptor to write to
 * @b:		data buffer to write from
 * @count:	number of bytes to write
 * @pos:	byte position in @fd at which to start writing
 *
 * Seek to @pos in @fd, write @count bytes from @b, looping over short writes,
 * and then fdatasync() the descriptor.
 *
 * Return the number of bytes written. This is less than @count only if
 * write() returned 0 three times. Return 0 if @count is 0. On error return -1
 * with errno set: EINVAL if @b is NULL or @count is negative, otherwise the
 * value left behind by lseek(), write() or fdatasync().
 *
 * NOTE: This moves the file offset of @fd.
 */
__s64 ntfs_pwrite(int fd, const void *b, __s64 count, const __s64 pos)
{
	__s64 written, total;
	char retry;

	/*
	 * A negative count would be converted to a huge unsigned length when
	 * handed to write(), so refuse it together with a NULL buffer.
	 */
	if (!b || count < 0) {
		errno = EINVAL;
		return -1;
	}
	if (!count)
		return 0;
	/* Locate to position. */
	if (lseek(fd, pos, SEEK_SET) == (off_t)-1)
		return -1;
	/* Write the data. */
	total = retry = 0;
	do {
		written = write(fd, (const char *)b + total, count);
		if (written == -1)
			return written;
		else if (!written)
			++retry;
		/* We just recycle count as a local variable here. */
		count -= written;
		total += written;
	} while (count && (retry < 3));
	/* Sync write to disk. */
	if (fdatasync(fd) == -1)
		return -1;
	/* Finally return the number of bytes written. */
	return total;
}

/**
 * mst_pwrite - mst protect a buffer, write it at a given position, deprotect
 * @fd:		file descriptor to write to
 * @b:		data buffer holding the record to write
 * @count:	number of bytes to write
 * @pos:	byte position in @fd at which to start writing
 *
 * NOTE: We mst protect the data, write it, then mst deprotect it using a quick
 * deprotect algorithm (no checking). This saves us from making a copy before
 * the write and at the same time causes the usn to be incremented in the
 * buffer. This conceptually fits in better with the idea that cached data is
 * always deprotected and protection is performed when the data is actually
 * going to hit the disk and the cache is immediately deprotected again
 * simulating an mst read on the written data. This way cache coherency is
 * achieved.
 *
 * Even though @b is declared const, it is handed to the fixup functions with
 * the const cast away, so the caller must pass a writable buffer.
 *
 * The deprotect step is run whenever the protect step was run, i.e. also when
 * write() or fdatasync() fails.
 *
 * Return the number of bytes written, or 0 if @count is 0. On error return -1
 * with errno set: EINVAL if @b is NULL or @count is negative, otherwise the
 * value left behind by lseek(), write() or fdatasync().
 *
 * NOTE(review): the result of pre_write_mst_fixup() is not checked here;
 * confirm against its declaration whether it can fail.
 */
__s64 mst_pwrite(int fd, const void *b, __s64 count, const __s64 pos)
{
	__s64 written, total, error;
	/*
	 * count is consumed by the write loop below, so remember the original
	 * size: both fixup calls have to cover the very same range.
	 */
	const __s64 size = count;
	char retry;

	if (!b || count < 0) {
		errno = EINVAL;
		error = -1;
		goto error_end;
	}
	if (!count) {
		error = 0;
		goto error_end;
	}
	/* Locate to position. */
	if (lseek(fd, pos, SEEK_SET) == (off_t)-1) {
		error = -1;
		goto error_end;
	}
	/* Prepare data for writing. */
	pre_write_mst_fixup((NTFS_RECORD*)b, size);
	/* Write the prepared data. */
	total = retry = 0;
	do {
		written = write(fd, (const char *)b + total, count);
		if (written == -1) {
			error = -1;
			goto finished;
		}
		else if (!written)
			++retry;
		/* We just recycle count as a local variable here. */
		count -= written;
		total += written;
	} while (count && (retry < 3));
	/* Sync write to disk. */
	if (fdatasync(fd) == -1) {
		error = -1;
		goto finished;
	}
	/* Finally return the number of bytes written. */
	error = total;
finished:
	/* Quickly deprotect the data again, using the size we protected. */
	__post_read_mst_fixup((NTFS_RECORD*)b, size);
error_end:
	return error;
}

/**
 * ntfs_pread - read whole blocks from a file descriptor at a given position
 * @fd:		file descriptor to read from
 * @b:		output data buffer
 * @bksize:	size of one block in bytes
 * @count:	number of blocks to read
 * @pos:	byte position in @fd at which to start reading
 *
 * Seek to @pos in @fd and read @count blocks of @bksize bytes each into @b,
 * looping over short reads until everything was read or read() reports end of
 * file. @bksize has to be a power of two and no smaller than
 * 1 << NTFS_SECTOR_SIZE_BITS.
 *
 * Return the number of complete blocks read. This is less than @count only if
 * end of file was hit early; a trailing partially read block is not counted.
 * Return 0 if @count or @bksize is 0. On error return -1 with errno set:
 * EINVAL for a NULL @b, a negative @count or an invalid @bksize, otherwise
 * the value left behind by lseek() or read().
 *
 * NOTE: @b has to be at least of size @count * @bksize. This moves the file
 * offset of @fd.
 */
__s64 ntfs_pread(const int fd, void *b, const __u32 bksize, __s64 count,
	         const __s64 pos)
{
	__s64 br, btr, total;
	char bksize_bits;

	/* Nothing to read so, return 0. */
	if (!count || !bksize)
		return 0;
	/* Calculate bksize_bits. */
	for (bksize_bits = 0, br = bksize; br > 1; br >>= 1)
		++bksize_bits;
	/*
	 * Check arguments. Only powers of two that are at least the sector
	 * size are allowed for bksize. The shift is done on an unsigned value
	 * so that bit 31 can be tested without overflowing a signed int.
	 */
	if (!b || count < 0 || (bksize_bits < NTFS_SECTOR_SIZE_BITS) ||
			(bksize != (__u32)1 << bksize_bits)) {
#ifdef DEBUG
		if (!b)
			puts("ntfs_pread: b is NULL");
		if (count < 0)
			puts("ntfs_pread: count is negative");
		if (bksize_bits < NTFS_SECTOR_SIZE_BITS)
			puts("ntfs_pread: bksize_bits < NTFS_SECTOR_SIZE_BITS");
		if (bksize != (__u32)1 << bksize_bits)
			puts("ntfs_pread: bksize != 1 << bksize_bits");
		puts("Returning EINVAL");
#endif
		errno = EINVAL;
		return -1;
	}
	/* Locate to position. */
	if (lseek(fd, pos, SEEK_SET) == (off_t)-1) {
#ifdef DEBUG
		/* printf() may clobber errno, so preserve it for the caller. */
		int eo = errno;
		printf("ntfs_pread: lseek to 0x%llx returned -1\n",
				(unsigned long long)pos);
		errno = eo;
#endif
		return -1;
	}
	/* Read the data. */
	total = 0;
	btr = count << bksize_bits;
	/* Loop until we have read all data even if it happens only a byte at
	   a time. */
	do {
		br = read(fd, (char *)b + total, btr);
		if (br == -1)
			return br;
		/* End of file: settle for what we have got so far. */
		if (!br)
			break;
		btr -= br;
		total += br;
	} while (btr);
	/*
	 * Finally, return the number of complete blocks read. Deriving it
	 * from the bytes actually read rounds a partial block down instead of
	 * counting it as read.
	 */
	return total >> bksize_bits;
}

/**
 * mst_pread - read mst protected blocks and apply the fixups to them
 * @fd:		file descriptor to read from
 * @b:		output data buffer
 * @bksize:	size of one block in bytes
 * @count:	number of blocks to read
 * @pos:	byte position in @fd at which to start reading
 *
 * Read @count blocks of @bksize bytes from @fd at @pos into @b using
 * ntfs_pread() and then run post_read_mst_fixup() on every block that was
 * read.
 *
 * Return whatever ntfs_pread() returned, i.e. the number of blocks read or -1
 * on error. Errors from the fixup function are not reported.
 *
 * NOTE: @b has to be at least of size @count * @bksize.
 */
__s64 mst_pread(const int fd, void *b, const __u32 bksize, __s64 count,
		       const __s64 pos)
{
	__s64 br, i;
	char bksize_bits;

	/* Do the read. */
	count = ntfs_pread(fd, b, bksize, count, pos);
	/* Calculate bksize_bits. */
	for (bksize_bits = 0, br = bksize; br > 1; br >>= 1)
		++bksize_bits;
	/* Apply fixups to successfully read data. Note, that we disregard
	   any errors returned from the MST fixup function. This is because we
	   want to fixup everything possible and we rely on the fact that the
	   "BAAD" magic will be detected later on anyway (before the data is
	   made use of). If the read failed, count is -1 and nothing is done.
	   The buffer is addressed through a char pointer because arithmetic
	   on void pointers is not valid ISO C. */
	for (i = 0; i < count; ++i)
		post_read_mst_fixup((NTFS_RECORD*)((char *)b +
						(i << bksize_bits)), bksize);
	/* Finally, return the number of blocks read. */
	return count;
}

/**
 * get_clusters - read clusters from a volume
 * @vol:	volume to read from
 * @buf:	output data buffer
 * @lcn:	logical cluster number of the first cluster to read
 * @count:	number of clusters to read
 *
 * Read @count clusters starting at @lcn from volume @vol into buffer @buf.
 * Return @count on success or -ERRNO on error, where ERRNO is the error
 * code. Read failures are reported on stderr by this function.
 *
 * NOTE: @buf has to be at least of size @count * vol->cluster_size.
 */
int get_clusters(const ntfs_volume *vol, __u8 *buf, const __s64 lcn,
		 const int count)
{
	__s64 nr_read;
	int saved_errno;

	/* Sanity check the arguments. */
	if (!vol || !buf || count < 0 || lcn < 0)
		return -EINVAL;
	if (!vol->fd)
		return -EBADF;
	/* The requested range must lie within the volume. */
	if (vol->number_of_clusters < lcn + count)
		return -ESPIPE;
	nr_read = ntfs_pread(vol->fd, buf, vol->cluster_size, count,
			     lcn << vol->cluster_size_bits);
	if (nr_read == count)
		return nr_read;
	/* Grab errno before the reporting below gets a chance to alter it. */
	saved_errno = errno;
	if (nr_read == -1) {
		perror("Error reading cluster(s)");
		return -saved_errno;
	}
	if (nr_read == 0)
		fprintf(stderr, "Error: partition is smaller than "
				"it should be!?! Weird!\n");
	else
		fprintf(stderr, "Error reading cluster(s): unknown error\n");
	return -EIO;
}

/**
 * put_clusters - write clusters to a volume
 * @vol:	volume to write to
 * @buf:	data buffer to write from
 * @lcn:	logical cluster number of the first cluster to write
 * @count:	number of clusters to write
 *
 * Write @count clusters from buffer @buf to volume @vol starting at @lcn.
 * Return the number of clusters written on success or -ERRNO on error, where
 * ERRNO is the error code. Write failures are reported on stderr by this
 * function.
 *
 * NOTE: @buf has to be at least of size @count * vol->cluster_size.
 */
int put_clusters(ntfs_volume *vol, const __u8 *buf, const __s64 lcn, int count)
{
	__s64 bw, bytes;

	if (!vol || !buf || count < 0 || lcn < 0)
		return -EINVAL;
	if (!vol->fd)
		return -EBADF;
	if (vol->number_of_clusters < lcn + count)
		return -ESPIPE;
	/*
	 * Widen count before shifting: done in int, the byte count would
	 * overflow for transfers that do not fit into an int.
	 */
	bytes = (__s64)count << vol->cluster_size_bits;
	bw = ntfs_pwrite(vol->fd, buf, bytes, lcn << vol->cluster_size_bits);
	if (bw != bytes) {
		/* Save errno before the reporting below can change it. */
		int eo = errno;
#define ESTR "Error writting cluster(s)"
		if (bw == -1) {
			perror(ESTR);
			return -eo;
		} else if (!bw)
			fprintf(stderr, ESTR ": Ran out of input data!\n");
		else
			fprintf(stderr, ESTR ": unknown error\n");
#undef ESTR
		return -EIO;
	}
	return bw >> vol->cluster_size_bits;
}

/**
 * get_mft_records - read records from the mft from disk
 * @vol:	volume to read from
 * @mrec:	output data buffer
 * @mref:	starting mft record number
 * @count:	number of mft records to read
 *
 * Read @count mft records starting at @mref from volume @vol into buffer
 * @mrec. Return @count on success or -ERRNO on error, where ERRNO is the error
 * code. This function will do the error reporting so caller really only needs
 * to check for success / failure.
 *
 * Only the first record has its bit in the mft bitmap tested and its cluster
 * looked up in the mft runlist; the remaining records are read sequentially
 * from that on disk position.
 *
 * Even though @mrec is declared const, it is the buffer that the records are
 * read into, so it has to be writable.
 *
 * NOTE: @mrec has to be at least of size @count * vol->mft_record_size.
 */
int get_mft_records(const ntfs_volume *vol, const MFT_RECORD *mrec,
		    const MFT_REF *mref, const int count)
{
	__s64 br, ofs;
	LCN lcn;
	VCN m;

	/* @mref is dereferenced below, so it must be checked as well. */
	if (!vol || !mrec || !mref || count < 0)
		return -EINVAL;
	if (!vol->fd)
		return -EBADF;
	m = MREF(*mref);
	if (vol->number_of_mft_records < m + count)
		return -ESPIPE;
	if (!ntfs_get_bit(vol->mft_bitmap, m))
		return -ENOENT;
	/* Find the cluster that holds the first record. */
	lcn = vcn_to_lcn(vol->mft_runlist, m << vol->mft_record_size_bits >>
						vol->cluster_size_bits);
	if (lcn == -1) {
		perror("Error sparse $Mft records are not supported");
		return -ENOTSUP;
	}
	/* Byte offset of the first record within its cluster. */
	ofs = (m << vol->mft_record_size_bits) & (vol->cluster_size - 1);
	br = mst_pread(vol->fd, (__u8*)mrec, vol->mft_record_size, count,
					(lcn << vol->cluster_size_bits) + ofs);
	if (br != count) {
		/* Save errno before the reporting below can change it. */
		int eo = errno;
#define ESTR "Error reading $Mft record"
		if (br == -1) {
			perror(ESTR);
			return -eo;
		} else if (!br)
			fprintf(stderr, "Error: partition is smaller than "
					"it should be!?! Weird!\n");
		else
			fprintf(stderr, ESTR ": unknown error\n");
#undef ESTR
		return -EIO;
	}
	return br;
}

/**
 * get_mft_record - read a single record from the mft from disk
 * @vol:	volume to read from
 * @mrec:	output data buffer
 * @mref:	mft record number of the record to read
 *
 * Convenience wrapper around get_mft_records() with a record count of one:
 * read the mft record specified by @mref from volume @vol into buffer @mrec.
 *
 * Return whatever get_mft_records() returns for a single record, that is 1 on
 * success or -ERRNO on error, where ERRNO is the error code.
 *
 * NOTE: @mrec has to be at least of size vol->mft_record_size.
 */
__inline__ int get_mft_record(const ntfs_volume *vol, const MFT_RECORD *mrec,
			      const MFT_REF *mref)
{
	const int nr_records = 1;

	return get_mft_records(vol, mrec, mref, nr_records);
}

/**
 * __read_file_record - read a FILE record from the mft from disk
 * @vol:	volume to read from
 * @mref:	mft reference specifying mft record to read
 * @mrec:	address of pointer in which to return the mft record
 * @attr:	address of pointer in which to return the first attribute
 * 
 * Read a FILE record from the mft of @vol from the storage medium. @mref
 * specifies the mft record to read, including the sequence number. When the
 * function returns successfully, @mrec and @attr will contain pointers to the
 * read mft record and to the first attribute within the mft record,
 * respectively. @attr is optional (can be NULL).
 *
 * The read mft record is checked for having the magic FILE, for being in use,
 * for having a matching sequence number (if MSEQNO(*@mref) != 0) and for
 * having its attributes offset point inside the record.
 * If any of these fails, return -EIO.
 * 
 * Return 0 on success, or -ERRNO on error, where ERRNO is the error code:
 * -EINVAL if @vol, @mref or @mrec is NULL, -ENOMEM if the record buffer could
 * not be allocated, or the error from get_mft_record(). On error *@mrec and
 * *@attr are left untouched.
 * 
 * Note: Caller has to free *@mrec when finished.
 */
int __read_file_record(const ntfs_volume *vol, const MFT_REF *mref,
			MFT_RECORD **mrec, ATTR_RECORD **attr)
{
	MFT_RECORD *m;
	ATTR_RECORD *a;
	int er = 0;
	
	/* @mref is dereferenced below, so it must be checked as well. */
	if (!vol || !mref || !mrec) {
#ifdef DEBUG
		fprintf(stderr, "read_file_record() received NULL pointer!\n");
#endif
		return -EINVAL;
	}
	if (!(m = malloc(vol->mft_record_size)))
		return -ENOMEM;
	if ((er = get_mft_record(vol, m, mref)) != 1)
		goto failed;
	if (!is_file_record(m->magic))
		goto file_corrupt;
	/* A sequence number of zero means: do not check it. */
	if (MSEQNO(*mref) && MSEQNO(*mref) != le16_to_cpu(m->sequence_number))
		goto file_corrupt;
	if (!(m->flags & MFT_RECORD_IN_USE))
		goto file_corrupt;
	a = (ATTR_RECORD*)((char*)m + le16_to_cpu(m->attrs_offset));
	/* The first attribute must not lie outside the record buffer. */
	if (p2n(a) < p2n(m) || (char*)a > (char*)m + vol->mft_record_size)
		goto file_corrupt;
	*mrec = m;
	if (attr)
		*attr = a;
	return 0;
file_corrupt:
#ifdef DEBUG
	fprintf(stderr, "read_file_record(): file is corrupt.\n");
#endif
	er = -EIO;
failed:
	free(m);
	/* Never report success from here, whatever the read returned. */
	return er < 0 ? er : -EINVAL;
}

/**
 * read_file_record - read a FILE record from the mft from @vol
 * @vol:	volume to read from
 * @mref:	mft reference specifying mft record to read
 * @mrec:	address of pointer in which to return the mft record
 * @attr:	address of pointer in which to return the first attribute
 * 
 * NOTE: This function is not implemented yet. It ignores all of its arguments
 * and always returns -ENOTSUP. What follows describes the intended behaviour.
 *
 * Return the FILE record @mref from the mft of @vol. @mref, the mft record to
 * return, includes the sequence number, which can be 0 if no sequence number
 * checking is to be performed. If the mft record is already loaded in the
 * volume's mft cache it is returned straight away. Otherwise a new mft entry
 * is allocated and inserted into the cache and the mft record within the
 * entry is returned. When the function returns, @mrec and @attr will contain
 * pointers to the mft record and to the first attribute within the mft record,
 * respectively. @attr is optional (can be NULL).
 *
 * The mft record is checked for having the magic FILE, for being in use,
 * and for having a matching sequence number (if MSEQNO(*@mref) != 0).
 * If any of these fails, return -EIO.
 * 
 * Return 0 on success, or -ERRNO on error, where ERRNO is the error code.
 * 
 * Note: Caller has to free *@mrec when finished.
 */
int read_file_record(const ntfs_volume *vol, const MFT_REF *mref,
		     MFT_RECORD **mrec, ATTR_RECORD **attr)
{
	/* Placeholder: none of the arguments are used yet. */
	(void)vol;
	(void)mref;
	(void)mrec;
	(void)attr;

	return -ENOTSUP;
}

/**
 * put_mft_record - write a record to the mft on disk
 * @vol:	volume to write to
 * @buf:	data buffer holding the mft record to write
 * @mref:	mft record number of the record to write
 *
 * Write the mft record in @buf, which is vol->mft_record_size bytes long, to
 * the on disk location of mft record @mref of volume @vol using mst_pwrite().
 *
 * Return 1 on success or -ERRNO on error, where ERRNO is the error code:
 * -EINVAL if @vol, @buf or @mref is NULL, -EBADF if vol->fd is 0, -ESPIPE if
 * @mref is not below vol->number_of_mft_records, -ENOENT if the record's bit
 * is not set in the mft bitmap, -ENOTSUP if the record's cluster could not be
 * looked up in the mft runlist, the errno of a failed write, or -EIO if the
 * record was not written completely. Write failures are reported on stderr by
 * this function.
 *
 * Even though @buf is declared const, mst_pwrite() applies its fixups to the
 * buffer in place, so it has to be writable.
 */
int put_mft_record(ntfs_volume *vol, const __u8 *buf, const MFT_REF *mref)
{
	__s64 bw, m;
	__s64 lcn, ofs;

	/* @mref is dereferenced below, so it must be checked as well. */
	if (!vol || !buf || !mref)
		return -EINVAL;
	if (!vol->fd)
		return -EBADF;
	m = MREF(*mref);
	if (vol->number_of_mft_records <= m)
		return -ESPIPE;
	if (!ntfs_get_bit(vol->mft_bitmap, m))
		return -ENOENT;
	/* Find the cluster that holds the record. */
	lcn = vcn_to_lcn(vol->mft_runlist, m << vol->mft_record_size_bits >>
						vol->cluster_size_bits);
	if (lcn == -1) {
		perror("Error sparse $Mft records are not supported");
		return -ENOTSUP;
	}
	/* Byte offset of the record within its cluster. */
	ofs = (m << vol->mft_record_size_bits) & (vol->cluster_size - 1);
	bw = mst_pwrite(vol->fd, buf, vol->mft_record_size,
					(lcn << vol->cluster_size_bits) + ofs);
	if (bw != vol->mft_record_size) {
		/* Save errno before the reporting below can change it. */
		int eo = errno;
#define ESTR "Error writing $Mft record"
		if (bw == -1) {
			perror(ESTR);
			return -eo;
		} else if (!bw)
			fprintf(stderr, ESTR ": Ran out of input data!\n");
		else
			fprintf(stderr, ESTR ": unknown error\n");
#undef ESTR
		return -EIO;
	}
	return 1;
}

