/*
 *   (C) Copyright IBM Corp. 2001, 2005
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: mdregmgr
 * File: md_super.c
 *
 * Description: This file contains all functions related to the initial
 *              discovery of MD physical volumes, volume groups, and logical
 *              volumes.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <time.h>
#include <sys/ioctl.h>
#include <asm/bitops.h>
#include <fcntl.h>
#include <unistd.h>
#include <plugin.h>

#include "md.h"


/* NOTE(review): presumably the plugin logging macros refer to
 * my_plugin_record; this maps that name onto my_plugin -- confirm in md.h. */
#define my_plugin_record my_plugin

#if 0
/*
 * cpy_str
 *
 * NOTE: this helper is currently compiled out by the enclosing "#if 0".
 *
 * Copy src to target only if target_size is strictly greater than
 * strlen(src).  The null terminator is not copied, so target is NOT
 * guaranteed to be NUL-terminated afterwards.
 *
 * Returns the number of bytes copied (strlen(src)), or 0 if the target
 * is too small and nothing was copied.
 */
static u_int32_t cpy_str(char *target, u_int32_t target_size, char *src)
{
	u_int32_t cpy_size;
	cpy_size = strlen(src);
	if (target_size > cpy_size) {
		/* Copies exactly cpy_size bytes; no terminator is written. */
		strncpy(target, src, cpy_size);
		return cpy_size;
	} else {
		return 0;
	}
}
#endif


/*
 * md_analyze_sb
 *
 * This function is common for both versions of MD formats.
 *
 * Compares vol->active_disks against raid_disks and validates the chunk
 * size for striped levels, then records the result in vol->flags:
 *   - MD_DEGRADED when the array has fewer active disks than raid_disks
 *     but the level tolerates it (RAID1/MULTIPATH with at least one
 *     active disk, RAID5 with exactly one disk missing);
 *   - MD_CORRUPT for every other failed check.
 * On corruption a message is formatted into message_buffer and handed to
 * md_queue_corrupt_message().  Only the first failing check is reported.
 *
 * vol                - volume being analyzed (flags are updated in place)
 * level              - MD personality level (MD_LEVEL_*)
 * raid_disks         - number of raid disks recorded in the superblock
 * chunksize_in_bytes - chunk size; only checked for RAID0/RAID4/RAID5
 *
 * NOTE(review): message_buffer is declared outside this file section; the
 * sprintf() calls below assume it is large enough -- confirm its size.
 * NOTE(review): if MD_CORRUPT is already set on entry and no check fails,
 * the stale message_buffer is queued with length 0; callers visible here
 * (sb0_analyze_sb) return early in that case.
 */
static void md_analyze_sb(md_volume_t *vol, int level, int raid_disks, u_int32_t chunksize_in_bytes)
{
	int length=0;
	LOG_ENTRY();

	/* More active disks found than the superblock's raid_disks? */
	if (vol->active_disks > raid_disks) {
		length = sprintf(message_buffer,
				 _("%s region %s is corrupt."
				   "  Too many disks (%d) are active."
				   "  Whereas the number of raid disks is %d."),
				 level_to_string(level), vol->name, vol->active_disks, raid_disks);
		vol->flags |= MD_CORRUPT;
		goto queue_corrupt_message;
	}

	/* Fewer active disks than raid_disks: degraded or corrupt, per level. */
	if (vol->active_disks < raid_disks) {
		switch ((int)level) {
			case MD_LEVEL_MULTIPATH:
			case MD_LEVEL_RAID1:
				/* A single active disk is enough to run these levels. */
				if (vol->active_disks >= 1) {
					vol->flags |= MD_DEGRADED;
				} else {
					length = sprintf(message_buffer,
							 _("%s region %s is corrupt."
							   "  None of the disks are active to start the array."),
							 level_to_string(level), vol->name);
					vol->flags |= MD_CORRUPT;
					goto queue_corrupt_message;
				}
				break;
			case MD_LEVEL_RAID5:
				/* Exactly one missing disk is treated as degraded. */
				if ( (raid_disks - vol->active_disks) == 1 ) {
					vol->flags |= MD_DEGRADED;
				} else {
					length = sprintf(message_buffer,
							 _("%s region %s is corrupt."
							   "  The number of raid disks for a full functional array is %d."
							   "  The number of active disks is %d."),
							 level_to_string(level), vol->name, 
							 raid_disks, vol->active_disks);
					vol->flags |= MD_CORRUPT;
					goto queue_corrupt_message;
				}
				break;
			default:
				/* Every other level is reported corrupt when a disk is missing. */
				length = sprintf(message_buffer,
						 _("%s region %s is corrupt."
						   "  The number of raid disks for a full functional array is %d."
						   "  The number of active disks is %d.\n"),
						 level_to_string(level), vol->name, 
						 raid_disks, vol->active_disks);
				vol->flags |= MD_CORRUPT;
				goto queue_corrupt_message;
		}
	}

	/* Chunk size sanity checks apply only to the striped levels. */
	if ((level == MD_LEVEL_RAID0) ||
	    (level == MD_LEVEL_RAID4) || 
	    (level == MD_LEVEL_RAID5) ) {
		if (!chunksize_in_bytes) {
			length = sprintf(message_buffer,
					 _("%s region %s is corrupt.  The chunk size is 0."),
					 level_to_string(level), vol->name);
			vol->flags |= MD_CORRUPT;
			goto queue_corrupt_message;
		}
		
		if (chunksize_in_bytes > MD_MAX_CHUNK_SIZE_BYTES) {
			length = sprintf(message_buffer,
					 _("%s region %s is corrupt.  The chunk size (%d bytes) is too big."
					   "  The maximum chunk size is %d bytes.\n"),
					 level_to_string(level), vol->name, chunksize_in_bytes,
					 MD_MAX_CHUNK_SIZE_BYTES);
			vol->flags |= MD_CORRUPT;
			goto queue_corrupt_message;
		}
		
		/* Power-of-2 test: the lowest set bit must equal the whole value. */
		if ( (1 << (ffs(chunksize_in_bytes)-1)) != chunksize_in_bytes) {
			length = sprintf(message_buffer,
					 _("%s region %s is corrupt."
					   "  The chunk size (%d bytes) is not a power of 2."),
					 level_to_string(level), vol->name, chunksize_in_bytes);
			vol->flags |= MD_CORRUPT;
			goto queue_corrupt_message;
		}
		
		if (chunksize_in_bytes < 4096) {
			length = sprintf(message_buffer,
					 _("%s region %s is corrupt."
					   "  The chunk size (%d bytes) is too small."
					   "  The minimum chunk size is 4096 bytes.\n"),
					 level_to_string(level), vol->name, chunksize_in_bytes);
			vol->flags |= MD_CORRUPT;
			goto queue_corrupt_message;
		}
	}

queue_corrupt_message:
	/* Reached on every path; a message is queued only when MD_CORRUPT is set. */
	if (vol->flags & MD_CORRUPT) {
		md_queue_corrupt_message(level_to_pers(level), message_buffer, length);
	}
	LOG_EXIT_VOID();
}

/*
 * get_random
 *
 * Return a 32-bit value, used by sb0_init_sb() for the set_uuid words.
 * The value is read from /dev/urandom; if that device cannot be opened
 * or does not deliver a full 4 bytes, rand() is used as a fallback.
 */
static u_int32_t get_random(void) {
	u_int32_t num;
	int fd;

	fd = open("/dev/urandom", O_RDONLY);
	if (fd != -1 && read(fd, &num, sizeof(num)) == sizeof(num)) {
		LOG_DEBUG("raid set magic: %x\n", num);
	} else {
		num = rand();
		LOG_DEBUG("raid set magic (pseudo-random): %x\n", num);
	}

	/*
	 * Close the descriptor exactly once, and only when open() succeeded.
	 * (A second close() could hit a descriptor reused elsewhere, and
	 * close(-1) is simply an error.)
	 */
	if (fd != -1) {
		close(fd);
	}
	return num;
}

/*
 * md_calc_saved_info_csum
 *
 * Compute the checksum of an EVMS saved info block.
 *
 * The block is summed as 32-bit words over sizeof(*info) plus two bytes
 * per expand/shrink entry, with the csum field treated as zero while
 * summing.  A trailing 2-byte remainder is added as a 16-bit value.  The
 * 64-bit sum is folded into 32 bits.  info->csum is restored before
 * returning, so the block is unchanged on exit.
 */
static unsigned int md_calc_saved_info_csum(md_saved_info_t *info)
{
	unsigned int *word = (unsigned int *)info;
	int remaining = sizeof(*info) + (info->expand_shrink_cnt)*2;
	unsigned long long sum = 0;
	unsigned int saved_csum = info->csum;
	unsigned int folded;

	/* The checksum field itself must not contribute to the sum. */
	info->csum = 0;

	while (remaining >= 4) {
		sum += *word;
		word++;
		remaining -= 4;
	}

	if (remaining == 2) {
		sum += *(unsigned short *)word;
	}

	folded = (sum & 0xffffffff) + (sum >> 32);
	info->csum = saved_csum;
	return folded;
}

/*
 * sb0_get_sb_disk_info
 *
 * Copy the disk descriptor for this member (slot member->dev_number of
 * the member's own superblock) into info.
 */
static void sb0_get_sb_disk_info(md_member_t *member, mdu_disk_info_t *info)
{
	mdp_super_t *super0 = (mdp_super_t *)member->sb;
	mdp_disk_t *desc = super0->disks + member->dev_number;

	info->state = desc->state;
	info->raid_disk = desc->raid_disk;
	info->minor = desc->minor;
	info->major = desc->major;
	info->number = desc->number;
}

/*
 * sb0_get_sb_disk_info_for_index
 *
 * Fill info with the descriptor stored at slot info->number of the given
 * superblock.  info->number is an input and is left untouched on success.
 * If the index is not below MD_SB_DISKS, an MD bug is logged and the
 * whole info structure is zeroed.
 */
static void sb0_get_sb_disk_info_for_index(void *super, mdu_disk_info_t *info)
{
	mdp_super_t *super0 = (mdp_super_t *)super;
	mdp_disk_t *desc;

	if (info->number >= MD_SB_DISKS) {
		LOG_MD_BUG();
		memset(info, 0, sizeof(*info));
		return;
	}

	desc = &super0->disks[info->number];
	info->major = desc->major;
	info->minor = desc->minor;
	info->raid_disk = desc->raid_disk;
	info->state = desc->state;
}

/*
 * sb0_get_disk_array_info
 *
 * Allocate (via the engine allocator) an md_array_info_t with room for
 * sb->nr_disks disk entries and fill it from the superblock: the array
 * counters, the personality, the chunk size converted to sectors, and
 * one entry per descriptor in slots 0 .. nr_disks-1.
 *
 * Returns the new structure, or NULL if the allocation failed.
 */
static md_array_info_t * sb0_get_disk_array_info(void *super)
{
	mdp_super_t *super0 = (mdp_super_t *)super;
	md_array_info_t *result;
	int idx;

	LOG_ENTRY();

	result = EngFncs->engine_alloc(sizeof(md_array_info_t) +
				       (super0->nr_disks * sizeof(mdu_disk_info_t)));
	if (!result) {
		LOG_EXIT_PTR(result);
		return result;
	}

	result->raid_disks = super0->raid_disks;
	result->personality = level_to_pers(super0->level);
	/* The superblock stores bytes; the array info wants sectors. */
	result->chunksize = super0->chunk_size >> EVMS_VSECTOR_SIZE_SHIFT;
	result->active_disks = super0->active_disks;
	result->failed_disks = super0->failed_disks;
	result->nr_disks = super0->nr_disks;
	result->spare_disks = super0->spare_disks;
	result->working_disks = super0->working_disks;

	for (idx = 0; idx < super0->nr_disks; idx++) {
		mdp_disk_t *src = &super0->disks[idx];

		result->disk[idx].number = src->number;
		result->disk[idx].raid_disk = src->raid_disk;
		result->disk[idx].major = src->major;
		result->disk[idx].minor = src->minor;
		result->disk[idx].state = src->state;
	}

	LOG_EXIT_PTR(result);
	return result;
}

/*
 * sb0_get_sb_info
 *
 * Translate a version 0.90 superblock into the version-independent
 * md_super_info_t.  info is zeroed first.  Sizes are reported in
 * sectors: sb->size is doubled and sb->chunk_size is shifted by
 * EVMS_VSECTOR_SIZE_SHIFT.  The 64-bit event counter is assembled from
 * events_hi/events_lo.  state_flags is ERRORS if the error bit is set,
 * otherwise CLEAN if the clean bit is set, otherwise DIRTY.
 */
static void sb0_get_sb_info(void *super, md_super_info_t *info)
{
	mdp_super_t *super0 = (mdp_super_t *)super;

	memset(info, 0, sizeof(*info));

	/* Identification */
	info->md_magic = super0->md_magic;
	info->major_version = super0->major_version;
	info->minor_version = super0->minor_version;
	info->patch_version = super0->patch_version;
	info->set_uuid0 = super0->set_uuid0;
	info->set_uuid1 = super0->set_uuid1;
	info->set_uuid2 = super0->set_uuid2;
	info->set_uuid3 = super0->set_uuid3;
	info->md_minor = super0->md_minor;              /* minor or -1 if not used */
	info->not_persistent = super0->not_persistent;
	info->this_disk_index = super0->this_disk.number;
	info->sb_csum = super0->sb_csum;

	/* Timestamps and event counter */
	info->ctime = (time_t)super0->ctime;
	info->utime = (time_t)super0->utime;
	info->events = super0->events_hi;
	info->events <<= 32;
	info->events += super0->events_lo;

	/* Geometry */
	info->level = super0->level;
	info->layout = super0->layout;
	info->size = super0->size * 2;                /* in sectors */
	info->chunksize = super0->chunk_size >> EVMS_VSECTOR_SIZE_SHIFT;           /* in sectors */

	/* Disk counters */
	info->nr_disks = super0->nr_disks;
	info->raid_disks = super0->raid_disks;
	info->active_disks = super0->active_disks;
	info->working_disks = super0->working_disks;
	info->failed_disks = super0->failed_disks;
	info->spare_disks = super0->spare_disks;

	/* State: errors take precedence over clean, anything else is dirty. */
	if (super0->state & (1<<MD_SB_ERRORS)) {
		info->state_flags = MD_SUPER_INFO_ERRORS;
	} else {
		info->state_flags = (super0->state & (1<<MD_SB_CLEAN))
			? MD_SUPER_INFO_CLEAN
			: MD_SUPER_INFO_DIRTY;
	}
}

/*
 * sb0_set_sb_info
 *
 * Overwrite the six disk counters of the superblock (nr, raid, active,
 * working, failed, spare) with the values from info and log the new
 * values as a warning.  No other superblock field is touched.
 */
static void sb0_set_sb_info(void *super, md_super_info_t *info)
{
	mdp_super_t *super0 = (mdp_super_t *)super;

	LOG_ENTRY();

	super0->spare_disks = info->spare_disks;
	super0->failed_disks = info->failed_disks;
	super0->working_disks = info->working_disks;
	super0->active_disks = info->active_disks;
	super0->raid_disks = info->raid_disks;
	super0->nr_disks = info->nr_disks;

	LOG_WARNING("Superblock disk counts have been changed,"
		    " nr_disks(%03d) raid_disks(%03d) active_disks(%03d)"
		    " working_disks(%03d) failed_disks(%03d) spare_disks(%03d).\n",
		    super0->nr_disks, super0->raid_disks, super0->active_disks,
		    super0->working_disks, super0->failed_disks, super0->spare_disks);

	LOG_EXIT_VOID();
}

/*
 * sb0_calc_volume_size
 *
 * Compute the size of the volume in sectors from its superblock level
 * and member list.
 *
 * When MD_NEEDS_UPDATE_SIZE is set, each member's data_offset is first
 * reset to 0 and its super_offset recomputed, sb->size is set to half
 * the smallest member data_size, and (for RAID1, MULTIPATH, RAID4 and
 * RAID5) every member's data_size is set to that smallest size.
 *
 * LINEAR/RAID0 sizes are the sum of the member data sizes; RAID1 and
 * MULTIPATH use sb->size * 2; RAID4/RAID5 use (raid_disks - 1) times the
 * per-disk size.  A corrupt volume or an unknown level logs an MD bug
 * and yields 0.  MD_NEEDS_UPDATE_SIZE is always cleared on exit.
 */
static u_int64_t sb0_calc_volume_size(md_volume_t *vol)
{
	mdp_super_t *sb = vol->sb;
	u_int64_t total = 0;
	u_int64_t smallest = -1;
	md_member_t *m;
	list_element_t iter;

	LOG_ENTRY();

	if (vol->flags & MD_CORRUPT) {
		LOG_MD_BUG();
	} else {
		int resized = (vol->flags & MD_NEEDS_UPDATE_SIZE) != 0;
		int equalize = 0;

		if (resized) {
			LIST_FOR_EACH(vol->members, iter, m) {
				smallest = min(smallest, m->data_size);
				m->data_offset = 0; /* just to make sure */
				m->super_offset = MD_NEW_SIZE_SECTORS(m->obj->size);
			}
			sb->size = smallest / 2;
		}

		switch ((int)sb->level) {
		case MD_LEVEL_LINEAR:
		case MD_LEVEL_RAID0:
			LIST_FOR_EACH(vol->members, iter, m) {
				total += m->data_size;
			}
			break;
		case MD_LEVEL_RAID1:
		case MD_LEVEL_MULTIPATH:
			total = sb->size * 2;
			equalize = 1;
			break;
		case MD_LEVEL_RAID4:
		case MD_LEVEL_RAID5:
			total = (sb->raid_disks - 1) * BLOCKS_TO_VSECTORS(sb->size);
			equalize = 1;
			break;
		default:
			LOG_MD_BUG();
			break;
		}

		/* Mirrored and parity levels use the same data size on every member. */
		if (equalize && resized) {
			LIST_FOR_EACH(vol->members, iter, m) {
				m->data_size = smallest;
			}
		}
	}

	vol->flags &= ~MD_NEEDS_UPDATE_SIZE;
	LOG_EXIT_U64(total);
	return total;
}

/*
 * sb0_find_empty_slot
 *
 * Find a disk descriptor slot that is either marked removed or empty.
 *
 * The search first covers slots raid_disks .. MD_SB_DISKS-1 (the order
 * the original author attributes to the kernel md driver, md.c) and, if
 * nothing is free there, wraps around to slots 0 .. raid_disks-1.
 *
 * On success the slot index is stored in *available_slot and 0 is
 * returned.  If no slot is free, ENODEV is returned and *available_slot
 * is left untouched.
 */
static int sb0_find_empty_slot(void *super, u_int32_t *available_slot)
{
	mdp_super_t *sb = (mdp_super_t *)super;
	boolean have_slot = FALSE;
	int first, limit;
	int slot = 0;
	int rc = 0;

	LOG_ENTRY();

	first = sb->raid_disks;
	limit = MD_SB_DISKS;

	for (;;) {
		for (slot = first; slot < limit; slot++) {
			if (descriptor_removed(&sb->disks[slot]) ||
			    descriptor_empty(&sb->disks[slot])) {
				have_slot = TRUE;
				break;
			}
		}
		if (have_slot == TRUE) {
			break;
		}

		if (first == 0) {
			/* The wrapped-around pass found nothing either. */
			LOG_CRITICAL("md%d array is full.\n", sb->md_minor);
			rc = ENODEV;
			break;
		}

		/* Second pass: the slots below raid_disks. */
		first = 0;
		limit = sb->raid_disks;
	}

	if (rc == 0) {
		*available_slot = slot;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb0_max_disks
 *
 * Return the number of disk descriptor slots in a version 0.90
 * superblock (MD_SB_DISKS).
 */
static int sb0_max_disks(void)
{
	/* "(void)" gives the function a real prototype; "()" leaves the
	 * parameter list unspecified in C. */
	return MD_SB_DISKS;
}

/*
 * sb0_calc_sb_csum
 *
 * Compute the checksum of a version 0.90 superblock: the sum of all
 * MD_SB_BYTES/4 32-bit words with the sb_csum field treated as zero,
 * folded from 64 to 32 bits.  super->sb_csum is restored before
 * returning, so the superblock is unchanged on exit.
 *
 * Returns the checksum (truncated to int, as the interface requires).
 */
static int sb0_calc_sb_csum(mdp_super_t *super)
{
	unsigned int  oldcsum = super->sb_csum;
	unsigned long long newcsum = 0;
	unsigned long csum;
	int i;
	/* Cast to the pointer type actually declared (was "(int*)"). */
	unsigned int *superc = (unsigned int *) super;

	/* The checksum field itself must not contribute to the sum. */
	super->sb_csum = 0;

	for (i=0; i<MD_SB_BYTES/4; i++) {
		newcsum += superc[i];
	}
	csum = (newcsum & 0xffffffff) + (newcsum >> 32);
	super->sb_csum = oldcsum;
	return csum;
}

/*
 * sb0_validate_sb
 *
 * Check that sb carries the MD magic number and version 0.90.
 *
 * Returns ENXIO if the magic or version does not match, 0 otherwise.
 * A checksum mismatch is only logged as a warning; it does not make the
 * superblock invalid.
 */
static int sb0_validate_sb(mdp_super_t *sb)
{
	int stored_csum, computed_csum;

	LOG_ENTRY();

	if (sb->md_magic != MD_SB_MAGIC ||
	    sb->major_version != 0 ||
	    sb->minor_version != 90) {
		LOG_EXTRA("Bad signature or version\n");
		LOG_EXIT_INT(ENXIO);
		return ENXIO;
	}

	stored_csum = sb->sb_csum;
	computed_csum = sb0_calc_sb_csum(sb);
	if (stored_csum != computed_csum) {
		LOG_WARNING("Bad Checksum\n");
	}

	LOG_EXIT_INT(0);
	return 0;
}

/*
 * sb0_allocate_sb
 *
 * Allocate MD_SB_BYTES from the engine allocator for a version 0.90
 * superblock.  Returns whatever the allocator returns.
 */
static void *sb0_allocate_sb(void)
{
	void *super = EngFncs->engine_alloc(MD_SB_BYTES);

	return super;
}

/*
 * sb0_duplicate_sb
 *
 * Allocate a new MD_SB_BYTES buffer, store it in *target and copy the
 * superblock src into it.
 *
 * Returns 0 on success.  If the allocation fails, *target is set to
 * NULL and ENOMEM is returned.  Whatever *target pointed to before the
 * call is overwritten without being released.
 */
static int sb0_duplicate_sb(void **target, void *src)
{
	void *copy;
	int rc = 0;

	LOG_ENTRY();

	copy = EngFncs->engine_alloc(MD_SB_BYTES);
	*target = copy;
	if (copy) {
		memcpy(copy, src, MD_SB_BYTES);
	} else {
		rc = ENOMEM;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb0_init_sb
 *
 * Initialize a fresh version 0.90 superblock in the MD_SB_BYTES buffer
 * at super.
 *
 * The buffer is zeroed, then magic/version, md_minor, level, layout,
 * creation/update time, an event count of 1 and four random set_uuid
 * words are filled in.  size is halved and chunk_size is shifted left
 * by EVMS_VSECTOR_SIZE_SHIFT before being stored.
 *
 * Always returns 0.
 */
static int sb0_init_sb(void *super, int md_minor, int level, u_int32_t layout,
		u_int64_t size, u_int32_t chunk_size)
{
	mdp_super_t *super0 = (mdp_super_t *)super;

	LOG_ENTRY();

	memset(super0, 0, MD_SB_BYTES);

	/* Signature and version */
	super0->md_magic = MD_SB_MAGIC;
	super0->major_version = 0;
	super0->minor_version = 90;
	super0->patch_version = 0;

	/* Array identity and geometry */
	super0->md_minor = md_minor;
	super0->level = level;
	super0->layout = layout;
	super0->size = (u_int32_t)(size/2);
	super0->chunk_size = chunk_size << EVMS_VSECTOR_SIZE_SHIFT;
	super0->not_persistent = 0;
	super0->sb_csum = 0;

	/* Bookkeeping */
	super0->utime = time(NULL);
	super0->ctime = super0->utime;
	super0->events_hi = 0;
	super0->events_lo = 1;

	/* Random set UUID, one word at a time */
	super0->set_uuid0 = get_random();
	super0->set_uuid1 = get_random();
	super0->set_uuid2 = get_random();
	super0->set_uuid3 = get_random();

	LOG_EXIT_INT(0);
	return 0;
}

/*
 * sb0_add_new_disk
 *
 * Register a new member in the volume's master superblock.
 *
 * The member must be flagged active or spare.  The descriptor in slot
 * member->dev_number receives the object's major/minor, and nr_disks
 * and working_disks are incremented.  An active member also bumps
 * active_disks and raid_disks and takes the last raid_disk index; a
 * spare bumps spare_disks and gets member->raid_disk = -1.  The member
 * is marked MD_MEMBER_NEW, its data/super offsets are reset, and its
 * data_size is computed if it was still -1.
 *
 * Returns 0 on success, EINVAL if the member, its volume, the master
 * superblock or the member flags are not usable.
 */
static int sb0_add_new_disk(md_member_t *member)
{
	mdp_super_t *master = NULL;
	mdp_disk_t *desc;
	int slot;
	int rc = EINVAL;

	LOG_ENTRY();

	if (!member || !member->vol) {
		LOG_MD_BUG();
		goto out;
	}

	master = (mdp_super_t *)member->vol->sb;
	if (!master) {
		LOG_MD_BUG();
		goto out;
	}

	if ((member->flags & (MD_MEMBER_DISK_ACTIVE | MD_MEMBER_DISK_SPARE)) == 0) {
		LOG_MD_BUG();
		goto out;
	}

	master->nr_disks++;
	master->working_disks++;

	slot = member->dev_number;
	desc = &master->disks[slot];
	desc->number = slot;
	desc->major = member->obj->dev_major;
	desc->minor = member->obj->dev_minor;

	if (member->flags & MD_MEMBER_DISK_ACTIVE) {
		master->active_disks++;
		master->raid_disks++;
		desc->raid_disk = master->raid_disks - 1;
		desc->state = (1<<MD_DISK_ACTIVE | 1<<MD_DISK_SYNC);
		if (slot == 0) {
			master->this_disk = *desc;
		}
		member->raid_disk = master->raid_disks - 1;
	} else if (member->flags & MD_MEMBER_DISK_SPARE) {
		master->spare_disks++;
		desc->raid_disk = slot; /* 2.4 kernel needs this value */
		desc->state = 0;
		member->raid_disk = -1;
	} else {
		/* Not reachable after the flag check above; kept as a safety net. */
		LOG_MD_BUG();
		goto out;
	}

	member->flags |= MD_MEMBER_NEW;
	member->data_offset = 0;
	member->super_offset = MD_NEW_SIZE_SECTORS(member->obj->size);

	/* -1 marks a data size that has not been determined yet. */
	if (member->data_size == -1) {
		member->data_size = md_object_usable_size(member->obj,
							  &member->vol->sb_ver,
							  member->vol->chunksize);
		LOG_DEBUG("Data size of %s is set to %"PRIu64" sectors.\n", 
			  member->obj->name, member->data_size);
	}
	rc = 0;

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb0_activate_spare
 *
 * Turn a spare member into an active, in-sync raid disk.
 *
 * The member takes the next raid_disk index (the current raid_disks
 * value), raid_disks in the master superblock is incremented, and the
 * member's descriptor in the master superblock is marked active + sync.
 * The member's flags are replaced by ACTIVE | SYNC.
 *
 * Returns 0 on success, EINVAL if the member, its volume or the master
 * superblock is missing.
 */
static int sb0_activate_spare(md_member_t *member)
{
	int rc = 0;
	md_volume_t *vol;
	mdp_super_t *master_sb = NULL;
	mdp_disk_t *disk;

	LOG_ENTRY();
	if (!member || !member->vol) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	vol = member->vol;
	master_sb = (mdp_super_t *)vol->sb;
	if (!master_sb) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}
	member->flags = (MD_MEMBER_DISK_ACTIVE | MD_MEMBER_DISK_SYNC);
	member->raid_disk = master_sb->raid_disks;
	master_sb->raid_disks++;
	disk = &master_sb->disks[member->dev_number];
	/*
	 * MD_DISK_ACTIVE and MD_DISK_SYNC are bit numbers, not masks (the
	 * rest of this file builds states as 1<<MD_DISK_xxx), so they must
	 * be shifted before being OR-ed into the descriptor state.
	 */
	disk->state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
	disk->raid_disk = member->raid_disk;

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb0_remove_disk
 *
 * Remove a member from the volume's master superblock.
 *
 * If the master superblock is the removed member's own copy
 * (this_disk.number matches), it is first replaced by a duplicate of
 * another member's superblock.
 *
 * resize == TRUE : nr_disks and raid_disks are decremented, and every
 *                  other member with a higher raid_disk index is shifted
 *                  down by one (dev_number and raid_disk) and has its
 *                  own superblock descriptor refreshed.
 * resize != TRUE : the descriptor is just marked removed.
 *
 * The disk counters are then adjusted according to the member's flags
 * (stale, active, spare or faulty), and the member is reset to the
 * "removed" state (raid_disk, data_offset, data_size = -1).
 *
 * Returns 0 on success; EINVAL if the member, its volume or the master
 * superblock is missing, or if the member has none of the expected
 * flags; or the error from duplicate_sb() if the replacement master
 * superblock cannot be created.
 */
static int sb0_remove_disk(md_member_t *member, boolean resize)
{
	int rc = 0;
	md_volume_t *vol;
	mdp_super_t *master_sb = NULL;
	mdp_disk_t *disk;
	list_element_t iter;
	md_member_t *my_member;

	LOG_ENTRY();
	if (!member || !member->vol) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	vol = member->vol;
	master_sb = (mdp_super_t *)vol->sb;
	if (!master_sb) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	if (master_sb->this_disk.number == member->dev_number) {
		// Must set master superblock to another working superblock.
		LIST_FOR_EACH(vol->members, iter, my_member) {
			if (my_member->dev_number != member->dev_number) {
				void *new_master = NULL;

				/*
				 * Duplicate into a temporary so that a failed
				 * allocation neither clobbers vol->sb with NULL
				 * nor leads to a NULL dereference below.
				 * NOTE(review): the previous vol->sb buffer is
				 * not released here (as before); confirm who
				 * owns it before adding a free.
				 */
				rc = vol->sb_func->duplicate_sb(&new_master, my_member->sb);
				if (rc) {
					goto out;
				}
				vol->sb = new_master;
				master_sb = (mdp_super_t *)vol->sb;
				break;
			}
		}
	}

	disk = &master_sb->disks[member->dev_number];

	if (resize == TRUE) {
		master_sb->nr_disks--;
		master_sb->raid_disks--;
		/* Close the gap left in the raid_disk numbering. */
		LIST_FOR_EACH(vol->members, iter, my_member) {
			if ((my_member != member) &&
			    (my_member->raid_disk > member->raid_disk)) {
				my_member->dev_number--;
				my_member->raid_disk--;
				vol->sb_func->set_this_device_info(my_member);
			}
		}
	} else {
		disk->state = (1<<MD_DISK_REMOVED);
	}
	
	if (member->flags & MD_MEMBER_STALE) {
		master_sb->nr_disks++;
		memset(disk, 0, sizeof(*disk));
	} else if (member->flags & MD_MEMBER_DISK_ACTIVE) {
		master_sb->active_disks--;
		master_sb->working_disks--;
	} else if (member->flags & MD_MEMBER_DISK_SPARE) {
		master_sb->spare_disks--;
		master_sb->working_disks--;
	} else if (member->flags & MD_MEMBER_DISK_FAULTY) {
		master_sb->failed_disks--;
	} else {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	member->raid_disk = -1;
	member->data_offset = -1;
	member->data_size = -1;
	member->flags = MD_MEMBER_DISK_REMOVED;
out:
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb0_replace_disk
 *
 * Point an existing member at a different storage object.
 *
 * member->obj becomes new_obj, the member's descriptor in the master
 * superblock gets the new object's major/minor, and the member's
 * super_offset is recomputed from the new object's size.
 *
 * Returns 0 on success, EINVAL if the member, its volume or the master
 * superblock is missing.
 */
static int sb0_replace_disk(md_member_t *member, storage_object_t *new_obj)
{
	mdp_super_t *master;
	mdp_disk_t *desc;
	int rc = EINVAL;

	LOG_ENTRY();

	if (!member || !member->vol) {
		LOG_MD_BUG();
	} else {
		master = (mdp_super_t *)member->vol->sb;
		if (!master) {
			LOG_MD_BUG();
		} else {
			member->obj = new_obj;
			desc = &master->disks[member->dev_number];
			desc->major = member->obj->dev_major;
			desc->minor = member->obj->dev_minor;
			member->super_offset = MD_NEW_SIZE_SECTORS(member->obj->size);
			rc = 0;
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb0_set_this_device_info
 *
 * Update the member's descriptor inside the member's OWN superblock
 * (member->sb) and copy it into this_disk.
 *
 * A raid disk is marked active + sync with the member's raid_disk
 * index.  Otherwise raid_disk is set to the descriptor number, and the
 * state becomes 0 for a spare, or faulty (plus removed, if flagged) for
 * a faulty member.  For any other member the state is left unchanged.
 */
static void sb0_set_this_device_info(md_member_t *member)
{
	mdp_super_t *super0;
	mdp_disk_t *desc;

	LOG_ENTRY();

	if (member && member->sb) {
		super0 = (mdp_super_t *)member->sb;
		desc = &super0->disks[member->dev_number];

		/* Default; overridden below for raid disks. */
		desc->raid_disk = desc->number;

		if (md_member_is_raid_disk(member)) {
			desc->state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
			desc->raid_disk = member->raid_disk;
		} else if (member->flags & MD_MEMBER_DISK_SPARE) {
			desc->state = 0;
		} else if (member->flags & MD_MEMBER_DISK_FAULTY) {
			desc->state = 1<<MD_DISK_FAULTY;
			if (member->flags & MD_MEMBER_DISK_REMOVED) {
				desc->state |= 1<<MD_DISK_REMOVED;
			}
		}

		super0->this_disk = *desc;
	} else {
		LOG_MD_BUG();
	}

	LOG_EXIT_VOID();
}

/*
 * sb0_set_this_device_state
 *
 * Update the member's descriptor inside the volume's MASTER superblock
 * (member->vol->sb): number, raid_disk and major/minor are taken from
 * the member, and the state is set to active + sync for a raid disk,
 * 0 for a spare, or faulty (plus removed, if flagged) for a faulty
 * member.  For any other member the state is left unchanged.
 */
static void sb0_set_this_device_state(md_member_t *member)
{
	mdp_super_t *master;
	mdp_disk_t *desc;

	LOG_ENTRY();

	if (member && member->vol && member->vol->sb) {
		master = (mdp_super_t *)member->vol->sb;
		desc = &master->disks[member->dev_number];

		desc->number = member->dev_number;
		desc->raid_disk = member->raid_disk;
		desc->major = member->obj->dev_major;
		desc->minor = member->obj->dev_minor;

		if (md_member_is_raid_disk(member)) {
			desc->state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
		} else if (member->flags & MD_MEMBER_DISK_SPARE) {
			desc->state = 0;
		} else if (member->flags & MD_MEMBER_DISK_FAULTY) {
			desc->state = 1<<MD_DISK_FAULTY;
			if (member->flags & MD_MEMBER_DISK_REMOVED) {
				desc->state |= 1<<MD_DISK_REMOVED;
			}
		}
	} else {
		LOG_MD_BUG();
	}

	LOG_EXIT_VOID();
}

/*
 * sb0_load_this_device_info
 *
 * Populate the in-memory member from its own superblock (member->sb).
 *
 * data_offset is 0 and super_offset is derived from the object size.
 * data_size depends on sb->level: chunk-aligned object size for levels
 * 0, 4 and 5; sb->size * 2 for level 1; the plain MD_NEW_SIZE_SECTORS
 * value otherwise.  dev_number comes from this_disk.number, and the
 * member flags are rebuilt from the this_disk state (faulty, active,
 * sync, spare, removed).  raid_disk is taken from this_disk only when
 * the disk is active; otherwise it is -1.
 */
static void sb0_load_this_device_info(md_member_t *member)
{
	mdp_super_t *super0 = NULL;
	mdp_disk_t *self;

	LOG_ENTRY();
	if (!member || !member->sb) {
		LOG_MD_BUG();
		LOG_EXIT_VOID();
		return;
	}

	super0 = (mdp_super_t *)member->sb;
	self = &super0->this_disk;

	/* Placement of data and superblock on the object */
	member->data_offset = 0;
	member->super_offset = MD_NEW_SIZE_SECTORS(member->obj->size);
	if (super0->level == 0 || super0->level == 4 || super0->level == 5) {
		member->data_size = MD_CHUNK_ALIGN_NEW_SIZE_SECTORS(
			(super0->chunk_size >> EVMS_VSECTOR_SIZE_SHIFT), 
			member->obj->size);
	} else if (super0->level == 1) {
		member->data_size = super0->size * 2;
	} else {
		member->data_size = MD_NEW_SIZE_SECTORS(member->obj->size);
	}

	/* Fields with no counterpart in the 0.90 format */
	member->recovery_offset = 0;
	member->cnt_corrected_read = 0;
	memset(member->device_uuid, 0, sizeof(member->device_uuid));

	/* Identity and state, rebuilt from this_disk */
	member->dev_number = self->number;
	member->raid_disk = -1;
	member->flags = 0;
	if (disk_faulty(self)) {
		member->flags |= MD_MEMBER_DISK_FAULTY;
	}
	if (disk_active(self)) {
		member->raid_disk = self->raid_disk;
		member->flags |= MD_MEMBER_DISK_ACTIVE;
	}
	if (disk_sync(self)) {
		member->flags |= MD_MEMBER_DISK_SYNC;
	}
	if (disk_spare(self)) {
		member->flags |= MD_MEMBER_DISK_SPARE;
	}
	if (disk_removed(self)) {
		member->flags |= MD_MEMBER_DISK_REMOVED;
	}

	LOG_DEBUG(" %s: dev_number(%d), raid_disk(%d), data_offset(%"PRIu64") data_size(%"PRIu64").\n",
		  member->obj->name, member->dev_number, member->raid_disk, member->data_offset, member->data_size);

	LOG_EXIT_VOID();
}

/*
 * sb0_get_name
 *
 * Write the region name "md/md<minor>" for this superblock into name.
 * The caller must provide a buffer large enough for the result.
 */
static void sb0_get_name(char *name, void *super)
{
	mdp_super_t *super0 = (mdp_super_t *)super;

	sprintf(name, "md/md%d", super0->md_minor);
}

/*
 * sb0_get_dev_number
 *
 * Return the descriptor number recorded in this_disk of the superblock.
 */
static int sb0_get_dev_number(void *super)
{
	return ((mdp_super_t *)super)->this_disk.number;
}

/*
 * sb0_get_event
 *
 * Return the 64-bit event counter of the superblock, assembled from
 * events_hi (upper 32 bits) and events_lo (lower 32 bits).
 */
static u_int64_t sb0_get_event(void *super)
{
	mdp_super_t *super0 = (mdp_super_t *)super;
	u_int64_t events;

	events = (u_int64_t)super0->events_hi;
	events <<= 32;
	return events | super0->events_lo;
}

static boolean sb0_same_uuid(void *super1, void *super2)
{
	mdp_super_t *sb1 = (mdp_super_t *)super1;
	mdp_super_t *sb2 = (mdp_super_t *)super2;

	LOG_ENTRY();
	if (sb1->set_uuid0 == sb2->set_uuid0 &&
		sb1->set_uuid1 == sb2->set_uuid1 &&
		sb1->set_uuid2 == sb2->set_uuid2 &&
		sb1->set_uuid3 == sb2->set_uuid3 ) {
		LOG_EXIT_BOOL(TRUE);
		return TRUE;
	}
	LOG_EXIT_BOOL(FALSE);
	return FALSE;
}

/*
 * sb0_get_level
 *
 * Return the level field of the superblock.
 */
static int sb0_get_level(void *sb)
{
	return ((mdp_super_t *)sb)->level;
}

/*
 * sb0_analyze_sb
 *
 * Analyze a volume against its version 0.90 master superblock.
 *
 * A volume already flagged MD_CORRUPT is skipped with a warning.
 * Otherwise vol->raid_disks is taken from the superblock, the disk
 * counters (superblock / volume / freshly counted) and the member list
 * are written to the debug log, and md_analyze_sb() performs the actual
 * checks, possibly setting MD_CORRUPT or MD_DEGRADED.
 *
 * Returns vol->flags.
 */
static int sb0_analyze_sb(md_volume_t *vol)
{
	mdp_super_t *sb = (mdp_super_t *)vol->sb;
	md_member_t *m;
	list_element_t iter;

	LOG_ENTRY();

	if (vol->flags & MD_CORRUPT) {
		LOG_WARNING("%s has been set CORRUPT, skip analyzing...\n", vol->name);
	} else {
		int counted_total = md_volume_count_children(vol);
		int counted_active = md_volume_count_active_disks(vol);
		int counted_spare = md_volume_count_spare_disks(vol);
		int counted_faulty = md_volume_count_faulty_disks(vol);
		int counted_stale = md_volume_count_stale_disks(vol);

		vol->raid_disks = sb->raid_disks;

		LOG_DEBUG("Analyzing %s (md_minor=%d)...\n", vol->name, vol->md_minor);
		LOG_DEBUG("raid_disks   : superblock(%03d) volume(%03d) counted(---).\n",
			  sb->raid_disks, vol->raid_disks);
		LOG_DEBUG("nr_disks     : superblock(%03d) volume(%03d) counted(%03d).\n",
			  sb->nr_disks, vol->nr_disks, counted_total);
		LOG_DEBUG("active_disks : superblock(%03d) volume(%03d) counted(%03d).\n",
			  sb->active_disks, vol->active_disks, counted_active);
		LOG_DEBUG("spare_disks  : superblock(%03d) volume(%03d) counted(%03d).\n",
			  sb->spare_disks, vol->spare_disks, counted_spare);
		LOG_DEBUG("failed_disks : superblock(%03d) volume(%03d) counted(%03d).\n",
			  sb->failed_disks, vol->failed_disks, counted_faulty);
		LOG_DEBUG("stale_disks  : superblock(---) volume(%03d) counted(%03d).\n",
			  vol->stale_disks, counted_stale);

		LOG_DEBUG("The following devices are members of %s array:\n", vol->name);
		LIST_FOR_EACH(vol->members, iter, m) {
			LOG_DEBUG("%12s: Major=%03d Minor=%03d Number=%03d RaidDisk=%03d State: %s%s%s%s%s\n",
				  m->obj->name, m->obj->dev_major, m->obj->dev_minor,
				  m->dev_number, m->raid_disk,
				  (m->flags & MD_MEMBER_DISK_ACTIVE) ? "active " : "",
				  (m->flags & MD_MEMBER_DISK_SYNC) ? "sync " : "",
				  (m->flags & MD_MEMBER_DISK_SPARE) ? "spare " : "",
				  (m->flags & MD_MEMBER_DISK_FAULTY) ? "faulty " : "",
				  (m->flags & MD_MEMBER_STALE) ? "stale " : "");
		}

		/* The format-independent checks set MD_CORRUPT / MD_DEGRADED. */
		md_analyze_sb(vol, sb->level, sb->raid_disks, sb->chunk_size);

		if (vol->flags & MD_CORRUPT) {
			LOG_CRITICAL("MD region %s is corrupt\n", vol->name);
		}
		if (vol->flags & MD_DEGRADED) {
			LOG_WARNING("MD region %s is degraded\n", vol->name);
		}
	}

	LOG_EXIT_INT(vol->flags);
	return vol->flags;
}

/*
 * sb0_set_utime
 *
 * Stamp the superblock's utime field with the current time.
 */
static void sb0_set_utime(void *super)
{
	((mdp_super_t *)super)->utime = time(NULL);
}

/*
 * sb0_mark_disk_faulty
 *
 * Mark an active or spare member as faulty.
 *
 * The master superblock counters are adjusted (active or spare count
 * and working_disks are decremented), the member's ACTIVE/SPARE flag is
 * replaced by FAULTY, and the member's descriptor state in the master
 * superblock becomes faulty, plus removed when mark_removed is TRUE.
 * If the master superblock's this_disk is this descriptor, its state is
 * updated as well.
 *
 * Returns 0 on success, EINVAL if the member, its volume or the master
 * superblock is missing, or if the member is neither active nor spare.
 */
static int sb0_mark_disk_faulty(md_member_t *member, boolean mark_removed)
{
	mdp_super_t *master_sb;
	mdp_disk_t *disk;
	int rc = 0;

	LOG_ENTRY();
	if (!member || !member->vol || !member->vol->sb) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	master_sb = (mdp_super_t *)member->vol->sb;
	if (member->flags & MD_MEMBER_DISK_ACTIVE) {
		master_sb->active_disks--;
		master_sb->working_disks--;
		member->flags &= ~MD_MEMBER_DISK_ACTIVE;
		member->flags |= MD_MEMBER_DISK_FAULTY;
	} else if (member->flags & MD_MEMBER_DISK_SPARE) {
		master_sb->spare_disks--;
		master_sb->working_disks--;
		member->flags &= ~MD_MEMBER_DISK_SPARE;
		member->flags |= MD_MEMBER_DISK_FAULTY;
	} else {
		LOG_MD_BUG();
		rc = EINVAL;
	}

	if (!rc) {
		disk = &master_sb->disks[member->dev_number];
		/*
		 * MD_DISK_FAULTY and MD_DISK_REMOVED are bit numbers, not
		 * masks (the rest of this file builds states as
		 * 1<<MD_DISK_xxx), so they must be shifted here too.
		 */
		disk->state = (1<<MD_DISK_FAULTY);
		if (mark_removed == TRUE) {
			disk->state |= (1<<MD_DISK_REMOVED);
		}
		if (master_sb->this_disk.number == disk->number) {
			master_sb->this_disk.state = disk->state;
		}
	}
out:
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb0_increment_events
 *
 * Add one to the superblock's 64-bit event counter, which is stored as
 * two 32-bit halves: events_hi is bumped when events_lo wraps to 0.
 */
static void sb0_increment_events(void *super)
{
	mdp_super_t *super0 = (mdp_super_t *)super;

	super0->events_lo += 1;
	if (!super0->events_lo) {
		/* Low half wrapped around: carry into the high half. */
		super0->events_hi += 1;
	}
}

/*
 * sb0_write_sb
 *
 * Write the member's version 0.90 superblock.
 *
 * The master superblock (vol->sb) is copied, then adjusted for this
 * member: the event counter is zeroed for new regions / pending disks,
 * the descriptor's major/minor are synchronized with the object (the
 * master copy is updated too), this_disk is set to the member's
 * descriptor, and descriptors from nr_disks upward that have no
 * matching member are cleared.  The checksum is computed last.
 *
 * Normal commit : the copy is built in member->sb and written to the
 *                 object at member->super_offset.
 * Backup commit : (MD_COMMIT_BACKUP_METADATA) the copy is built in a
 *                 temporary buffer with events and state zeroed, and
 *                 handed to the engine's save_metadata(); the buffer is
 *                 released on every path.
 *
 * Returns 0 on success; EINVAL if the member, its object, volume,
 * master superblock or target buffer is missing, or the master
 * superblock is not a valid 0.90 superblock; EIO on a write error; or
 * the error returned by save_metadata().
 */
static int sb0_write_sb(md_member_t *member)
{
	int rc = 0;
	int i;
	int backup;
	mdp_super_t *sb;
	mdp_super_t *master;
	storage_object_t *obj;
	md_volume_t *vol;
	u_int32_t index;

	LOG_ENTRY();

	/* Validate everything that is dereferenced below, before using it. */
	if (!member || !member->obj || !member->vol || !member->vol->sb) {
		LOG_MD_BUG();
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	obj = member->obj;
	vol = member->vol;
	master = (mdp_super_t *)vol->sb;
	index = member->dev_number;

	if ( ! ( master->md_magic == MD_SB_MAGIC &&
			 master->major_version == 0 &&
			 master->minor_version == 90 ) ) {
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	/* A backup commit must not touch the member's in-memory superblock. */
	backup = (vol->commit_flag & MD_COMMIT_BACKUP_METADATA) != 0;
	if (backup) {
		sb = EngFncs->engine_alloc(MD_SB_BYTES);
	} else {
		sb = member->sb;
	}
	if (!sb) {
		LOG_MD_BUG();
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	// copy superblock from master
	memcpy(sb, master, MD_SB_BYTES);

	if (vol->flags & MD_NEW_REGION || member->flags & MD_MEMBER_DISK_PENDING) {
		sb->events_lo = 0; // null out event counter
		sb->events_hi = 0; // as signal to kernel on new devices
	}

	if ((sb->disks[index].major != obj->dev_major) ||
	     (sb->disks[index].minor != obj->dev_minor) ) {
		if (sb->disks[index].major != 0) {
			/* This device number has been changed */
			LOG_DEFAULT("Region %s object (%s) index (%d) :"
				    " changing from major/minor (%d:%d) to (%d:%d)\n",
			    vol->name, obj->name, index,
			    sb->disks[index].major,
			    sb->disks[index].minor,
			    obj->dev_major,
			    obj->dev_minor);
		}
		sb->disks[index].major = obj->dev_major;
		sb->disks[index].minor = obj->dev_minor;
		/* Keep the master superblock in step with the object. */
		master->disks[index].major = obj->dev_major;
		master->disks[index].minor = obj->dev_minor;
		if (master->this_disk.number == index) {
			master->this_disk.major = obj->dev_major;
			master->this_disk.minor = obj->dev_minor;
		}
	}

	sb->this_disk = sb->disks[member->dev_number];

	/* Clear descriptors beyond nr_disks that no member occupies. */
	for (i=sb->nr_disks; i<MAX_DISKS(vol); i++) {
		if (!md_volume_find_member(vol, i)) {
			memset(&sb->disks[i], 0, sizeof(mdp_disk_t));
		}
	}

	LOG_DEFAULT("Writing MD Superblock at %"PRIu64" on %s\n",
		    member->super_offset, obj->name);

	if (backup) {
		/*
		 * When the MD metadata is restored from backup,
		 * consider the MD region as a newly created region.
		 */
		sb->events_lo = 0;
		sb->events_hi = 0;
		sb->state = 0;
		sb->sb_csum = sb0_calc_sb_csum(sb);
		rc = EngFncs->save_metadata(vol->region->name, obj->name,
					    member->super_offset, MD_SB_SECTORS, (char *)sb);
		if (rc) {
			LOG_ERROR("Can't save backup metadata on %s for MD %s region.\n",
				  obj->name, vol->name);
		}
	} else {
		sb->sb_csum = sb0_calc_sb_csum(sb);

		if ( WRITE(obj, member->super_offset, MD_SB_SECTORS, (char*)sb)) {
			LOG_ERROR("Error writing superblock to object %s\n", obj->name);
			rc = EIO;
		}
	}

	/* The temporary backup buffer is released on success and failure. */
	if (backup) {
		EngFncs->engine_free(sb);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb0_read_saved_info
 *
 * Read the EVMS saved info block of this member into
 * member->saved_info.  The block is located at the member's
 * super_offset plus MD_SB0_SAVED_INFO_SECTOR_OFFSET and is
 * MD_SAVED_INFO_SECTS sectors long.
 *
 * Returns 0 on success, EINVAL if the member has no object or no
 * saved_info buffer, EIO if the read fails.
 */
static int sb0_read_saved_info(md_member_t *member)
{
	storage_object_t *obj = member->obj;
	u_int64_t sector;
	int rc = 0;

	LOG_ENTRY();

	if (obj == NULL || !member->saved_info) {
		LOG_MD_BUG();
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	sector = member->super_offset;
	sector += MD_SB0_SAVED_INFO_SECTOR_OFFSET;
	LOG_DEBUG("Reading MD saved info block at %"PRIu64" on %s\n",
		  sector, obj->name);

	if ( READ(obj, sector, MD_SAVED_INFO_SECTS,
		   (char*)member->saved_info)) {
		LOG_SERIOUS("Error reading MD saved info to %s.\n", obj->name);
		rc = EIO;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb0_write_saved_info
 *
 * Write the member's EVMS saved info block.  The signature and checksum
 * fields of member->saved_info are (re)set first; the block is then
 * written at the member's super_offset plus
 * MD_SB0_SAVED_INFO_SECTOR_OFFSET.
 *
 * Returns 0 on success, EINVAL if the member has no object or no
 * saved_info buffer, EIO if the write fails.
 */
static int sb0_write_saved_info(md_member_t *member)
{
	storage_object_t *obj = member->obj;
	u_int64_t sector;
	int rc = 0;

	LOG_ENTRY();

	if (obj == NULL) {
		LOG_ERROR("Nowhere to write to!\n");
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}
	if (!member->saved_info) {
		LOG_ERROR("No Data to write\n");
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	/* Stamp the block before it goes to disk. */
	member->saved_info->signature = MD_SAVED_INFO_SIGNATURE;
	member->saved_info->csum = md_calc_saved_info_csum(member->saved_info);

	sector = member->super_offset;
	sector += MD_SB0_SAVED_INFO_SECTOR_OFFSET;
	LOG_DEBUG("Writing MD saved info block at %"PRIu64" on %s\n",
		  sector, obj->name);

	if ( WRITE(obj, sector, MD_SAVED_INFO_SECTS,
		   (char*)member->saved_info)) {
		LOG_SERIOUS("Error writing MD saved info to %s.\n", obj->name);
		rc = EIO;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb0_zero_superblock
 *
 * Wipe the member's superblock area (MD_SB_SECTORS at super_offset).
 *
 * now == TRUE : a zero-filled superblock is written immediately;
 *               returns EIO if that write fails.
 * otherwise   : the sectors are handed to KILL_SECTORS instead.
 *
 * Returns 0 unless the immediate write fails.
 */
static int sb0_zero_superblock(md_member_t *member, boolean now)
{
	mdp_super_t zeroes;

	if (now != TRUE) {
		KILL_SECTORS(member->obj, member->super_offset, MD_SB_SECTORS);
		return 0;
	}

	memset(&zeroes, 0, MD_SB_BYTES);
	if ( WRITE(member->obj, member->super_offset, MD_SB_SECTORS, (char*)&zeroes)) {
		return EIO;
	}
	return 0;
}

/*
 * sb0_zero_saved_info
 *
 * Wipe the member's EVMS saved info block.  Its location is computed
 * from the object size (MD_NEW_SIZE_SECTORS) plus
 * MD_SB0_SAVED_INFO_SECTOR_OFFSET.
 *
 * now == TRUE : a zero-filled block is written immediately; returns
 *               EIO if that write fails.
 * otherwise   : the sectors are handed to KILL_SECTORS instead.
 *
 * Returns 0 unless the immediate write fails.
 */
static int sb0_zero_saved_info(md_member_t *member, boolean now)
{
	md_saved_info_t zeroes;
	u_int64_t sector;

	sector = MD_NEW_SIZE_SECTORS(member->obj->size);
	sector += MD_SB0_SAVED_INFO_SECTOR_OFFSET;

	if (now != TRUE) {
		KILL_SECTORS(member->obj, sector, MD_SAVED_INFO_SECTS);
		return 0;
	}

	memset(&zeroes, 0, sizeof(md_saved_info_t));
	if ( WRITE(member->obj, sector, MD_SAVED_INFO_SECTS, (char*)&zeroes)) {
		return EIO;
	}
	return 0;
}

/* Operations table for the MD version 0 superblock format. */
static struct super_func sb0_handler = {
	.activate_spare = sb0_activate_spare,
	.add_new_disk = sb0_add_new_disk,
	.allocate_sb = sb0_allocate_sb,
	.analyze_sb = sb0_analyze_sb,
	.calc_volume_size = sb0_calc_volume_size,
	.duplicate_sb = sb0_duplicate_sb,
	.find_empty_slot = sb0_find_empty_slot,
	.get_dev_number = sb0_get_dev_number,
	.get_event = sb0_get_event,
	.get_level = sb0_get_level,
	.get_name = sb0_get_name,
	.get_disk_array_info = sb0_get_disk_array_info,
	.get_sb_disk_info = sb0_get_sb_disk_info,
	.get_sb_disk_info_for_index = sb0_get_sb_disk_info_for_index,
	.get_sb_info = sb0_get_sb_info,
	.increment_events = sb0_increment_events,
	.init_sb = sb0_init_sb,
	.load_this_device_info = sb0_load_this_device_info,
	.mark_disk_faulty = sb0_mark_disk_faulty,
	.max_disks = sb0_max_disks,
	.read_saved_info = sb0_read_saved_info,
	.remove_disk = sb0_remove_disk,
	.replace_disk = sb0_replace_disk,
	.set_sb_info = sb0_set_sb_info,
	.set_this_device_info = sb0_set_this_device_info,
	.set_this_device_state = sb0_set_this_device_state,
	.set_utime = sb0_set_utime,
	.same_uuid = sb0_same_uuid,
	.write_sb = sb0_write_sb,
	.write_saved_info = sb0_write_saved_info,
	.zero_superblock = sb0_zero_superblock,
	.zero_saved_info = sb0_zero_saved_info,
};

/***  MD Version 1 Superblock ***/

/*
 * Calculate the position of the superblock.
 * It is always aligned to a 4K boundary and,
 * depending on minor_version, it can be:
 * 0: At least 8K, but less than 12K, from end of device
 * 1: At start of device
 * 2: 4K from start of device.
 *
 * Any other minor_version yields 0.
 */
static u_int64_t sb1_minor_version_to_super_offset(storage_object_t *obj, int minor_version)
{
	u_int64_t from_end;

	if (minor_version == 0) {
		/* Back off 8*2 from the object size, then round down to a multiple of 4*2. */
		from_end = obj->size;
		from_end -= 8*2;
		from_end &= ~(4*2-1);
		return from_end;
	}

	if (minor_version == 2) {
		return 4*2;
	}

	/* minor_version 1 and every unrecognized value */
	return 0;
}

/*
 * Return the data offset that goes with a superblock minor version:
 * 0: 0
 * 1: MD_SB_1_SECTORS
 * 2: 4*2 + MD_SB_1_SECTORS
 *
 * An unknown minor version is logged as a bug and (u_int64_t)-1 is returned.
 * The obj parameter is not used.
 */
static u_int64_t sb1_minor_version_to_data_offset(storage_object_t *obj, int minor_version)
{
	if (minor_version == 0) {
		return 0;
	}
	if (minor_version == 1) {
		return MD_SB_1_SECTORS;
	}
	if (minor_version == 2) {
		return (4*2 + MD_SB_1_SECTORS);
	}

	LOG_MD_BUG();
	return -1;
}

/*
 * Inverse of sb1_minor_version_to_super_offset(): try minor versions
 * 0, 1 and 2 in that order and return the first one whose superblock
 * position on obj equals super_offset.  Returns -1 when none matches.
 */
static int sb1_super_offset_to_minor_version(storage_object_t *obj, u_int64_t super_offset)
{
	int candidate = 0;

	while (candidate <= 2) {
		if (sb1_minor_version_to_super_offset(obj, candidate) == super_offset) {
			return candidate;
		}
		candidate++;
	}
	return -1;
}


/*
 * Convert a MD superblock 1 in disk endian (little endian) format to the CPU
 * endian format.
 *
 * The conversion is done in place.  sb_csum is left untouched (its
 * conversion is commented out below).  max_dev is converted before the
 * dev_roles loop because the loop uses it as its bound.
 *
 * NOTE(review): max_dev comes straight from disk and is not range-checked
 * here before being used as the dev_roles loop bound -- confirm callers
 * validate it.
 */
static void sb1_disk_to_cpu(mdp_sb_1_t * sb)
{
	int i;

	sb->magic =                DISK_TO_CPU32(sb->magic);
	sb->major_version =        DISK_TO_CPU32(sb->major_version);
	sb->feature_map =          DISK_TO_CPU32(sb->feature_map);
	sb->ctime =                DISK_TO_CPU64(sb->ctime);
	sb->level =                DISK_TO_CPU32(sb->level);
	sb->layout =               DISK_TO_CPU32(sb->layout);
	sb->size =                 DISK_TO_CPU64(sb->size);
	sb->chunksize =            DISK_TO_CPU32(sb->chunksize);
	sb->raid_disks =           DISK_TO_CPU32(sb->raid_disks);
	sb->data_offset =          DISK_TO_CPU64(sb->data_offset);
	sb->data_size =            DISK_TO_CPU64(sb->data_size);
	sb->super_offset =         DISK_TO_CPU64(sb->super_offset);
	sb->recovery_offset =      DISK_TO_CPU64(sb->recovery_offset);
	sb->dev_number =           DISK_TO_CPU32(sb->dev_number);
	sb->cnt_corrected_read =   DISK_TO_CPU32(sb->cnt_corrected_read);
	sb->utime =                DISK_TO_CPU64(sb->utime);
	sb->events =               DISK_TO_CPU64(sb->events);
	sb->resync_offset =        DISK_TO_CPU64(sb->resync_offset);
	//sb->sb_csum =              DISK_TO_CPU32(sb->sb_csum);
	/* max_dev must be in CPU order before it bounds the loop below. */
	sb->max_dev =              DISK_TO_CPU32(sb->max_dev);
	for (i=0; i<sb->max_dev; i++) {
		sb->dev_roles[i] = DISK_TO_CPU16(sb->dev_roles[i]);
	}
}

/*
 * Convert a MD superblock 1 in CPU endian format to
 * disk endian (little endian) format.
 *
 * The conversion is done in place.  sb_csum is left untouched (its
 * conversion is commented out below).
 *
 * Note: To keep a copy of MD superblock in CPU format,
 * duplicate it before calling this function.
 */
static void sb1_cpu_to_disk(mdp_sb_1_t * sb)
{
	int i;

	sb->magic =                CPU_TO_DISK32(sb->magic);
	sb->major_version =        CPU_TO_DISK32(sb->major_version);
	sb->feature_map =          CPU_TO_DISK32(sb->feature_map);
	sb->ctime =                CPU_TO_DISK64(sb->ctime);
	sb->level =                CPU_TO_DISK32(sb->level);
	sb->layout =               CPU_TO_DISK32(sb->layout);
	sb->size =                 CPU_TO_DISK64(sb->size);
	sb->chunksize =            CPU_TO_DISK32(sb->chunksize);
	sb->raid_disks =           CPU_TO_DISK32(sb->raid_disks);
	sb->data_offset =          CPU_TO_DISK64(sb->data_offset);
	sb->data_size =            CPU_TO_DISK64(sb->data_size);
	sb->super_offset =         CPU_TO_DISK64(sb->super_offset);
	sb->recovery_offset =      CPU_TO_DISK64(sb->recovery_offset);
	sb->dev_number =           CPU_TO_DISK32(sb->dev_number);
	sb->cnt_corrected_read =   CPU_TO_DISK32(sb->cnt_corrected_read);
	sb->utime =                CPU_TO_DISK64(sb->utime);
	sb->events =               CPU_TO_DISK64(sb->events);
	sb->resync_offset =        CPU_TO_DISK64(sb->resync_offset);
	//sb->sb_csum =              CPU_TO_DISK32(sb->sb_csum);

	/* Convert the roles while max_dev is still in CPU order ... */
	for (i=0; i<sb->max_dev; i++) {
		sb->dev_roles[i] = CPU_TO_DISK16(sb->dev_roles[i]);
	}
	/* ... and only then convert max_dev itself. */
	sb->max_dev =              CPU_TO_DISK32(sb->max_dev);
}


/*
 * sb1_print_sb
 *
 * Dump the fields of a version 1 superblock, followed by the first
 * max_dev entries of dev_roles, to the debug log.  Values are printed
 * exactly as stored; no byte-order conversion is done here.
 */
static void sb1_print_sb(void *super)
{
	mdp_sb_1_t *sb = (mdp_sb_1_t *)super;
	int i;

	/* sizeof yields a size_t; cast so the argument matches the %d conversion. */
	LOG_DEBUG("Sizeof structure: %d bytes.\n", (int)sizeof(*sb));
	LOG_DEBUG("magic:           %x.\n", sb->magic);
	LOG_DEBUG("major_version:   %d.\n", sb->major_version);
	LOG_DEBUG("feature_map:     %d.\n", sb->feature_map);
	LOG_DEBUG("set_name:        %s.\n", sb->set_name);
	LOG_DEBUG("level:           %d.\n", sb->level);
	LOG_DEBUG("layout:          %d.\n", sb->layout);
	LOG_DEBUG("size:            %"PRIu64".\n", sb->size);
	LOG_DEBUG("chunksize:       %d.\n", sb->chunksize);
	LOG_DEBUG("raid_disks:      %d.\n", sb->raid_disks);
	LOG_DEBUG("data_offset:     %"PRIu64".\n", sb->data_offset);
	LOG_DEBUG("data_size:       %"PRIu64".\n", sb->data_size);
	LOG_DEBUG("super_offset:    %"PRIu64".\n", sb->super_offset);
	LOG_DEBUG("recovery_offset: %"PRIu64".\n", sb->recovery_offset);
	LOG_DEBUG("dev_number:      %d,\n", sb->dev_number);
	LOG_DEBUG("cnt_corrected_read: %d,\n", sb->cnt_corrected_read);
	LOG_DEBUG("events:          %"PRIu64".\n", sb->events);
	LOG_DEBUG("resync_offset:   %"PRIu64".\n", sb->resync_offset);
	LOG_DEBUG("max_dev:         %d,\n", sb->max_dev);
	for (i=0; i<sb->max_dev; i++) {
		LOG_DEBUG("dev_roles[%03d]:  %X,\n", i, sb->dev_roles[i]);
	}
}

/*
 * sb1_calc_sb_csum
 *
 * Calculate checksum for MD superblock 1 format.
 * Assumption: the superblock is in CPU format.
 *
 * The checksum covers sizeof(*sb) + max_dev * 2 bytes, summed as unsigned
 * int words with sb_csum temporarily set to zero; a trailing 2-byte
 * remainder is added as an unsigned short.  The upper 32 bits of the
 * 64-bit sum are folded into the lower 32 bits.  sb->sb_csum is restored
 * to its previous value before returning.
 *
 * NOTE(review): sb1_write_sb() calls this after sb1_cpu_to_disk(), so on
 * that path sb->max_dev is read here in disk byte order -- confirm this is
 * intended on big-endian hosts.
 */
static unsigned int sb1_calc_sb_csum(mdp_sb_1_t * sb)
{
	unsigned int disk_csum, csum;
	unsigned long long newcsum;
	int size = sizeof(*sb) + (sb->max_dev)*2;
	unsigned int *isuper = (unsigned int*)sb;

	/* Exclude the stored checksum from the sum; it is put back below. */
	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;
	newcsum = 0;
	for (; size>=4; size -= 4 )
		newcsum += (*isuper++);

	/* Odd number of 16-bit roles: add the last half word. */
	if (size == 2)
		newcsum += (*(unsigned short*) isuper);

	/* Fold the carries above bit 31 back into the 32-bit result. */
	csum = (newcsum & 0xffffffff) + (newcsum >> 32);
	sb->sb_csum = disk_csum;
	return csum;
}

/*
 * sb1_validate_sb
 *
 * Check that the given magic and major version identify a version 1
 * MD superblock.  Returns 0 when both match, EINVAL otherwise.
 */
static int sb1_validate_sb(u_int32_t magic, u_int32_t major_version)
{
	int rc = 0;

	LOG_ENTRY();
	if (magic != MD_SB_MAGIC) {
		LOG_DEBUG("Invalid MD magic.\n");
		rc = EINVAL;
	} else if (major_version != 1) {
		LOG_DEBUG("Invalid MD version.\n");
		rc = EINVAL;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * Allocate a buffer of MD_SB_1_BYTES for a version 1 superblock through
 * the engine allocator.  Returns whatever engine_alloc returns.
 */
static void *sb1_allocate_sb(void)
{
	void *new_sb = EngFncs->engine_alloc(MD_SB_1_BYTES);

	return new_sb;
}

/*
 * sb1_duplicate_sb
 *
 * Allocate a new MD_SB_1_BYTES buffer, store it in *target and copy the
 * superblock image at src into it.  Whatever *target held before is
 * overwritten, not freed.
 *
 * Returns 0 on success; ENOMEM (with *target set to NULL) when the
 * allocation fails.
 */
static int sb1_duplicate_sb(void **target, void *src)
{
	void *copy = EngFncs->engine_alloc(MD_SB_1_BYTES);

	*target = copy;
	if (copy == NULL) {
		return ENOMEM;
	}

	memcpy(copy, src, MD_SB_1_BYTES);
	return 0;
}

/*
 * sb1_calc_volume_size
 *
 * Compute the size of the MD volume from the master superblock and the
 * members' data sizes:
 *   RAID0 / LINEAR    : sum of all members' data_size
 *   RAID1 / MULTIPATH : sb->size
 *   RAID4 / RAID5     : sb->size * (raid_disks - 1)
 *
 * When MD_NEEDS_UPDATE_SIZE is set, sb->size is first reset to the
 * smallest member data_size, the RAID1/MULTIPATH/RAID4/RAID5 members get
 * that size as their data_size, and every member's superblock offset is
 * recomputed.  The flag is cleared before returning.
 *
 * Returns 0 for a volume flagged MD_CORRUPT or for an unknown level.
 */
static u_int64_t sb1_calc_volume_size(md_volume_t *vol)
{
	mdp_sb_1_t *sb = (mdp_sb_1_t *)vol->sb;
	md_member_t *member;
	list_element_t iter;
	u_int64_t size = 0;
	u_int64_t smallest_size = -1;	/* all bits set: larger than any real size */

	LOG_ENTRY();
	if (vol->flags & MD_CORRUPT) {
		LOG_MD_BUG();
		goto out;
	}

	if (vol->flags & MD_NEEDS_UPDATE_SIZE) {
		LIST_FOR_EACH(vol->members, iter, member) {
			smallest_size = min(smallest_size, member->data_size);
		}
		sb->size = smallest_size;
	}

	switch ((int)sb->level) {
	case MD_LEVEL_RAID0:
	case MD_LEVEL_LINEAR:
		LIST_FOR_EACH(vol->members, iter, member) {
			size += member->data_size;
		}
		break;
	case MD_LEVEL_RAID1:
	case MD_LEVEL_MULTIPATH:
		size = sb->size;
		if (vol->flags & MD_NEEDS_UPDATE_SIZE) {
			LIST_FOR_EACH(vol->members, iter, member) {
				member->data_size = smallest_size;
			}
		}
		break;
	case MD_LEVEL_RAID4:
	case MD_LEVEL_RAID5:
		size = sb->size * (sb->raid_disks - 1);
		if (vol->flags & MD_NEEDS_UPDATE_SIZE) {
			LIST_FOR_EACH(vol->members, iter, member) {
				member->data_size = smallest_size;
			}
		}
		break;
	default:
		LOG_MD_BUG();
		break;
	}
out:
	/* This also runs on the MD_CORRUPT path; "sb" is reused for each member's own superblock. */
	if (vol->flags & MD_NEEDS_UPDATE_SIZE) {
		LIST_FOR_EACH(vol->members, iter, member) {
			sb = member->sb;
			sb->super_offset = sb1_minor_version_to_super_offset(member->obj, vol->sb_ver.minor_version);
			member->super_offset = sb->super_offset;
		}
	}
	vol->flags &= ~MD_NEEDS_UPDATE_SIZE;
	LOG_EXIT_U64(size);
	return size;
}

/*
 * sb1_add_new_disk
 *
 * Register a new member in the volume's master superblock.
 *
 * data_size, data_offset and super_offset are taken from the member when
 * set; a value of -1 means "not set" and is replaced by a computed default
 * that is also stored back into the member.  A random device UUID is
 * generated and copied to the member.  max_dev is incremented, and the
 * member's dev_roles entry is set to its dev_number (active disk, which
 * also increments raid_disks) or 0xFFFF (spare).  The member is flagged
 * MD_MEMBER_NEW.
 *
 * Returns 0 on success, EINVAL when the member, its volume or the master
 * superblock is missing.
 *
 * NOTE(review): member->dev_number indexes dev_roles without a range
 * check -- confirm callers guarantee it is in bounds.
 */
static int sb1_add_new_disk(md_member_t *member)
{
	int rc = 0;
	mdp_sb_1_t *sb = NULL;

	LOG_ENTRY();
	if (!member || !member->vol) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}
	
	/* All updates below go to the volume's master superblock. */
	sb = (mdp_sb_1_t *)member->vol->sb;
	if (!sb) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}
	sb->dev_number = member->dev_number;

	if (member->data_size == -1) {
		sb->data_size = md_object_usable_size(member->obj, 
						      &member->vol->sb_ver,
						      member->vol->chunksize);
		member->data_size = sb->data_size;
	} else {
		sb->data_size = member->data_size;
	}

	if (member->data_offset == -1) {
		sb->data_offset = sb1_minor_version_to_data_offset(member->obj, 
								   member->vol->sb_ver.minor_version);
		member->data_offset = sb->data_offset;
	} else {
		sb->data_offset = member->data_offset;
	}

	if (member->super_offset == -1) {
		sb->super_offset = sb1_minor_version_to_super_offset(member->obj, 
								     member->vol->sb_ver.minor_version);
		member->super_offset = sb->super_offset;
	} else {
		sb->super_offset = member->super_offset;
	}

	/* Fill the 16-byte device UUID with four random 32-bit words. */
	*(u_int32_t *)(sb->device_uuid) = random();
	*(u_int32_t *)(sb->device_uuid+4) = random();
	*(u_int32_t *)(sb->device_uuid+8) = random();
	*(u_int32_t *)(sb->device_uuid+12) = random();
	memcpy(member->device_uuid, sb->device_uuid, sizeof(sb->device_uuid));

	sb->max_dev++;
	if (member->flags & MD_MEMBER_DISK_ACTIVE) {
		sb->dev_roles[member->dev_number] = member->dev_number;
		sb->raid_disks++;
		member->raid_disk = sb->raid_disks - 1;
	} else if (member->flags & MD_MEMBER_DISK_SPARE) {
		/* 0xFFFF is the role value this file treats as "spare". */
		sb->dev_roles[member->dev_number] = 0xFFFF;
		member->raid_disk = -1;
	}
	member->flags |= MD_MEMBER_NEW;
	
	LOG_DEBUG("%s: raid_disks(%d), max_dev(%d).\n",
		  sb->set_name, sb->raid_disks, sb->max_dev);
out:
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb1_activate_spare
 *
 * Turn a member into an active, in-sync raid disk: its flags are replaced
 * by ACTIVE|SYNC, it takes the next raid disk index (the current
 * raid_disks value, which is then incremented) and that index is recorded
 * in the master superblock's dev_roles.
 *
 * Returns 0 on success, EINVAL when the member, its volume or the master
 * superblock is missing.
 */
static int sb1_activate_spare(md_member_t *member)
{
	mdp_sb_1_t *master;

	LOG_ENTRY();
	if (!member || !member->vol) {
		LOG_MD_BUG();
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	master = (mdp_sb_1_t *)member->vol->sb;
	if (!master) {
		LOG_MD_BUG();
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	member->flags = (MD_MEMBER_DISK_ACTIVE | MD_MEMBER_DISK_SYNC);
	member->raid_disk = master->raid_disks;
	master->raid_disks++;
	master->dev_roles[member->dev_number] = member->raid_disk;

	LOG_EXIT_INT(0);
	return 0;
}

/*
 * sb1_remove_disk
 *
 * Remove a member from the volume's bookkeeping.
 *
 * If the master superblock carries the device UUID of the member being
 * removed, the master is first replaced by a copy of another member's
 * superblock.
 *
 * resize == TRUE : raid_disks and max_dev are decremented and every other
 *                  member with a higher raid_disk index is shifted down by
 *                  one (dev_number and raid_disk), then its own superblock
 *                  is refreshed via set_this_device_info.
 * otherwise      : the member's dev_roles entry in the master superblock
 *                  is set to 0xFFFE (faulty).
 *
 * In both cases the member ends up with raid_disk, data_offset and
 * data_size set to -1 and flags set to MD_MEMBER_DISK_REMOVED.
 *
 * Returns 0 on success, EINVAL when the member, its volume or the master
 * superblock is missing, or the error from duplicate_sb when a replacement
 * master superblock cannot be created (nothing is modified in that case).
 */
static int sb1_remove_disk(md_member_t *member, boolean resize)
{
	int rc = 0;
	md_volume_t *vol;
	mdp_sb_1_t *sb = NULL;
	md_member_t *my_member;
	list_element_t iter;
	void *new_master;

	LOG_ENTRY();
	if (!member || !member->vol) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	vol = member->vol;
	sb = (mdp_sb_1_t *)vol->sb;
	if (!sb) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	if (!memcmp(sb->device_uuid, member->device_uuid, sizeof(member->device_uuid))) {
		// Must set master superblock to another working superblock.
		LIST_FOR_EACH(vol->members, iter, my_member) {
			if (my_member->dev_number != member->dev_number) {
				/*
				 * Duplicate into a temporary so that a failed
				 * allocation leaves vol->sb intact instead of
				 * NULL (which would be dereferenced below).
				 */
				new_master = NULL;
				rc = vol->sb_func->duplicate_sb(&new_master, my_member->sb);
				if (rc) {
					goto out;
				}
				/*
				 * NOTE(review): the previous vol->sb buffer is
				 * not freed here, as before -- confirm who
				 * owns it.
				 */
				vol->sb = new_master;
				sb = (mdp_sb_1_t *)vol->sb;
				break;
			}
		}
	}

	if (resize == TRUE) {
		/*
		 * We are about to remove a disk to satisfy a resize request.
		 * This will decrement the number of raid disks.
		 */
		sb->raid_disks--;
		sb->max_dev--;
		LIST_FOR_EACH(vol->members, iter, my_member) {
			if ((my_member != member) &&
			    (my_member->raid_disk > member->raid_disk)) {
				my_member->dev_number--;
				my_member->raid_disk--;
				vol->sb_func->set_this_device_info(my_member);
			}
		}
	} else {
		/*
		 * How do I mark a disk "removed" the MD ver 1 superblock?
		 * Just mark this disk "faulty" and hope that the kernel
		 * MD driver does not complain.
		 */
		sb->dev_roles[member->dev_number] = 0xFFFE;
	}

	member->raid_disk = -1;
	member->data_offset = -1;
	member->data_size = -1;
	member->flags = MD_MEMBER_DISK_REMOVED;
out:
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb1_replace_disk
 *
 * Point a member at a different storage object.  The superblock minor
 * version is derived from the member's current object and superblock
 * offset; the member's super_offset is then recomputed for new_obj using
 * that same minor version.
 *
 * Returns 0 on success, EINVAL when the member, its volume or the master
 * superblock is missing, or when the current superblock offset does not
 * correspond to any known minor version (the member is left unchanged).
 */
static int sb1_replace_disk(md_member_t *member, storage_object_t *new_obj)
{
	int rc = 0;
	int minor_version;

	LOG_ENTRY();
	if (!member || !member->vol) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	if (!member->vol->sb) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	minor_version = sb1_super_offset_to_minor_version(member->obj, member->super_offset);
	if (minor_version == -1) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	member->obj = new_obj;
	member->super_offset = sb1_minor_version_to_super_offset(new_obj, minor_version);

out:
	LOG_EXIT_INT(rc);
	return rc;
}


/*
 * sb1_find_empty_slot
 *
 * Find the first dev_roles entry at index raid_disks or above whose value
 * is 0 and report its index through *available_slot.
 *
 * Returns 0 on success, ENODEV when no such entry exists below
 * MD_SB_1_DISKS (*available_slot is left untouched).
 */
static int sb1_find_empty_slot(void *super, u_int32_t *available_slot)
{
	int rc=0;
	int i;
	mdp_sb_1_t *sb = (mdp_sb_1_t *)super;

	LOG_ENTRY();

	/*
	 * Try to find an empty slot starting from raid_disks,
	 * (see kernel md driver code (md.c)
	 *
	 * The index must advance on every iteration; without the increment
	 * the scan never terminates when the first slot examined is in use.
	 */
	for (i = sb->raid_disks; i < MD_SB_1_DISKS; i++) {
		if (sb->dev_roles[i] == 0) {
			break;
		}
	}

	/* ">=" also covers a raid_disks value already past the table. */
	if (i >= MD_SB_1_DISKS) {
		LOG_CRITICAL("%s array is full.\n", sb->set_name);
		rc = ENODEV;
	} else {
		*available_slot = i;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb1_init_sb
 *
 * Initialize a version 1 superblock buffer (MD_SB_1_BYTES long): clear it,
 * then set the magic, major version 1, geometry fields from the arguments,
 * the set name "md/md<md_minor>" and a random 16-byte set UUID.
 * raid_disks and max_dev start at 0.
 *
 * Always returns 0.
 */
int sb1_init_sb(void *super, int md_minor, int level, u_int32_t layout,
		u_int64_t size, u_int32_t chunk_size)
{
	mdp_sb_1_t *sb = (mdp_sb_1_t *)super;
	int word;

	LOG_ENTRY();
	memset(sb, 0, MD_SB_1_BYTES);

	sb->magic = MD_SB_MAGIC;
	sb->major_version = 1;
	sb->feature_map = 0;
	sb->max_dev = 0;

	sb->level = level;
	sb->layout = layout;
	sb->chunksize = chunk_size;
	sb->size = size;

	sprintf(sb->set_name, "md/md%d", md_minor);

	/* Four random 32-bit words, stored at byte offsets 0, 4, 8 and 12. */
	for (word = 0; word < 4; word++) {
		*(u_int32_t *)(sb->set_uuid + word * 4) = random();
	}

	LOG_EXIT_INT(0);
	return 0;
}

/*
 * Set device info in the member's superblock
 *
 * Copies the member's offsets, sizes, read-error count, device UUID and
 * device number into member->sb, and records the member's role in
 * dev_roles[dev_number]: 0xFFFE when faulty, 0xFFFF when spare, otherwise
 * its raid_disk index.
 */
static void sb1_set_this_device_info(md_member_t *member)
{
	mdp_sb_1_t *own_sb;
	int role;

	LOG_ENTRY();
	if (!member || !member->sb) {
		LOG_MD_BUG();
		LOG_EXIT_VOID();
		return;
	}

	own_sb = (mdp_sb_1_t *)member->sb;

	own_sb->dev_number = member->dev_number;
	memcpy(own_sb->device_uuid, member->device_uuid, sizeof(own_sb->device_uuid));
	own_sb->super_offset = member->super_offset;
	own_sb->data_offset = member->data_offset;
	own_sb->data_size = member->data_size;
	own_sb->recovery_offset = member->recovery_offset;
	own_sb->cnt_corrected_read = member->cnt_corrected_read;

	/* Faulty takes precedence over spare. */
	if (member->flags & MD_MEMBER_DISK_FAULTY) {
		role = 0xFFFE;
	} else if (member->flags & MD_MEMBER_DISK_SPARE) {
		role = 0xFFFF;
	} else {
		role = member->raid_disk;
	}
	own_sb->dev_roles[member->dev_number] = role;

	LOG_EXIT_VOID();
}

/*
 * Set device state in the master superblock.
 *
 * Writes the member's role into dev_roles[dev_number] of the volume's
 * master superblock: 0xFFFE when faulty, 0xFFFF when spare, the raid_disk
 * index when md_member_is_raid_disk() says so, and 0xFFFE in every other
 * case.
 */
static void sb1_set_this_device_state(md_member_t *member)
{
	mdp_sb_1_t *master;
	int role = 0xFFFE;	/* faulty, and the fallback for "none of the above" */

	LOG_ENTRY();
	if (!member || !member->vol || !member->vol->sb) {
		LOG_MD_BUG();
		LOG_EXIT_VOID();
		return;
	}

	master = (mdp_sb_1_t *)member->vol->sb;

	if (!(member->flags & MD_MEMBER_DISK_FAULTY)) {
		if (member->flags & MD_MEMBER_DISK_SPARE) {
			role = 0xFFFF;
		} else if (md_member_is_raid_disk(member)) {
			role = member->raid_disk;
		}
	}
	master->dev_roles[member->dev_number] = role;

	LOG_EXIT_VOID();
}


/*
 * sb1_load_this_device_info
 *
 * Fill the member structure from the member's own superblock: offsets,
 * sizes, read-error count, device UUID and device number.  The member's
 * flags are replaced according to dev_roles[dev_number]:
 *   0xFFFE -> FAULTY, raid_disk = -1
 *   0xFFFF -> SPARE,  raid_disk = -1
 *   other  -> ACTIVE|SYNC, raid_disk = the role value
 *
 * NOTE(review): sb->dev_number indexes dev_roles without a range check --
 * confirm the superblock has been validated by the caller.
 */
static void sb1_load_this_device_info(md_member_t *member)
{
	mdp_sb_1_t *own_sb;
	int role;

	LOG_ENTRY();
	if (!member || !member->sb) {
		LOG_MD_BUG();
		LOG_EXIT_VOID();
		return;
	}

	own_sb = (mdp_sb_1_t *)member->sb;

	member->dev_number = own_sb->dev_number;
	memcpy(member->device_uuid, own_sb->device_uuid, sizeof(own_sb->device_uuid));
	member->super_offset = own_sb->super_offset;
	member->data_offset = own_sb->data_offset;
	member->data_size = own_sb->data_size;
	member->recovery_offset = own_sb->recovery_offset;
	member->cnt_corrected_read = own_sb->cnt_corrected_read;

	role = own_sb->dev_roles[own_sb->dev_number];
	switch (role) {
	case 0xFFFE:
		member->flags = MD_MEMBER_DISK_FAULTY;
		member->raid_disk = -1;
		break;
	case 0xFFFF:
		member->flags = MD_MEMBER_DISK_SPARE;
		member->raid_disk = -1;
		break;
	default:
		member->flags = MD_MEMBER_DISK_ACTIVE;
		member->flags |= MD_MEMBER_DISK_SYNC;
		member->raid_disk = role;
		break;
	}

	LOG_EXIT_VOID();
}

/*
 * sb1_get_name
 *
 * Superblock version 1 format does not have MD minor field.
 * Use the set_name field as part of the name.
 *
 * Copies set_name, including its terminating NUL, into the caller's
 * buffer; the caller must provide enough room.
 */
static void sb1_get_name(char *name, void *super)
{
	strcpy(name, ((mdp_sb_1_t *)super)->set_name);
}

/*
 * sb1_get_minor
 *
 * Superblock version 1 format does not have MD minor field.
 * We try to get it from the set_name field.
 *
 * Scans set_name for the first occurrence of "md" that is immediately
 * followed by a decimal digit and returns the number starting there.
 * Returns -1 when no such occurrence exists.
 */
static int sb1_get_minor(void *super)
{
	mdp_sb_1_t *sb = (mdp_sb_1_t *)super;
	char *ptr;

	for (ptr = strstr(sb->set_name, "md"); ptr; ptr = strstr(ptr + 2, "md")) {
		/*
		 * isdigit() requires a value representable as unsigned char
		 * (or EOF); a plain char may be negative.
		 */
		if (isdigit((unsigned char)ptr[2])) {
			return atoi(ptr + 2);
		}
	}
	return -1;
}

/* Return the dev_number field of a version 1 superblock. */
static int sb1_get_dev_number(void *super)
{
	return ((mdp_sb_1_t *)super)->dev_number;
}

/*
 * sb1_mark_disk_faulty
 *
 * Mark an active or spare member as faulty: the ACTIVE (or, failing that,
 * SPARE) flag is replaced by FAULTY and the member's dev_roles entry in
 * the master superblock is set to 0xFFFE.
 *
 * Returns 0 on success, EINVAL when the volume or master superblock is
 * missing, or when the member is neither active nor spare.
 *
 * Note: mark_removed parameter is not used in this function.
 */
static int sb1_mark_disk_faulty(md_member_t *member, boolean mark_removed)
{
	mdp_sb_1_t *master_sb;

	LOG_ENTRY();
	if (!member->vol || !member->vol->sb) {
		LOG_MD_BUG();
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}
	master_sb = (mdp_sb_1_t *)member->vol->sb;

	/* ACTIVE is tested first; only the flag that matched is cleared. */
	if (member->flags & MD_MEMBER_DISK_ACTIVE) {
		member->flags &= ~MD_MEMBER_DISK_ACTIVE;
	} else if (member->flags & MD_MEMBER_DISK_SPARE) {
		member->flags &= ~MD_MEMBER_DISK_SPARE;
	} else {
		LOG_MD_BUG();
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	member->flags |= MD_MEMBER_DISK_FAULTY;
	master_sb->dev_roles[member->dev_number] = 0xFFFE;

	LOG_EXIT_INT(0);
	return 0;
}

/* Set the superblock's utime field to the current time. */
static void sb1_set_utime(void *super)
{
	((mdp_sb_1_t *)super)->utime = time(NULL);
}

/* Increment the superblock's event counter by one. */
static void sb1_increment_events(void *super)
{
	((mdp_sb_1_t *)super)->events++;
}

/* Return the superblock's event counter. */
static u_int64_t sb1_get_event(void *super)
{
	return ((mdp_sb_1_t *)super)->events;
}

static boolean sb1_same_uuid(void *super1, void *super2)
{
	mdp_sb_1_t *sb1 = (mdp_sb_1_t *)super1;
	mdp_sb_1_t *sb2 = (mdp_sb_1_t *)super2;

	LOG_ENTRY();
	if (memcmp(sb1->set_uuid, sb2->set_uuid, sizeof(sb1->set_uuid)) == 0) {
		LOG_EXIT_BOOL(TRUE);
		return TRUE;
	}
	LOG_EXIT_BOOL(FALSE);
	return FALSE;
}

/*
 * sb1_get_sb_disk_info
 *
 * Fill a disk info record for a member.  number comes from the member's
 * dev_number; major/minor come from the member's storage object (0 when
 * there is none).  raid_disk and state are derived from the member's own
 * superblock, dev_roles[number]:
 *   0xFFFF -> raid_disk -1, no state bits
 *   0xFFFE -> raid_disk -1, FAULTY
 *   other  -> raid_disk = role, ACTIVE and SYNC
 */
static void sb1_get_sb_disk_info(md_member_t *member, mdu_disk_info_t *info)
{
	mdp_sb_1_t *own_sb = (mdp_sb_1_t *)member->sb;
	int role;

	info->number = member->dev_number;
	if (member->obj) {
		info->major = member->obj->dev_major;
		info->minor = member->obj->dev_minor;
	} else {
		info->major = 0;
		info->minor = 0;
	}
	info->state = 0;

	role = own_sb->dev_roles[info->number];
	if (role == 0xFFFF) {
		info->raid_disk = -1;
	} else if (role == 0xFFFE) {
		info->raid_disk = -1;
		info->state |= (1<<MD_DISK_FAULTY);
	} else {
		info->raid_disk = role;
		info->state |= (1<<MD_DISK_ACTIVE);
		info->state |= (1<<MD_DISK_SYNC);
	}
}

/*
 * sb1_get_sb_disk_info_for_index
 *
 * Fill a disk info record for the slot given in info->number, using only
 * the superblock (major and minor are therefore reported as 0).
 * raid_disk and state are derived from dev_roles[info->number]:
 *   0xFFFF -> raid_disk -1, no state bits
 *   0xFFFE -> raid_disk -1, FAULTY
 *   other  -> raid_disk = role, ACTIVE and SYNC
 *
 * When info->number is not below MD_SB_1_DISKS a bug is logged and the
 * whole record is zeroed.
 */
static void sb1_get_sb_disk_info_for_index(void *super, mdu_disk_info_t *info)
{
	mdp_sb_1_t *sb = (mdp_sb_1_t *)super;
	int role;

	if (info->number >= MD_SB_1_DISKS) {
		LOG_MD_BUG();
		memset(info, 0, sizeof(*info));
		return;
	}

	info->major = 0;
	info->minor = 0;
	info->state = 0;

	role = sb->dev_roles[info->number];
	if (role == 0xFFFF) {
		info->raid_disk = -1;
	} else if (role == 0xFFFE) {
		info->raid_disk = -1;
		info->state |= (1<<MD_DISK_FAULTY);
	} else {
		info->raid_disk = role;
		info->state |= (1<<MD_DISK_ACTIVE);
		info->state |= (1<<MD_DISK_SYNC);
	}
}

/*
 * sb1_get_disk_array_info
 *
 * Allocate and fill an md_array_info_t describing the array recorded in a
 * version 1 superblock, with one disk entry per device slot (max_dev
 * entries).  Each entry gets its index as number, major/minor 0, and a
 * raid_disk/state derived from dev_roles; the spare, failed, active and
 * working counters are incremented accordingly.
 *
 * Returns the new structure, or NULL when the allocation fails.
 *
 * NOTE(review): the counters are only incremented, never initialized
 * here -- this presumably relies on engine_alloc returning zeroed memory;
 * confirm.
 */
static md_array_info_t * sb1_get_disk_array_info(void *super)
{
	mdp_sb_1_t *sb = (mdp_sb_1_t *)super;
	md_array_info_t *array;
	int slot;

	LOG_ENTRY();

	array = EngFncs->engine_alloc(sizeof(md_array_info_t) +
				      (sb->max_dev * sizeof(mdu_disk_info_t)));
	if (!array) {
		LOG_EXIT_PTR(array);
		return array;
	}

	array->raid_disks = sb->raid_disks;
	array->personality = level_to_pers(sb->level);
	array->chunksize = sb->chunksize;
	array->nr_disks = sb->max_dev;

	for (slot = 0; slot < sb->max_dev; slot++) {
		array->disk[slot].number = slot;
		array->disk[slot].major = 0;
		array->disk[slot].minor = 0;
		array->disk[slot].state = 0;

		switch (sb->dev_roles[slot]) {
		case 0xFFFF:
			/* spare */
			array->spare_disks++;
			array->working_disks++;
			array->disk[slot].raid_disk = -1;
			break;
		case 0xFFFE:
			/* faulty */
			array->disk[slot].raid_disk = -1;
			array->disk[slot].state |= (1<<MD_DISK_FAULTY);
			array->failed_disks++;
			break;
		default:
			/* active raid disk */
			array->disk[slot].raid_disk = sb->dev_roles[slot];
			array->disk[slot].state |= (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
			array->active_disks++;
			array->working_disks++;
			break;
		}
	}

	LOG_EXIT_PTR(array);
	return array;
}

/*
 * sb1_get_sb_info
 *
 * Translate a version 1 superblock into the format-independent
 * md_super_info_t.  The info structure is zeroed first, and the
 * superblock is dumped to the debug log.
 *
 * minor_version is derived from super_offset (0 -> 1, 4*2 -> 2, anything
 * else -> 0).  The disk counters are computed by walking dev_roles for
 * max_dev entries: 0xFFFF counts as spare + working, 0xFFFE as failed,
 * anything else as active + working.  md_minor is parsed from set_name.
 */
static void sb1_get_sb_info(void *super, md_super_info_t *info)
{
	mdp_sb_1_t *sb = (mdp_sb_1_t *)super;
	u_int32_t uuid_words[4];
	int i;

	memset(info, 0, sizeof(*info));

	sb1_print_sb(sb);

	info->md_magic = sb->magic;
	info->major_version = sb->major_version;
	switch (sb->super_offset) {
	case 0:
		info->minor_version = 1;
		break;
	case 4*2:
		info->minor_version = 2;
		break;
	default:
		info->minor_version = 0;
	}
	info->patch_version = 0;

	/*
	 * The set UUID is stored as four 32-bit words at byte offsets
	 * 0, 4, 8 and 12 (see sb1_init_sb).  Read the whole words; indexing
	 * set_uuid[n] alone would yield a single byte of each.
	 */
	memcpy(uuid_words, sb->set_uuid, sizeof(uuid_words));
	info->set_uuid0 = uuid_words[0];
	info->set_uuid1 = uuid_words[1];
	info->set_uuid2 = uuid_words[2];
	info->set_uuid3 = uuid_words[3];

	info->ctime = (time_t)sb->ctime;
	info->utime = (time_t)sb->utime;
	info->level = sb->level;
	info->size = sb->size;                /* in sectors */
	info->nr_disks = sb->max_dev;
	info->raid_disks = sb->raid_disks;
	for (i=0; i<sb->max_dev; i++) {
		if (sb->dev_roles[i] == 0xFFFF) {
			info->spare_disks++;
			info->working_disks++;
		} else if (sb->dev_roles[i] == 0xFFFE) {
			info->failed_disks++;
		} else {
			info->active_disks++;
			info->working_disks++;
		}
	}
	info->md_minor = sb1_get_minor(super);
	info->not_persistent = 0;
	info->state_flags = MD_SUPER_INFO_CLEAN; /* FIX ME */
	info->sb_csum = sb->sb_csum;
	info->layout = sb->layout;
	info->events = sb->events;
	info->chunksize = sb->chunksize;
	info->this_disk_index = sb->dev_number;
}

/*
 * Change the disk count
 *
 * Overwrites max_dev and raid_disks in the superblock with nr_disks and
 * raid_disks from info and logs a warning with the new values.  No other
 * field of info is used.
 */
static void sb1_set_sb_info(void *super, md_super_info_t *info)
{
	mdp_sb_1_t *sb = (mdp_sb_1_t *)super;

	LOG_ENTRY();

	sb->max_dev = info->nr_disks;
	sb->raid_disks = info->raid_disks;
	/* Terminate the message with a newline like every other log call here. */
	LOG_WARNING("Superblock disk counts have been changed,"
		    " max_dev(%03d) raid_disks(%03d)\n",
		    sb->max_dev, sb->raid_disks);
	LOG_EXIT_VOID();
}

/* Return the level field of a version 1 superblock. */
static int sb1_get_level(void *sb)
{
	return ((mdp_sb_1_t *)sb)->level;
}

/*
 * Return the number of device slots in a version 1 superblock
 * (MD_SB_1_DISKS).  Declared with an explicit (void) parameter list so
 * the definition is a proper prototype.
 */
static int sb1_max_disks(void)
{
	return MD_SB_1_DISKS;
}

/*
 * sb1_analyze_sb
 *
 * Analyze a volume that uses version 1 superblocks.  Sets vol->raid_disks
 * from the master superblock, logs the disk counts and the state of every
 * member, then hands off to the format-independent md_analyze_sb().
 * A volume already flagged MD_CORRUPT is skipped.
 *
 * Returns vol->flags.
 */
static int sb1_analyze_sb(md_volume_t *vol)
{
	mdp_sb_1_t *sb = (mdp_sb_1_t *)vol->sb;
	int nr_disks;
	int active_disks;
	int spares;
	int failed_disks;
	int stale_disks;
	md_member_t *member;
	list_element_t iter;

	LOG_ENTRY();
	if (vol->flags & MD_CORRUPT) {
		LOG_WARNING("%s has been set CORRUPT, skip analyzing...\n", vol->name);
		goto out;
	}

	/* The counted values are used only for the debug output below. */
	nr_disks = md_volume_count_children(vol);
	active_disks = md_volume_count_active_disks(vol);
	spares = md_volume_count_spare_disks(vol);
	failed_disks = md_volume_count_faulty_disks(vol);
	stale_disks = md_volume_count_stale_disks(vol);
	vol->raid_disks = sb->raid_disks;
	LOG_DEBUG("Analyzing %s (md_minor=%d)...\n", vol->name, vol->md_minor);
	LOG_DEBUG("raid_disks   : superblock(%03d) volume(%03d) counted(---).\n",
		  sb->raid_disks, vol->raid_disks);
	LOG_DEBUG("nr_disks     : superblock(---) volume(%03d) counted(%03d).\n",
		  vol->nr_disks, nr_disks);
	LOG_DEBUG("active_disks : superblock(---) volume(%03d) counted(%03d).\n",
		  vol->active_disks, active_disks);
	LOG_DEBUG("spare_disks  : superblock(---) volume(%03d) counted(%03d).\n",
		  vol->spare_disks, spares);
	LOG_DEBUG("failed_disks : superblock(---) volume(%03d) counted(%03d).\n",
		  vol->failed_disks, failed_disks);
	LOG_DEBUG("stale_disks  : superblock(---) volume(%03d) counted(%03d).\n",
		  vol->stale_disks, stale_disks);
	LOG_DEBUG("The following devices are members of %s array:\n", vol->name);
	LIST_FOR_EACH(vol->members, iter, member) {
		LOG_DEBUG("%12s: Major=%03d Minor=%03d Number=%03d RaidDisk=%03d State: %s%s%s%s%s\n",
			  member->obj->name, member->obj->dev_major, member->obj->dev_minor,
			  member->dev_number, member->raid_disk,
			  (member->flags & MD_MEMBER_DISK_ACTIVE) ? "active " : "       ",
			  (member->flags & MD_MEMBER_DISK_SYNC) ? "sync " : "     ",
			  (member->flags & MD_MEMBER_DISK_SPARE) ? "spare " : "      ",
			  (member->flags & MD_MEMBER_DISK_FAULTY) ? "faulty " : "       ",
			  (member->flags & MD_MEMBER_STALE) ? "stale " : "      ");
	}

	/* md_analyze_sb() takes the chunk size in bytes; the superblock value is shifted by EVMS_VSECTOR_SIZE_SHIFT. */
	md_analyze_sb(vol, sb->level, sb->raid_disks, sb->chunksize << EVMS_VSECTOR_SIZE_SHIFT);

	if (vol->flags & MD_CORRUPT) {
		LOG_CRITICAL("MD region %s is corrupt\n", vol->name);
	}
	if (vol->flags & MD_DEGRADED) {
		LOG_WARNING("MD region %s is degraded\n", vol->name);
	}

out:
	LOG_EXIT_INT(vol->flags);
	return vol->flags;
}

/*
 * sb1_write_sb
 *
 * Write a member's version 1 superblock.
 *
 * The member's superblock is rebuilt as a copy of the volume's master
 * superblock with the member-specific fields filled in.  A second,
 * temporary copy is converted to disk byte order, checksummed and either
 * written to the member's object at member->super_offset or, when
 * MD_COMMIT_BACKUP_METADATA is set, passed to the engine's save_metadata.
 * The temporary copy is freed before returning; member->sb stays in CPU
 * byte order.
 *
 * Returns 0 on success, EINVAL for a missing member/volume/master
 * superblock, ENOMEM when a copy cannot be allocated, EIO when the write
 * fails, or the error returned by save_metadata.
 *
 * NOTE(review): member->sb is overwritten with a fresh allocation without
 * freeing the previous buffer here -- confirm callers release it first.
 * NOTE(review): the checksum is computed after sb1_cpu_to_disk() and is
 * stored without a byte-order conversion -- confirm on big-endian hosts.
 */
static int sb1_write_sb(md_member_t *member)
{
	int rc = 0;
	mdp_sb_1_t *duplicate = NULL;
	md_volume_t *vol;

	LOG_ENTRY();

	if (!member || !member->vol || !member->vol->sb) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}
	
	vol = member->vol;

	/* Copy from master superblock */
	rc = sb1_duplicate_sb(&member->sb, vol->sb);
	if (rc) {
		goto out;
	}
	sb1_set_this_device_info(member);
	
	sb1_print_sb(member->sb);

	/*
	 * Duplicate the MD superblock.
	 * The copy is the one converted to disk format and written out.
	 */

	rc = sb1_duplicate_sb((void **)&duplicate, member->sb);
	if (rc) {
		goto out;
	}

	if (vol->commit_flag & MD_COMMIT_BACKUP_METADATA) {
		/*
		 * When the MD metadata is restored from backup,
		 * consider the MD region as a newly created region.
		 */
		duplicate->events = 0;
		duplicate->resync_offset = 0;
	}

	/*
	 * Convert the MD superblock to disk little endian format
	 * before calculating the CRC.
	 */
	sb1_cpu_to_disk(duplicate);
	duplicate->sb_csum = 0;
	duplicate->sb_csum = sb1_calc_sb_csum(duplicate);

	LOG_DEFAULT("Writing MD Superblock at %"PRIu64" on %s (size=%"PRIu64").\n",
		    member->super_offset, member->obj->name, member->obj->size);

	if (vol->commit_flag & MD_COMMIT_BACKUP_METADATA) {
		rc = EngFncs->save_metadata(vol->region->name, member->obj->name,
					    member->super_offset, MD_SB_1_SECTORS, (char *)duplicate);
		if (rc) {
			LOG_ERROR("Can't save backup metadata on %s for MD %s region.\n",
				  member->obj->name, vol->name);
			goto out;
		}
	} else {
		if ( WRITE(member->obj, member->super_offset, MD_SB_1_SECTORS, (char*)duplicate)) {
			LOG_ERROR("Error writing superblock to object %s\n", member->obj->name);
			rc = EIO;
			goto out;
		}
	}
out:
	/* Only the temporary disk-format copy is released here. */
	if (duplicate) {
		EngFncs->engine_free(duplicate);
	}
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb1_write_saved_info
 *
 * Stamp the member's saved-info block with its signature and checksum and
 * write it at member->super_offset + MD_SB1_SAVED_INFO_SECTOR_OFFSET.
 *
 * Returns 0 on success, EINVAL when the member has no storage object or
 * no saved-info buffer, EIO when the write fails.
 */
static int sb1_write_saved_info(md_member_t *member)
{
	storage_object_t *obj = member->obj;
	u_int64_t location;
	int rc = 0;

	LOG_ENTRY();
	if (obj == NULL || !member->saved_info) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	/* The checksum is computed after the signature has been set. */
	member->saved_info->signature = MD_SAVED_INFO_SIGNATURE;
	member->saved_info->csum = md_calc_saved_info_csum(member->saved_info);

	location = member->super_offset;
	location += MD_SB1_SAVED_INFO_SECTOR_OFFSET;

	LOG_DEBUG("Writing MD saved info block at %"PRIu64" on %s\n",
		  location, obj->name);
	if (WRITE(obj, location, MD_SAVED_INFO_SECTS,
		  (char *)member->saved_info)) {
		LOG_ERROR("Error writing MD saved info to %s.\n", obj->name);
		rc = EIO;
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb1_read_saved_info
 *
 * Read the member's saved-info block from
 * member->super_offset + MD_SB1_SAVED_INFO_SECTOR_OFFSET into the
 * member's saved_info buffer.  The contents are not validated here.
 *
 * Returns 0 on success, EINVAL when the member has no storage object or
 * no saved-info buffer, EIO when the read fails.
 */
static int sb1_read_saved_info(md_member_t *member)
{
	storage_object_t *obj = member->obj;
	u_int64_t location;
	int rc = 0;

	LOG_ENTRY();
	if (obj == NULL || !member->saved_info) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	location = member->super_offset;
	location += MD_SB1_SAVED_INFO_SECTOR_OFFSET;

	LOG_DEBUG("Reading MD saved info block at %"PRIu64" on %s\n",
		  location, obj->name);
	if (READ(obj, location, MD_SAVED_INFO_SECTS,
		 (char *)member->saved_info)) {
		LOG_ERROR("Error reading MD saved info to %s.\n", obj->name);
		rc = EIO;
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb1_zero_superblock
 *
 * Erase the version 1 superblock of a member.  When "now" is TRUE a
 * zero-filled image is written immediately at member->super_offset;
 * otherwise the sectors are handed to KILL_SECTORS.
 *
 * Returns 0, or EIO when the immediate write fails.
 */
static int sb1_zero_superblock(md_member_t *member, boolean now)
{
	/*
	 * Zero-filled image of exactly MD_SB_1_BYTES.  A local mdp_sb_1_t is
	 * not used: sb1_calc_sb_csum() adds the dev_roles bytes on top of
	 * sizeof(mdp_sb_1_t), so the structure alone may be smaller than the
	 * MD_SB_1_BYTES that are cleared and written here.
	 */
	char zero_sb[MD_SB_1_BYTES];
	int rc = 0;

	LOG_ENTRY();
	LOG_DEBUG("Removing MD superblock from %s at %"PRIu64".\n",
		  member->obj->name, member->super_offset);

	if (now == TRUE) {
		memset(zero_sb, 0, sizeof(zero_sb));
		if ( WRITE(member->obj, member->super_offset, MD_SB_1_SECTORS, zero_sb)) {
			rc = EIO;
		}
	} else {
		KILL_SECTORS(member->obj, member->super_offset, MD_SB_1_SECTORS);
	}

	/* Single exit so the entry log is always paired with an exit log. */
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * sb1_zero_saved_info
 *
 * Erase the saved-info block of a version 1 member, located at
 * member->super_offset + MD_SB1_SAVED_INFO_SECTOR_OFFSET.  When "now" is
 * TRUE a zero-filled block is written immediately; otherwise the sectors
 * are handed to KILL_SECTORS.
 *
 * NOTE(review): the buffer written is sizeof(md_saved_info_t) bytes while
 * the I/O length is MD_SAVED_INFO_SECTS sectors -- confirm these match.
 *
 * Returns 0, or EIO when the immediate write fails.
 */
static int sb1_zero_saved_info(md_member_t *member, boolean now)
{
	md_saved_info_t blank;
	u_int64_t where = member->super_offset;

	where += MD_SB1_SAVED_INFO_SECTOR_OFFSET;
	LOG_DEBUG("Removing MD saved info from %s at %"PRIu64".\n",
		  member->obj->name, where);

	if (now != TRUE) {
		KILL_SECTORS(member->obj, where, MD_SAVED_INFO_SECTS);
		return 0;
	}

	memset(&blank, 0, sizeof(md_saved_info_t));
	if (WRITE(member->obj, where, MD_SAVED_INFO_SECTS, (char *)&blank)) {
		return EIO;
	}
	return 0;
}

/* Operations table for the MD version 1 superblock format. */
static struct super_func sb1_handler = {
	.activate_spare = sb1_activate_spare,
	.add_new_disk = sb1_add_new_disk,
	.allocate_sb = sb1_allocate_sb,
	.analyze_sb = sb1_analyze_sb,
	.calc_volume_size = sb1_calc_volume_size,
	.duplicate_sb = sb1_duplicate_sb,
	.find_empty_slot = sb1_find_empty_slot,
	.get_event = sb1_get_event,
	.get_name = sb1_get_name,
	.get_level = sb1_get_level,
	.get_dev_number = sb1_get_dev_number,
	.get_disk_array_info = sb1_get_disk_array_info,
	.get_sb_disk_info = sb1_get_sb_disk_info,
	.get_sb_disk_info_for_index = sb1_get_sb_disk_info_for_index,
	.get_sb_info = sb1_get_sb_info,
	.increment_events = sb1_increment_events,
	.init_sb = sb1_init_sb,
	.load_this_device_info = sb1_load_this_device_info,
	.mark_disk_faulty = sb1_mark_disk_faulty,
	.max_disks = sb1_max_disks,
	.read_saved_info = sb1_read_saved_info,
	.remove_disk = sb1_remove_disk,
	.replace_disk = sb1_replace_disk,
	.same_uuid = sb1_same_uuid,
	.set_sb_info = sb1_set_sb_info,
	.set_this_device_info = sb1_set_this_device_info,
	.set_this_device_state = sb1_set_this_device_state,
	.set_utime = sb1_set_utime,
	.write_sb = sb1_write_sb,
	.write_saved_info = sb1_write_saved_info,
	.zero_superblock = sb1_zero_superblock,
	.zero_saved_info = sb1_zero_saved_info,
};

/* NULL-terminated list of the superblock handlers defined in this file (version 0 first, then version 1). */
struct super_func *sb_handlers[] = {&sb0_handler, &sb1_handler, NULL};

/*
 * md_init_sb
 *
 * Record the requested superblock version on the volume, select the
 * matching handler table, allocate a master superblock through it and
 * let the handler initialize that superblock.
 *
 * Returns 0 on success, EINVAL for an unknown major version, ENOMEM when
 * the handler cannot allocate a superblock, or whatever init_sb returns.
 */
int md_init_sb(md_volume_t *vol, md_sb_ver_t *sb_ver, int level,
	       u_int32_t layout, u_int64_t size, u_int32_t chunksize)
{
	int rc = 0;

	LOG_ENTRY();
	vol->sb_ver = *sb_ver;

	if (sb_ver->major_version == MD_SB_VER_0) {
		vol->sb_func = &sb0_handler;
	} else if (sb_ver->major_version == MD_SB_VER_1) {
		vol->sb_func = &sb1_handler;
	} else {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	vol->sb = vol->sb_func->allocate_sb();
	if (!vol->sb) {
		rc = ENOMEM;
		goto out;
	}

	vol->personality = level_to_pers(level);
	vol->chunksize = chunksize;
	rc = vol->sb_func->init_sb(vol->sb, vol->md_minor, level,
				   layout, size, chunksize);

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * md_volume_add_new_member
 *
 * Attach a member to an MD volume:
 * - if the caller left member->dev_number at -1, use the volume's current
 *   nr_disks (as reported by md_volume_get_super_info) as the index
 * - insert the member into vol->members and point member->vol at the volume
 * - ask the superblock handler to add the member (add_new_disk)
 * - drop every member's superblock copy and duplicate a fresh one from
 *   the master superblock
 * - refresh the volume's disk counters from the master superblock
 *
 * If add_new_disk or any duplication fails, the member is taken off the
 * list again; the counters are still refreshed.  Returns 0, EINVAL for bad
 * arguments, ENOMEM when the list insert fails, or the first handler error.
 */
int md_volume_add_new_member(md_volume_t *vol, md_member_t *member)
{
	md_super_info_t sinfo;
	md_member_t *cur;
	list_element_t elem = NULL;
	list_element_t iter;
	int dup_rc;
	int rc = 0;

	LOG_ENTRY();

	if (!vol || !vol->sb_func || !member || !member->obj) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	if (member->dev_number == -1) {
		md_volume_get_super_info(vol, &sinfo);
		vol->nr_disks = sinfo.nr_disks;
		member->dev_number = vol->nr_disks;
		LOG_DEBUG("Caller did not specify the disk index, set member's index to %d.\n",
			  member->dev_number);
	}

	elem = EngFncs->insert_thing(vol->members, member, INSERT_AFTER, NULL);
	if (!elem) {
		LOG_CRITICAL("Can't insert %s into MD volume %s.\n",
			     member->obj->name, vol->name);
		rc = ENOMEM;
		goto out;
	}
	member->vol = vol;

	rc = vol->sb_func->add_new_disk(member);
	if (!rc) {
		/* Rebuild every member's private copy from the master superblock. */
		LIST_FOR_EACH(vol->members, iter, cur) {
			if (cur->sb) {
				EngFncs->engine_free(cur->sb);
				cur->sb = NULL;
			}
			dup_rc = vol->sb_func->duplicate_sb(&cur->sb, vol->sb);
			if (dup_rc) {
				/* Remember only the first failure, keep going. */
				if (!rc) {
					rc = dup_rc;
				}
			} else {
				vol->sb_func->set_this_device_info(cur);
			}
		}
	}

	/* The insert succeeded if we got here, so undo it on any error. */
	if (rc) {
		member->vol = NULL;
		EngFncs->delete_element(elem);
	}

	md_volume_get_super_info(vol, &sinfo);
	vol->nr_disks = sinfo.nr_disks;
	vol->raid_disks = sinfo.raid_disks;
	vol->active_disks = sinfo.active_disks;
	vol->spare_disks = sinfo.spare_disks;
	vol->working_disks = sinfo.working_disks;
	vol->failed_disks = sinfo.failed_disks;

	LOG_DEBUG("MD region %s: nr_disks(%d) raid_disks(%d) active_disks(%d)"
		  " spare_disks(%d) working_disks(%d) failed_disks(%d).\n",
		  vol->name, vol->nr_disks, vol->raid_disks, vol->active_disks,
		  vol->spare_disks, vol->working_disks, vol->failed_disks);

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * md_write_sbs_to_disk
 *
 * Stamp the master superblock (update time, event counter) and ask the
 * handler to write a superblock for the members of the array.  For a
 * MULTIPATH array, writing stops after the first successful write.
 * The NEW / DISK_PENDING flags are cleared on every member regardless.
 *
 * Returns EFAULT when vol or vol->region is missing, EINVAL when there is
 * no master superblock or the region is still active (unless
 * MD_COMMIT_DONT_CHECK_ACTIVE is set), otherwise the first write error or 0.
 * A write error marks the volume MD_CORRUPT.
 */
int md_write_sbs_to_disk(md_volume_t * vol)
{
	md_member_t *member;
	list_element_t iter;
	boolean stop_writing = FALSE;
	int level;
	int wr_rc;
	int rc = 0;

	LOG_ENTRY();
	if (!vol || !vol->region) {
		rc = EFAULT;
		goto out;
	}

	if (!vol->sb) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	if (!(vol->commit_flag & MD_COMMIT_DONT_CHECK_ACTIVE) &&
	    md_is_region_active(vol->region)) {
		LOG_ERROR("Region %s is still active, skip writting superblocks\n",
			  vol->region->name);
		rc = EINVAL;
		goto out;
	}

	vol->sb_func->set_utime(vol->sb);
	vol->sb_func->increment_events(vol->sb);

	level = vol->sb_func->get_level(vol->sb);

	LIST_FOR_EACH(vol->members, iter, member) {
		if (stop_writing == FALSE) {
			wr_rc = vol->sb_func->write_sb(member);
			if (wr_rc) {
				/* Keep the first error, but try the remaining members. */
				if (!rc) {
					rc = wr_rc;
				}
			} else if (level == MD_LEVEL_MULTIPATH) {
				stop_writing = TRUE;
			}
		}
		member->flags &= ~(MD_MEMBER_NEW | MD_MEMBER_DISK_PENDING);
	}

	if (rc) {
		vol->flags |= MD_CORRUPT;
	} else {
		vol->flags &= ~(MD_DIRTY | MD_NEW_REGION);
		vol->commit_flag &= ~(MD_COMMIT_DONT_CHECK_ACTIVE);
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * md_volume_get_name
 *
 * Have the superblock handler fill vol->name from the master superblock,
 * then return a pointer to that buffer (owned by the volume).
 */
const char * md_volume_get_name(md_volume_t *vol)
{
	char *name_buf = vol->name;

	vol->sb_func->get_name(name_buf, vol->sb);
	return name_buf;
}

/*
 * md_volume_set_master_sb
 *
 * Install a copy of "sb" as the volume's master superblock.  Any previous
 * master superblock is freed first.  The handler table is looked up in
 * sb_handlers[] by major version, and on success md_minor, raid_disks,
 * personality and chunksize are refreshed from the new superblock.
 *
 * Returns EINVAL for a NULL sb or an unknown major version, otherwise the
 * result of the handler's duplicate_sb.
 */
int md_volume_set_master_sb(md_volume_t *vol, md_sb_ver_t *sb_ver, void *sb)
{
	md_super_info_t sinfo;
	int rc;

	LOG_ENTRY();

	if (!sb) {
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	/* Only versions that have an entry in sb_handlers[] are accepted. */
	if (sb_ver->major_version != MD_SB_VER_0 &&
	    sb_ver->major_version != MD_SB_VER_1) {
		LOG_MD_BUG();
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	if (vol->sb) {
		LOG_DEBUG("%s: Replacing master superblock.\n", vol->name);
		EngFncs->engine_free(vol->sb);
		vol->sb = NULL;
		vol->sb_func = NULL;
	}

	vol->sb_ver = *sb_ver;
	vol->sb_func = sb_handlers[sb_ver->major_version];

	rc = vol->sb_func->duplicate_sb(&vol->sb, sb);
	if (rc) {
		LOG_EXIT_INT(rc);
		return rc;
	}

	md_volume_get_super_info(vol, &sinfo);
	vol->md_minor = sinfo.md_minor;
	vol->raid_disks = sinfo.raid_disks;
	vol->personality = level_to_pers(sinfo.level);
	vol->chunksize = sinfo.chunksize;

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * md_read_saved_info
 *
 * Allocate a saved-info buffer, attach it to the member and let the
 * volume's superblock handler read into it.  The data is accepted only if
 * the signature matches, the checksum matches, and one of the
 * EXPAND/SHRINK in-progress flags is set.
 *
 * On any failure the buffer is freed and member->saved_info is reset to
 * NULL.  Returns 0, ENOMEM, EINVAL (failed validation) or the handler's
 * read error.
 */
int md_read_saved_info(md_member_t *member)
{
	md_saved_info_t *saved;
	int rc;

	LOG_ENTRY();
	saved = EngFncs->engine_alloc(MD_SAVED_INFO_BYTES);
	if (!saved) {
		LOG_CRITICAL("No memory to read MD saved info.\n");
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	/* The handler reads through member->saved_info, so attach it first. */
	member->saved_info = saved;
	rc = member->vol->sb_func->read_saved_info(member);

	if (rc == 0) {
		if (saved->signature != MD_SAVED_INFO_SIGNATURE) {
			LOG_DEBUG("Not a valid signature.\n");
			rc = EINVAL;
		} else if (saved->csum != md_calc_saved_info_csum(saved)) {
			LOG_DEBUG("Checksum is invalid.\n");
			rc = EINVAL;
		} else if ((saved->flags & (MD_SAVED_INFO_EXPAND_IN_PROGRESS |
					    MD_SAVED_INFO_SHRINK_IN_PROGRESS)) == 0) {
			LOG_DEBUG("Saved flag is not EXPAND nor SHRINK.\n");
			rc = EINVAL;
		}
	}

	if (rc != 0) {
		EngFncs->engine_free(saved);
		member->saved_info = NULL;
	}
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * md_member_set_sb
 *
 * Give the member its own copy of superblock "sb", freeing any copy it
 * already holds, then let the handler load this device's info from it.
 *
 * Returns EINVAL when member or sb is NULL, or when the member is not
 * attached to a volume with a handler table; otherwise the result of the
 * handler's duplicate_sb.
 */
int md_member_set_sb(md_member_t *member, void *sb)
{
	struct super_func *ops;
	int rc;

	LOG_ENTRY();

	if (!member || !sb) {
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	if (!member->vol || !member->vol->sb_func) {
		LOG_MD_BUG();
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}
	ops = member->vol->sb_func;

	if (member->sb) {
		LOG_DEBUG("%s: Replacing superblock.\n", member->obj->name);
		EngFncs->engine_free(member->sb);
		member->sb = NULL;
	}

	rc = ops->duplicate_sb(&member->sb, sb);
	if (rc == 0) {
		ops->load_this_device_info(member);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * md_member_get_disk_major
 *
 * Return the member's device major number: taken from this_disk in the
 * member's superblock for version 0, from the underlying storage object
 * for version 1.  Returns -1 when the member has no volume or the
 * superblock major version is unknown.
 */
int md_member_get_disk_major(md_member_t *member)
{
	int dev_major = -1;

	LOG_ENTRY();
	if (!member->vol) {
		LOG_EXIT_INT(-1);
		return -1;
	}

	if (member->vol->sb_ver.major_version == MD_SB_VER_0) {
		dev_major = ((mdp_super_t *)member->sb)->this_disk.major;
	} else if (member->vol->sb_ver.major_version == MD_SB_VER_1) {
		dev_major = member->obj->dev_major;
	}

	LOG_EXIT_INT(dev_major);
	return dev_major;
}

/*
 * md_member_get_disk_minor
 *
 * Return the member's device minor number: taken from this_disk in the
 * member's superblock for version 0, from the underlying storage object
 * for version 1.  Returns -1 when the member has no volume or the
 * superblock major version is unknown.
 */
int md_member_get_disk_minor(md_member_t *member)
{
	int dev_minor = -1;

	LOG_ENTRY();
	if (!member->vol) {
		LOG_EXIT_INT(-1);
		return -1;
	}

	if (member->vol->sb_ver.major_version == MD_SB_VER_0) {
		dev_minor = ((mdp_super_t *)member->sb)->this_disk.minor;
	} else if (member->vol->sb_ver.major_version == MD_SB_VER_1) {
		dev_minor = member->obj->dev_minor;
	}

	LOG_EXIT_INT(dev_minor);
	return dev_minor;
}

/*
 * md_member_get_raid_disk
 *
 * Ask the volume's superblock handler for this member's disk info and
 * return its raid_disk field.  Returns -1 when the member is not attached
 * to a volume.
 */
int md_member_get_raid_disk(md_member_t *member)
{
	mdu_disk_info_t disk_info;
	int raid_disk = -1;

	LOG_ENTRY();
	if (member->vol) {
		member->vol->sb_func->get_sb_disk_info(member, &disk_info);
		raid_disk = disk_info.raid_disk;
	}
	LOG_EXIT_INT(raid_disk);
	return raid_disk;
}

/*
 * md_object_usable_size
 *
 * Compute how much of "obj" remains for data once room for the MD
 * superblock of the given version has been set aside.
 *
 *   obj       - object whose size field is the starting point
 *   sb_ver    - superblock version; major selects the 0.x or 1.x rule,
 *               minor (1.x only, 0..2) selects how much is reserved
 *   chunksize - when non-zero, the result is rounded down with a
 *               (chunksize - 1) mask, which assumes chunksize is a power
 *               of two in the same unit as the size -- TODO confirm
 *
 * Returns the usable size, or 0 (after LOG_MD_BUG) for an unknown
 * major/minor version.
 */
u_int64_t md_object_usable_size(storage_object_t *obj, md_sb_ver_t *sb_ver, u_int32_t chunksize)
{
	u_int64_t size;

	if (sb_ver->major_version == MD_SB_VER_0) {
		size = MD_NEW_SIZE_SECTORS(obj->size);
	} else if (sb_ver->major_version == MD_SB_VER_1) {
		size = obj->size;
		switch (sb_ver->minor_version) {
		case 0:
			size -= 8*2;
			/* int constant: ~7 sign-extends to 64 bits, so this mask is safe. */
			size &= ~(4*2-1);
			break;
		case 1:
			size -= MD_SB_1_SECTORS;
			break;
		case 2:
			size -= (4*2 + MD_SB_1_SECTORS);
			break;
		default:
			LOG_MD_BUG();
			return 0;
		}
	} else {
		LOG_MD_BUG();
		return 0;
	}

	if (chunksize) {
		/*
		 * Widen chunksize BEFORE complementing.  ~(chunksize - 1)
		 * evaluated in 32 bits is zero-extended when combined with
		 * the 64-bit size, which would wipe bits 32..63 and truncate
		 * the size of very large objects.
		 */
		size &= ~((u_int64_t)chunksize - 1);
	}
	return size;
}

/*
 * md_volume_set_name
 *
 * Set vol->name.  With a non-NULL "name" the string is copied verbatim;
 * with NULL the superblock handler derives the name from the master
 * superblock.
 *
 * NOTE(review): the copy is an unbounded strcpy -- callers presumably
 * guarantee that "name" fits in vol->name; confirm against md.h.
 */
void md_volume_set_name(md_volume_t *vol, const char *name)
{
	if (name) {
		strcpy(vol->name, name);
		return;
	}
	vol->sb_func->get_name(vol->name, vol->sb);
}

/*
 * md_read_metadata
 *
 * Read "sects" sectors of MD metadata (superblock, saved info) starting
 * at sector "loc" of "obj" into "buf".
 *
 * - Objects whose data_type is not DATA_TYPE are skipped: nothing is read
 *   and 0 is returned.
 * - Active objects are read through an O_SYNC file descriptor after
 *   md_ioctl_flush_buffer_cache(), so the data comes from disk.
 * - Inactive objects are read with the READ engine service.
 *
 * Returns 0 on success, EIO on any open/read failure.
 */
static int md_read_metadata(storage_object_t *obj, u_int64_t loc, void *buf, u_int32_t sects)
{
	u_int32_t nbytes;
	int fd;
	int rc = 0;

	LOG_ENTRY();

	if (obj->data_type != DATA_TYPE) {
		LOG_DETAILS("Object not data type, skipping %s.\n", obj->name);
		LOG_EXIT_INT(rc);
		return rc;
	}

	LOG_DEBUG("Looking for MD metadata at %"PRIu64" on %s.\n", loc, obj->name);

	if (!(obj->flags & SOFLAG_ACTIVE)) {
		/* Inactive object: go through the EVMS Engine read service. */
		if (READ(obj, loc, sects, (char*)buf)) {
			rc = EIO;
		}
	} else {
		/* Active object: make sure that we read what's on disk. */
		md_ioctl_flush_buffer_cache(obj);
		fd = EngFncs->open_object(obj, O_RDONLY | O_SYNC);
		if (fd > 0) {
			nbytes = sects<<EVMS_VSECTOR_SIZE_SHIFT;
			rc = EngFncs->read_object(obj, fd, buf, nbytes,
						  loc<<EVMS_VSECTOR_SIZE_SHIFT);
			if (rc != nbytes) {
				/* Short read or error: rc still holds the raw result here. */
				LOG_ERROR("READ from %s failed, rc=%d.\n", obj->name, rc);
				rc = EIO;
			} else {
				rc = 0;
			}
			EngFncs->close_object(obj, fd);
		} else {
			LOG_ERROR("Error opening object %s.\n", obj->name);
			rc = EIO;
		}
	}

	if (rc) {
		LOG_ERROR("Error reading MD metadata from object %s.\n", obj->name);
	}
	LOG_EXIT_INT(rc);
	return rc;
}


/*
 * md_read_sb0
 *
 * Try to read a version-0 MD superblock from "obj".  The superblock is
 * looked for at MD_NEW_SIZE_SECTORS(obj->size) and checked with
 * sb0_validate_sb().
 *
 * On success *super receives a newly allocated superblock that the caller
 * now owns.  On failure the buffer is freed and *super is left untouched.
 * Returns 0, ENOMEM, or the read/validation error.
 */
int md_read_sb0(storage_object_t *obj, void **super)
{
	mdp_super_t *candidate;
	int rc;

	LOG_ENTRY();
	candidate = EngFncs->engine_alloc(MD_SB_BYTES);
	if (!candidate) {
		LOG_CRITICAL("No memory.\n");
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	rc = md_read_metadata(obj, MD_NEW_SIZE_SECTORS(obj->size),
			      candidate, MD_SB_SECTORS);
	if (rc == 0) {
		rc = sb0_validate_sb(candidate);
	}

	if (rc != 0) {
		LOG_DEBUG("(%s) does not have MD superblock.\n", obj->name);
		EngFncs->engine_free(candidate);
	} else {
		*super = candidate;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * md_read_sb1
 *
 * Try to read a version-1 MD superblock from "obj".  Minor versions 0, 1
 * and 2 are probed in that order, each at the offset given by
 * sb1_minor_version_to_super_offset().  A candidate is accepted when its
 * magic/major version validate and its recorded super_offset equals the
 * location it was read from.
 *
 * On success the superblock is converted to CPU byte order, *sb_ver is
 * filled in (major 1, the matching minor, patchlevel 0) and *super
 * receives the newly allocated superblock.  On failure the buffer is
 * freed and the error of the last probe is returned.
 */
int md_read_sb1(storage_object_t *obj, void **super, md_sb_ver_t *sb_ver)
{
	mdp_sb_1_t *candidate;
	u_int64_t lsn;
	int ver;
	int rc = 0;

	LOG_ENTRY();
	candidate = EngFncs->engine_alloc(MD_SB_1_BYTES);
	if (!candidate) {
		LOG_CRITICAL("No memory.\n");
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	for (ver = 0; ver <= 2; ver++) {
		lsn = sb1_minor_version_to_super_offset(obj, ver);

		rc = md_read_metadata(obj, lsn, candidate, MD_SB_1_SECTORS);
		if (rc) {
			continue;
		}

		rc = sb1_validate_sb(DISK_TO_CPU32(candidate->magic),
				     DISK_TO_CPU32(candidate->major_version));
		if (rc) {
			continue;
		}

		/* The superblock must claim to live where we found it. */
		if (DISK_TO_CPU64(candidate->super_offset) != lsn) {
			LOG_WARNING("Invalid MD super offset (%"PRIu64")"
				    " read at lsn (%"PRIu64").\n",
				    DISK_TO_CPU64(candidate->super_offset), lsn);
			rc = EINVAL;
			continue;
		}

		sb1_disk_to_cpu(candidate);
		sb_ver->major_version = MD_SB_VER_1;
		sb_ver->minor_version = ver;
		sb_ver->patchlevel = 0;
		LOG_DEBUG("Found version1 superblock on %s.\n", obj->name);
		sb1_print_sb(candidate);
		break;
	}

	if (rc) {
		LOG_DEBUG("(%s) does not have MD superblock.\n", obj->name);
		EngFncs->engine_free(candidate);
	} else {
		*super = candidate;
	}

	LOG_EXIT_INT(rc);
	return rc;
}


