/*
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
**
** This program and library is free software; you can redistribute it and/or
** modify it under the terms of the GNU (Library) General Public License
** as published by the Free Software Foundation; either version 2
** of the License, or any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU (Library) General Public License for more details.
**
** You should have received a copy of the GNU (Library) General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
**-----------------------------------------------------------------
** Fixed the merge option -M
** G. Hill 3/7/97
**
** Changed readindexline, mergeindexentries, printindexentry and
** added marknumMerge, addtoresultlistMerge, markentrylistMerge,
** ismarkedMerge to add support for METADATA
** G. Hill 3/26/97 ghill@library.berkeley.edu
**
** change sprintf to snprintf to avoid corruption
** added safestrcpy() macro to avoid corruption from strcpy overflow
** SRE 11/17/99
**
*/

#include <assert.h> /* for bug hunting */
#include "swish.h"
#include "merge.h"
#include "error.h"
#include "search.h"
#include "index.h"
#include "string.h"	
#include "hash.h"
#include "mem.h"
#include "docprop.h"

/* The main merge function - it accepts three file names: the two
** index files to merge (file1, file2) and the merged index to
** write (outfile).
** This is a bit hairy. It basically acts as a zipper,
** zipping up both index files into one.
**
** Outline, as implemented below:
**   1. open both inputs and check their headers,
**   2. read each header and combine the header settings,
**   3. read stopwords, meta names and file entries of both inputs,
**   4. walk both word lists in parallel, merging entries for equal words,
**   5. write header, words, stopwords, file info, file offsets and
**      meta names, then reopen the output and fill in the offset and
**      hash tables that were first written as zeros.
**
** Failures to open/validate an input or to open the output for writing
** are reported through progerr() (presumably it does not return - the
** code following each call assumes the file pointer is usable).
*/

void readmerge(file1, file2, outfile)
char *file1;
char *file2;
char *outfile;
{
	int i, j, indexfilenum1, indexfilenum2, wordsfilenum1, wordsfilenum2, result, skipwords, skipfiles;
	long limit1, limit2, fileinfo1, fileinfo2, offsetstart, hashstart;
	struct indexentry *ip1, *ip2, *ip3;
	int endip1,endip2;
	struct indexentry *buffer1, *buffer2;
	FILE *fp1, *fp2, *fp3;
	char *filename, *title;
	int size;
	struct metaMergeEntry *metaFile1, *metaFile2;
	int firstTime = 1;

	/* Per-file copies (suffix 1/2) and merged versions (suffix M) of the
	** header settings. */
	char *wordchars1,*beginchars1,*endchars1,*ignorelastchar1,*ignorefirstchar1;
	char *wordchars2,*beginchars2,*endchars2,*ignorelastchar2,*ignorefirstchar2;
	char *wordcharsM,*begincharsM,*endcharsM,*ignorelastcharM,*ignorefirstcharM;
	int applyStemmingRules1,applySoundexRules1,minwordlimit1,maxwordlimit1;
	int applyStemmingRules2,applySoundexRules2,minwordlimit2,maxwordlimit2;

	/* Reset the file-info hash table before loading any file entries. */
	initindexfilehashlist();
	
	/* remapVar is used by addindexfilelist() as the running count of
	** distinct files and needs to be reset each time two indexes
	** are merged.
	*/
	remapVar = 0;
	metaFile1 = metaFile2 = NULL;
	
	/* Reset the old-number -> new-number file map. */
	initmapentrylist();
	
	/* Open both input indexes and make sure their headers are recognised. */
	if ((fp1 = openIndexFileForRead(file1)) == NULL) {
		snprintf(errorstr, MAXSTRLEN, "Couldn't read the index file \"%s\".",
			file1);
		progerr(errorstr);
	}
	if (!isokindexheader(fp1)) {
		snprintf(errorstr, MAXSTRLEN, "\"%s\" has an unknown format.",
			file1);
		progerr(errorstr);
	}
	if ((fp2 = openIndexFileForRead(file2)) == NULL) {
		snprintf(errorstr, MAXSTRLEN, "Couldn't read the index file \"%s\".",
			file2);
		progerr(errorstr);
	}
	if (!isokindexheader(fp2)) {
		snprintf(errorstr, MAXSTRLEN, "\"%s\" has an unknown format.",
			file2);
		progerr(errorstr);
	}

	/* Read index header values from File 1.
	** The values are picked up from the globals (wordchars, beginchars,
	** ...) right after readheader(), so readheader() presumably fills
	** those globals. Each string is duplicated and passed through
	** sortstring() before the two files are compared with strcmp().
	*/
	readheader(fp1);
	wordchars1=estrdup(wordchars);sortstring(wordchars1);
	beginchars1=estrdup(beginchars);sortstring(beginchars1);
	endchars1=estrdup(endchars);sortstring(endchars1);
	ignorelastchar1=estrdup(ignorelastchar);sortstring(ignorelastchar1);
	ignorefirstchar1=estrdup(ignorefirstchar);sortstring(ignorefirstchar1);
	applyStemmingRules1=applyStemmingRules;
	applySoundexRules1=applySoundexRules;
	minwordlimit1=minwordlimit;
	maxwordlimit1=maxwordlimit;
	indexfilenum1=totalfilesheader;
	wordsfilenum1=totalwordsheader;

	/* Read index header values from File 2 (same procedure as File 1) */
	readheader(fp2);
	wordchars2=estrdup(wordchars);sortstring(wordchars2);
	beginchars2=estrdup(beginchars);sortstring(beginchars2);
	endchars2=estrdup(endchars);sortstring(endchars2);
	ignorelastchar2=estrdup(ignorelastchar);sortstring(ignorelastchar2);
	ignorefirstchar2=estrdup(ignorefirstchar);sortstring(ignorefirstchar2);
	applyStemmingRules2=applyStemmingRules;
	applySoundexRules2=applySoundexRules;
	minwordlimit2=minwordlimit;
	maxwordlimit2=maxwordlimit;
	indexfilenum2=totalfilesheader;
	wordsfilenum2=totalwordsheader;

	/* Merge values of both files.
	** For every character-set setting: if the two (sorted) strings differ,
	** warn and store mergestrings() of both into the global; otherwise
	** store the common value. The per-file copies are freed afterwards.
	*/
	if(strcmp(wordchars1,wordchars2)) {
		printf("warning: WordCharacters do not match. Merging them\n");
		wordcharsM=mergestrings(wordchars1,wordchars2);
		wordchars=SafeStrCopy(wordchars,wordcharsM,&lenwordchars);
		efree(wordcharsM);
	} else {
		wordchars=SafeStrCopy(wordchars,wordchars1,&lenwordchars);
	}
	makelookuptable(wordchars,wordcharslookuptable);
	efree(wordchars1);efree(wordchars2);
	if(strcmp(beginchars1,beginchars2)) {
		printf("warning: BeginCharacters do not match. Merging them\n");
		begincharsM=mergestrings(beginchars1,beginchars2);
		beginchars=SafeStrCopy(beginchars,begincharsM,&lenbeginchars);
		efree(begincharsM);
	} else {
		beginchars=SafeStrCopy(beginchars,beginchars1,&lenbeginchars);
	}
	makelookuptable(beginchars,begincharslookuptable);
	efree(beginchars1);efree(beginchars2);
	if(strcmp(endchars1,endchars2)) {
		printf("warning: EndCharacters do not match. Merging them\n");
		endcharsM=mergestrings(endchars1,endchars2);
		endchars=SafeStrCopy(endchars,endcharsM,&lenendchars);
		efree(endcharsM);
	} else {
		endchars=SafeStrCopy(endchars,endchars1,&lenendchars);
	}
	makelookuptable(endchars,endcharslookuptable);
	efree(endchars1);efree(endchars2);
	/* No lookup table is rebuilt here for the two "ignore" settings. */
	if(strcmp(ignorefirstchar1,ignorefirstchar2)) {
		printf("warning: IgnoreFirstChar do not match. Merging them\n");
		ignorefirstcharM=mergestrings(ignorefirstchar1,ignorefirstchar2);
		ignorefirstchar=SafeStrCopy(ignorefirstchar,ignorefirstcharM,&lenignorefirstchar);
		efree(ignorefirstcharM);
	} else {
		ignorefirstchar=SafeStrCopy(ignorefirstchar,ignorefirstchar1,&lenignorefirstchar);
	}
	efree(ignorefirstchar1);efree(ignorefirstchar2);
	if(strcmp(ignorelastchar1,ignorelastchar2)) {
		printf("warning: IgnoreLastChar do not match. Merging them\n");
		ignorelastcharM=mergestrings(ignorelastchar1,ignorelastchar2);
		ignorelastchar=SafeStrCopy(ignorelastchar,ignorelastcharM,&lenignorelastchar);
		efree(ignorelastcharM);
	} else {
		ignorelastchar=SafeStrCopy(ignorelastchar,ignorelastchar1,&lenignorelastchar);
	}
	efree(ignorelastchar1);efree(ignorelastchar2);

	/* Stemming/soundex stay enabled only if both indexes used them.
	** Both word length limits take the smaller of the two values
	** (note: that includes maxwordlimit).
	*/
	applyStemmingRules=applyStemmingRules1 && applyStemmingRules2;
	applySoundexRules=applySoundexRules1 && applySoundexRules2;
	if(minwordlimit1<minwordlimit2) minwordlimit=minwordlimit1;
	else minwordlimit=minwordlimit2;
	if(maxwordlimit1<maxwordlimit2) maxwordlimit=maxwordlimit1;
	else maxwordlimit=maxwordlimit2;

	ip1 = ip2 = ip3 = NULL;
	endip1=endip2=0;
	buffer1 = buffer2 = NULL;
	if (verbose)
		printf("Counting files... ");
	/* Upper bounds for now: totalfiles is decremented by
	** addindexfilelist() for every duplicate path, and the number of
	** duplicate words (skipwords) is subtracted when the header is
	** printed.
	*/
	totalfiles = indexfilenum1 + indexfilenum2;
	totalwords = wordsfilenum1 + wordsfilenum2;
	if (verbose) 
		printf("%d files.\n", indexfilenum1 + indexfilenum2);
	/* The global offsets[] table is read right after each readoffsets()
	** call, so the per-file values are saved before the other file is
	** processed. limitN is the file position at which readindexline()
	** stops returning entries for file N.
	*/
	readoffsets(fp1);
	if (verbose) 
		printf("\nReading stopwords file 1...");
	readstopwords(fp1);
	limit1 = offsets[STOPWORDPOS];
	fileinfo1 = offsets[FILELISTPOS];
	metaFile1 = readMergeMeta(metaFile1,fp1);
	
	readoffsets(fp2);
	if (verbose) 
		printf("\nReading stopwords file 2...");
	readstopwords(fp2);
	limit2 = offsets[STOPWORDPOS];
	fileinfo2 = offsets[FILELISTPOS];
	metaFile2 = readMergeMeta(metaFile2,fp2);
	
	/* Create the merged list and modify the
	   individual ones with the new meta index
	*/	   
	metaEntryList = createMetaMerge(metaFile1, metaFile2);
	
	/* Load the file entries. File 1 keeps numbers 1..indexfilenum1;
	** file 2 is shifted by indexfilenum1 so the old numbers do not clash.
	*/
	if (verbose)
		printf("\nReading file 1 info ...");
	fseek(fp1, fileinfo1, 0);
	for (i = 1; i <= indexfilenum1; i++) {
		struct docPropertyEntry *docProperties;
		readFileEntry(fp1, &filename, &title, &size, &docProperties);
		addindexfilelist(i, filename, title, size, docProperties, &totalfiles);
		/* swap metaName values for properties */
		swapDocPropertyMetaNames(docProperties, metaFile1);
	}
	if (verbose)
		printf("\nReading file 2 info ...");
	fseek(fp2, fileinfo2, 0);
	for (i = 1; i <= indexfilenum2; i++) {
		struct docPropertyEntry *docProperties;
		readFileEntry(fp2, &filename, &title, &size , &docProperties);
		addindexfilelist(i + indexfilenum1, filename, title, size, docProperties, &totalfiles);
		/* swap metaName values for properties */
		swapDocPropertyMetaNames(docProperties, metaFile2);
	}
	
	if (verbose)
		printf("\nCreating output file ... ");
	if ((fp3 = openIndexFileForWrite(outfile)) == NULL) {
		snprintf(errorstr,
			MAXSTRLEN, "Couldn't write the merged index file \"%s\".",
			outfile);
		progerr(errorstr);
	}
	
	if (verbose)
		printf("\nMerging words... ");
	
	/* NOTE(review): the tables read here are zeroed right below, so these
	** calls are presumably made for their effect on the file positions
	** (the merge loop reads entries from wherever fp1/fp2 currently
	** point) - TODO confirm before removing.
	*/
	readoffsets(fp1);
	readhashoffsets(fp1);
	readoffsets(fp2);
	readhashoffsets(fp2);
	
	for (i = 0; i < MAXCHARS; i++)
		offsets[i] = 0;
	for (i = 0; i < SEARCHHASHSIZE; i++)
		hashoffsets[i] = 0;
	skipwords = 0;
	/* The zipper. bufferN != NULL means the current entry of file N has
	** not been consumed yet; a new entry is read only when bufferN is
	** NULL. endipN remembers that file N is exhausted so it is not read
	** again.
	*/
	while (1) {
		if (buffer1 == NULL) {
			if(endip1) 
				ip1=NULL;
			else ip1 = (struct indexentry *) 
				readindexline(fp1, limit1,metaFile1);
			if (ip1 == NULL) {
				endip1=1;
				/* On the very first pass ip2 has not been read
				** yet (it is still NULL), so do not stop here. */
				if (ip2 == NULL && !firstTime) {
					break;
				}
			}
			buffer1 = ip1;
		}
		firstTime =0;
		if (buffer2 == NULL) {
			if(endip2) 
				ip2=NULL;
			else ip2 = (struct indexentry *) 
				readindexline(fp2, limit2,metaFile2);
			if (ip2 == NULL){
				endip2=1;
				if (ip1 == NULL) {
					break;
				}
			}
			else 
				/* shift file 2's file numbers past file 1's and
				** apply the duplicate-file map */
				addfilenums(ip2, indexfilenum1);
			buffer2 = ip2;
		}
		/* result < 0: take the word from file 1; > 0: from file 2;
		** == 0: same word in both files. An exhausted file always
		** loses the comparison.
		*/
		if (ip1 == NULL)
			result = 1;
		else if (ip2 == NULL)
			result = -1;
		else 
			result = strcmp(ip1->word, ip2->word);
		if (!result) {
			/* mergeindexentries() builds a new entry with copies of
			** both result lists, so both inputs can be freed. */
			ip3 = (struct indexentry *) mergeindexentries(ip1, ip2);
			freeindexentry(ip1);
			freeindexentry(ip2);
			buffer1 = buffer2 = NULL;
			skipwords++;
		}
		else if (result < 0) {
			ip3 = ip1;
			buffer1 = NULL;
		}
		else {
			ip3 = ip2;
			buffer2 = NULL;
		}
		/* addentryMerge() copies the word and its locations, so ip3
		** (which may be ip1 or ip2 itself) is released here; the
		** consumed side is re-read on the next pass. */
		entrylist = (struct entryarray *)addentryMerge(entrylist,ip3);
		freeindexentry(ip3);
	}
	
	if (verbose) {
		if (skipwords)
			printf("%d redundant word%s.\n", skipwords,
			(skipwords == 1) ? "" : "s");
		else
			printf("no redundant words.\n");
	}

	/* Note: this message is printed regardless of the verbose flag. */
	printf("Computing hash table ...\n");fflush(stdout);
		/* Compute hash table for direct access */
	for (i=0; i<SEARCHHASHSIZE; i++) hashentries[i] = NULL;
	computeHash(entrylist);

	if (verbose)
		printf("\nPrinting header... ");

	printheader(fp3, outfile, (totalwords-skipwords), totalfiles,1);

	/* Reserve room for the offset table and the hash offset table by
	** writing zeros; their positions are remembered so the real values
	** can be written once they are known (see the end of the function).
	*/
	offsetstart = ftell(fp3);
	for (i = 0; i < MAXCHARS; i++)
		printlong(fp3,(long) 0);
	fputc('\n', fp3);
	
	hashstart = ftell(fp3);
	for (i = 0; i < SEARCHHASHSIZE; i++)
		printlong(fp3,(long) 0);
	fputc('\n', fp3);

	if (verbose)
		printf("\nPrinting words... \n");

	if(verbose)
		printf("Writing index entries ...\n");
	printindex(entrylist,fp3);
	fflush(stdout);

	if(verbose)
		printf("Writing stopwords ...\n");
	printstopwords(fp3);
	fflush(stdout);
	
	if (verbose)
		printf("\nMerging file info... ");
	fflush(stdout);
	
	/* Write one file entry per distinct file. i walks all old file
	** numbers, j is the next new number expected; an old number is
	** written only when it maps to exactly that new number, which skips
	** files that addindexfilelist() recorded as duplicates.
	*/
	offsets[FILELISTPOS] = ftell(fp3);
	for (i = j = 1; i <= indexfilenum1 + indexfilenum2; i++)
	{
		if (getmap(i) == j) {
			struct docPropertyEntry* docProperties;
			struct indexfileinfo * fileInfo;
			addtofilehashlist(j++ - 1, ftell(fp3));
			fileInfo = lookupindexfilenum(i, &docProperties);
			writeFileEntry(fileInfo->filename, fileInfo->title, fileInfo->size, fp3, &docProperties);
		}
	}
	
	skipfiles = (indexfilenum1 + indexfilenum2) - totalfiles;
	if (verbose) {
		if (skipfiles)
			printf("%d redundant file%s.\n", skipfiles,
			(skipfiles == 1) ? "" : "s");
		else
			printf("no redundant files.\n");
	}
	if(verbose)
		printf("Writing file offsets ...\n");
	printfileoffsets(fp3);
	if(verbose)
		printf("Writing MetaNames ...\n");
	printMetaNames(fp3);
	fclose(fp3);

	/* Reopen the output and overwrite the zero placeholders with the
	** final offset and hash offset tables.
	** NOTE(review): the result of openIndexFileForReadAndWrite() is not
	** checked for NULL before use.
	*/
	fp3 = openIndexFileForReadAndWrite(outfile);
	fseek(fp3, offsetstart, 0);
	for (i = 0; i < MAXCHARS; i++)
		printlong(fp3,offsets[i]);

	fseek(fp3, hashstart, 0);
	for (i = 0; i < SEARCHHASHSIZE; i++)
		printlong(fp3,hashoffsets[i]);
	printhash(fp3);
	fclose(fp3);
	
	
	fclose(fp1);
	fclose(fp2);
	
	if (verbose)
		printf("\nDone.\n");
}


/* Shifts every file number in an entry's result list by `num` and
** runs the shifted number through the file map.
** For instance, file 1 has file numbers going from 1 to 10, but so
** does file 2, so 10 has to be added to all the file numbers in
** file 2 before merging.
**
** Stored file numbers are in encoded form: each one is decoded,
** shifted, remapped with getmap() and encoded again in place.
*/

void addfilenums(ip, num)
struct indexentry *ip;
int num;
{
	struct resultMerge *cur;
	int shifted;

	for (cur = ip->result; cur != NULL; cur = cur->next) {
		shifted = decodefilenum(cur->filenum) + num;
		cur->filenum = encodefilenum(getmap(shifted));
	}
}

/* This reads the next word entry in the index file and puts the
** results in a newly allocated indexentry (word + result list).
**
** fp       - index file, positioned at the start of a word entry
** limit    - file position at which the word list ends
** metaFile - meta name table of this index; used to translate the
**            meta name indexes stored in the file (oldIndex) into the
**            indexes of the merged list (newIndex)
**
** Returns NULL when fp is already at `limit`, otherwise the new entry;
** the caller releases it with freeindexentry().
**
** The word buffer is static and grows on demand, so it is reused
** across calls.
*/

struct indexentry *readindexline(fp, limit, metaFile)
FILE *fp;
long limit;
struct metaMergeEntry *metaFile;
{
	int i, j, x, rank, filenum, structure,metaName,mergedMetaName,frequency, *position;
	static int filewordlen=0;
	static char *fileword=NULL;
	struct resultMerge *rp;
	struct indexentry *ip;
	struct metaMergeEntry* tmp;
	long nextposmetaname;
	
	j=rank=filenum=structure=metaName=mergedMetaName=frequency=0;
	position=NULL;
	nextposmetaname=0L;
	if(!filewordlen) fileword = (char *) emalloc((filewordlen=MAXWORDLEN) + 1);
	
	rp = NULL;
	
	if (limit == ftell(fp))
		return NULL;
	   /* Read Word len */
	uncompress(i,fp);
	if(i > filewordlen) {
		filewordlen = i + 100;
		fileword = (char *) erealloc(fileword,filewordlen + 1);
	}
	fread(fileword,1,i,fp);
	fileword[i]='\0';
	
	/* Jump hash offset */
	readlong(fp);
	
	/* The results are stored in groups, one group per meta name; a meta
	** name of 0 terminates the entry. Each group starts with the file
	** position at which the group ends.
	*/
	uncompress(metaName,fp);
	while(metaName) {
		nextposmetaname=readlong(fp);
		/* Translate the meta name ONCE per group. Doing it for every
		** result (on the already translated value) could translate it
		** a second time whenever a new index coincides with another
		** entry's old index.
		*/
		mergedMetaName = metaName;
		for(tmp=metaFile;tmp;tmp=tmp->next) {
			if (tmp->oldIndex == metaName) {
				mergedMetaName = tmp->newIndex;
				break;
			}
		}
		do {
			uncompress(filenum,fp);
			uncompress(rank,fp);
			uncompress(structure,fp);
			uncompress(frequency,fp);
			position=(int *)emalloc(frequency*sizeof(int));
			for(j=0;j<frequency;j++){
				uncompress(x,fp);
				position[j] = x;
			}
			rp = (struct resultMerge *) addtoresultlistMerge(rp, filenum, rank, structure,mergedMetaName,frequency,position);
			/* addtoresultlistMerge() stores its own copy of the
			** positions, so the scratch array must be released here. */
			efree(position);
			position = NULL;
		} while (ftell(fp)!=nextposmetaname);
		uncompress(metaName,fp);
	} 
	ip = (struct indexentry *) emalloc(sizeof(struct indexentry));
	ip->word = (char *) estrdup(fileword);
	ip->result = rp;
	
	return ip;
}

/* Registers one file entry of an input index so that it can later be
** found both by its file number and by its pathname. This is how
** redundant file information is detected.
**
** num           - file number of the entry (already shifted for file 2)
** filename, title, size, docProperties - the entry's data
** totalfiles    - running count of distinct files; decremented when
**                 the path is already known
**
** If the path was seen before, `num` is simply mapped onto the number
** returned by the path lookup and nothing is stored. Otherwise `num`
** is mapped onto the next free merged number (remapVar + 1) and two
** records are pushed into indexfilehashlist: one in the bucket of
** bignumhash(num), one in the bucket of bighash(path).
*/

void addindexfilelist(num, filename, title, size, docProperties, totalfiles)
int num;
char *filename;
char *title;
int size;
struct docPropertyEntry *docProperties;
int *totalfiles;
{
	static int lenpath=0;
	static char *path=NULL;
	struct indexfileinfo *bynum, *bypath;
	unsigned bucket;
	int known;

	/* first call: allocate the reusable path buffer */
	if (lenpath == 0) {
		lenpath = MAXSTRLEN;
		path = (char *)emalloc(lenpath + 1);
	}
	path = SafeStrCopy(path, filename, &lenpath);

	known = lookupindexfilepath(path);
	if (known != -1) {
		/* duplicate: one file less in the merged index */
		*totalfiles -= 1;
		remap(num, known);
		return;
	}

	/* new file: give it the next merged file number */
	remap(num, remapVar + 1);
	remapVar += 1;

	/* record reachable through the file number */
	bynum = (struct indexfileinfo *) emalloc(sizeof(struct indexfileinfo));
	bynum->filenum = num;
	bynum->filename = (char *) estrdup(filename);
	bynum->title = (char *) estrdup(title);
	bynum->size = size;
	bynum->path = (char *) estrdup(path);
	bynum->docProperties = docProperties;
	bucket = bignumhash(num);
	bynum->next = indexfilehashlist[bucket];
	indexfilehashlist[bucket] = bynum;

	/* record reachable through the pathname; it shares the
	** docProperties list with the record above - be careful */
	bypath = (struct indexfileinfo *) emalloc(sizeof(struct indexfileinfo));
	bypath->filenum = num;
	bypath->filename = (char *) estrdup(filename);
	bypath->title = (char *) estrdup(title);
	bypath->size = size;
	bypath->path = (char *) estrdup(path);
	bypath->docProperties = docProperties;
	bucket = bighash(path);
	bypath->next = indexfilehashlist[bucket];
	indexfilehashlist[bucket] = bypath;
}


/* Returns the file information record stored for a file number, or
** NULL if there is none.
**
** docProperties may be NULL. When it is not, *docProperties receives
** the record's property list, or NULL if no record was found.
*/

struct indexfileinfo *lookupindexfilenum(num, docProperties)
int num;
struct docPropertyEntry** docProperties;
{
	struct indexfileinfo *cur;

	if (docProperties != NULL)
		*docProperties = NULL;

	for (cur = indexfilehashlist[bignumhash(num)]; cur != NULL; cur = cur->next) {
		if (cur->filenum != num)
			continue;
		if (docProperties != NULL)
			*docProperties = cur->docProperties;
		return cur;
	}
	return NULL;
}

/* Returns the file number stored for a pathname, or -1 if the
** pathname has not been registered.
*/

int lookupindexfilepath(path)
char *path;
{
	struct indexfileinfo *cur;

	for (cur = indexfilehashlist[bighash(path)]; cur != NULL; cur = cur->next) {
		if (strcmp(cur->path, path) == 0)
			return cur->filenum;
	}
	return -1;
}

/* Concatenates the result lists of a word found in both index files.
**
** A new entry is returned whose word is a copy of ip1's word and whose
** result list holds copies of ip1's results followed by ip2's.
** ip1 and ip2 are left untouched; the caller frees them.
*/

struct indexentry *mergeindexentries(ip1, ip2)
struct indexentry *ip1;
struct indexentry *ip2;
{
	struct resultMerge *merged = NULL;
	struct resultMerge *src;
	struct indexentry *out;

	/* results of the first entry ... */
	for (src = ip1->result; src != NULL; src = src->next) {
		merged = (struct resultMerge *) addtoresultlistMerge(merged,
			src->filenum, src->rank, src->structure, src->metaName,
			src->frequency, src->position);
	}
	/* ... followed by the results of the second one */
	for (src = ip2->result; src != NULL; src = src->next) {
		merged = (struct resultMerge *) addtoresultlistMerge(merged,
			src->filenum, src->rank, src->structure, src->metaName,
			src->frequency, src->position);
	}

	out = (struct indexentry *) emalloc(sizeof(struct indexentry));
	out->word = (char *) estrdup(ip1->word);
	out->result = merged;

	return out;
}

/* Builds a heap-allocated entry from an index entry: the word is
** duplicated and every result is copied, in order, into a location
** node with its own copy of the position array.
*/

static struct entry *newmergeentry(ip)
struct indexentry *ip;
{
	struct resultMerge *rp;
	struct location *lp, *tail;
	struct entry *en;

	en = (struct entry *) emalloc(sizeof(struct entry));
	en->word = (char *) estrdup(ip->word);
	en->locationlist = NULL;
	tail = NULL;
	for (rp = ip->result; rp != NULL; rp = rp->next) {
		lp = (struct location *) emalloc(sizeof(struct location));
		lp->filenum = rp->filenum;
		lp->rank = rp->rank;
		lp->frequency = rp->frequency;
		lp->structure = rp->structure;
		lp->metaName = rp->metaName;
		lp->position = (int *) emalloc(rp->frequency*sizeof(int));
		CopyPositions(lp->position,0,rp->position,0,rp->frequency);
		lp->next = NULL;
		if (tail) tail->next = lp;
		else en->locationlist = lp;
		tail = lp;
	}
	return en;
}

/* Adds a word entry to the sorted array of entries that will make up
** the merged index.
**
** e  - the entry array so far, or NULL to create a new one
** ip - the index entry to add; its word and results are copied, so the
**      caller keeps ownership of ip
**
** Returns the (possibly newly created) array. The array is kept sorted
** by word (strcmp order); the insertion point is found with a binary
** search. Finding the word already present is treated as an internal
** error and terminates the program.
*/

struct entryarray *addentryMerge(e,ip)
struct entryarray *e;
struct indexentry *ip;
{
	int lo, hi, mid, cmp, found, i;
	struct entry *en;

	if (e == NULL) {
		/* first word: create the array with this single entry */
		e = (struct entryarray *) emalloc(sizeof(struct entryarray));
		e->maxsize = SEARCHHASHSIZE;   /* Put what you like */
		e->elist = (struct entry **) emalloc(e->maxsize*sizeof(struct entry *));
		e->currentsize = 1;
		e->elist[0] = newmergeentry(ip);
		return e;
	}

	/* Look for the position to insert using a binary search.
	** When the loop ends without a match, lo is the index of the first
	** element greater than the new word, i.e. the insertion point.
	*/
	lo = 0;
	hi = e->currentsize - 1;
	found = 0;
	while (hi >= lo) {
		mid = lo + (hi - lo)/2;
		cmp = strcmp(ip->word, e->elist[mid]->word);
		if (cmp == 0) {
			found = 1;
			break;
		}
		else if (cmp > 0) lo = mid + 1;
		else hi = mid - 1;
	}
	if (found) {
		printf("err: Merge.c Internal error\n.\n");
		exit(0);
	}

	en = newmergeentry(ip);

	/* Keep at least one free slot beyond the new size (the original
	** growth policy), doubling the capacity when needed. */
	if (e->currentsize + 1 >= e->maxsize) {
		e->maxsize *= 2;
		e->elist = (struct entry **) erealloc(e->elist,e->maxsize*sizeof(struct entry *));
	}
	/* Shift the tail up by one. The loop starts at the OLD element
	** count so that only initialised slots are read and nothing is
	** written past the new last element. */
	for (i = e->currentsize; i > lo; i--)
		e->elist[i] = e->elist[i-1];
	e->elist[lo] = en;
	e->currentsize++;

	return e;
}


/* Records that `oldnum` maps to `newnum`.
** This function is used to remap file numbers from index
** files to a new merged index file.
**
** The pair is pushed at the front of its hash bucket, so a later
** mapping for the same old number is found first by getmap().
*/

void remap(oldnum, newnum)
int oldnum;
int newnum;
{
	unsigned bucket = bignumhash(oldnum);
	struct mapentry *node;

	node = (struct mapentry *) emalloc(sizeof(struct mapentry));
	node->oldnum = oldnum;
	node->newnum = newnum;
	node->next = mapentrylist[bucket];

	mapentrylist[bucket] = node;
}

/* Returns the number that `num` was mapped to with remap().
** A number without a mapping is returned unchanged.
*/

int getmap(num)
int num;
{
	struct mapentry *cur;

	for (cur = mapentrylist[bignumhash(num)]; cur != NULL; cur = cur->next) {
		if (cur->oldnum == num)
			return cur->newnum;
	}
	return num;
}

/* Marks a number as having been printed by pushing it onto the
** front of its bucket in markentrylist.
*/

void marknum(num)
int num;
{
	unsigned bucket = bignumhash(num);
	struct markentry *node;

	node = (struct markentry *) emalloc(sizeof(struct markentry));
	node->num = num;
	node->next = markentrylist[bucket];

	markentrylist[bucket] = node;
}

/* Merge variant of marknum(): marks the pair (num, metaName).
** The bucket is chosen from `num` alone.
*/
void marknumMerge(num, metaName)
int num;
int metaName;
{
	unsigned bucket = bignumhash(num);
	struct markentryMerge *node;

	node = (struct markentryMerge *) emalloc(sizeof(struct markentryMerge));
	node->num = num;
	node->metaName = metaName;
	node->next = markentrylistMerge[bucket];

	markentrylistMerge[bucket] = node;
}

/* Has a number been printed?
** Returns 1 if `num` was marked with marknum(), 0 otherwise.
*/

int ismarked(num)
int num;
{
	struct markentry *cur;

	for (cur = markentrylist[bignumhash(num)]; cur != NULL; cur = cur->next) {
		if (cur->num == num)
			return 1;
	}
	return 0;
}


/* Merge variant of ismarked(): returns 1 if the pair (num, metaName)
** was marked with marknumMerge(), 0 otherwise.
*/
int ismarkedMerge(num,metaName)
int num;
int metaName;
{
	struct markentryMerge *cur;

	for (cur = markentrylistMerge[bignumhash(num)]; cur != NULL; cur = cur->next) {
		if (cur->num != num)
			continue;
		if (cur->metaName == metaName)
			return 1;
	}
	return 0;
}

/* Initialize the marking list.
*/

void initmarkentrylist()
{
	int i;
	struct markentry *mp;
	
	for (i = 0; i < BIGHASHSIZE; i++) {
		mp = markentrylist[i]; /* minor optimization */
		if (mp != NULL)
			efree(mp);
		markentrylist[i] = NULL;
	}
}

/* TAB */
/* gprof suggests that this is a major CPU eater  :-(, that's
   because it gets called a _lot_ rather than because it is inefficient...

   ... and it's probably an indicator that free() is a major CPU hog :-(

   you'd think that putting the NULL assignment into the if() condition
   would be fastest, but either gcc is really stupid, or really smart,
   because gprof showed that unconditionally setting it after reading it
   saved about 10% over setting it unconditionally at the end of the loop
   (that could be sampling error though), and setting it inside the loop
   definitely increased it by 15-20% ... go figure?   - TAB oct/99

   For reference:
   Reading specs from /usr/lib/gcc-lib/i386-redhat-linux/2.7.2.3/specs
   gcc version 2.7.2.3

   hhmm... I wonder if we should consider making it a macro? No, there are
   routines using 1/4 the CPU, getting called 20 times as often (compress)
   so obviously subroutine overhead isn't the issue...

*/

void initmarkentrylistMerge()
{
	int i;
	struct markentryMerge *mp;
	
	for (i = 0; i < BIGHASHSIZE; i++) {
		mp = markentrylistMerge[i];
		markentrylistMerge[i] = NULL; /* TAB */
		if (mp != NULL)
			efree(mp);
	}
}      


/* Initialize the main file list.
*/

void initindexfilehashlist()
{
	int i;
	struct indexfileinfo *ip;
	
	for (i = 0; i < BIGHASHSIZE; i++) {
		ip = indexfilehashlist[i];
		if (ip != NULL)
			efree(ip);
		indexfilehashlist[i] = NULL;
	}
}

/* Initialize the mapentrylist 
*/

void initmapentrylist()
{
	int i;
	struct mapentry *ip;
	
	for (i = 0; i < BIGHASHSIZE; i++) {
		ip = mapentrylist[i];
		if (ip != NULL)
			efree(ip);
		mapentrylist[i] = NULL;
	}
}



/* Releases an index entry: its word, every node of its result list
** (including each node's position array, when set) and finally the
** entry itself. ip must not be NULL.
*/

void freeindexentry(ip)
struct indexentry *ip;
{
	struct resultMerge *cur, *following;

	efree(ip->word);
	for (cur = ip->result; cur != NULL; cur = following) {
		if (cur->position != NULL)
			efree(cur->position);
		/* remember the link before the node is released */
		following = cur->next;
		efree(cur);
	}
	efree(ip);
}

/* Translates a file number into something that can be compressed.
**
** One is added for every full group of 127 numbers below `num`, so for
** num >= 1 the result is never a multiple of 128 (1..127 stay as they
** are, 128 becomes 129, 255 becomes 257, ...).
** decodefilenum() reverses the translation.
*/

/* Fast closed form from TAB; it was checked against the older
   implementation for inputs up to 30,000. */
int encodefilenum(num)
int num;
{
       return num + (num - 1) / 127;
}

/* Translates a compressed (encoded) file number into a correct file
** number: one is subtracted for every full group of 128 below `num`.
** This reverses encodefilenum() for file numbers >= 1.
*/

/* Fast closed form from TAB; as with the encoding algorithm, the old
   and the new implementation were run with 0 to 30,000 as input and
   they are identical in everything but speed - TAB - oct/99
*/

int decodefilenum(num)
int num;
{
       return num - (num - 1) / 128;
}

/* Similar to addtoresultlist, but also storing the meta name.
**
** Appends a new result node to the list `rp` and returns the list
** head. The node gets its own copy of the `frequency` positions, so
** the caller keeps ownership of `position`.
**
** The node appended last is remembered in a static variable and the
** next node is linked after it. A list must therefore be built by
** consecutive calls, starting with rp == NULL; interleaving the
** construction of two lists would link nodes into the wrong list.
*/

struct resultMerge *addtoresultlistMerge(rp, filenum, rank, structure, metaName,frequency,position)
struct resultMerge *rp;
int filenum;
int rank;
int structure;
int metaName;
int frequency;
int *position;
{
	static struct resultMerge *tail;
	struct resultMerge *node;

	node = (struct resultMerge *) emalloc(sizeof(struct resultMerge));
	node->next = NULL;
	node->filenum = filenum;
	node->rank = rank;
	node->structure = structure;
	node->metaName = metaName;
	node->frequency = frequency;
	node->position = (int *)emalloc(frequency*sizeof(int));
	CopyPositions(node->position,0,position,0,frequency);

	if (rp != NULL)
		tail->next = node;	/* continue the list being built */
	else
		rp = node;		/* first node starts a new list */
	tail = node;

	return rp;
}

/* Reads the meta names of one index file into a metaMergeEntry list.
** Needs to be different from readMetaNames because it needs to start
** the meta name counter from zero for every file.
**
** metaFile - list to append to (NULL for an empty list)
** fp       - index file; the global offsets[] table must describe it
**
** Returns the list with one entry appended per meta name found.
** The name buffer is static and grows on demand.
*/
struct metaMergeEntry* readMergeMeta(metaFile,fp)
struct metaMergeEntry* metaFile;
FILE* fp;
{     
	static int wordlen=0;
	static char *word=NULL;
	int nextIndex, namelen, style;
	int isDocProp, isOnlyDocProp;

	if (wordlen == 0) {
		wordlen = MAXWORDLEN;
		word = emalloc(wordlen + 1);
	}

	nextIndex = 0;
	fseek(fp, offsets[METANAMEPOS], 0);

	/* Each record is: name length, name, style.
	** A name length of 0 ends the list. */
	uncompress(namelen,fp);
	while (namelen != 0) {
		if (namelen >= wordlen) {
			wordlen = namelen + 200;
			word = erealloc(word, wordlen + 1);
		}
		fread(word, 1, namelen, fp);
		word[namelen] = '\0';

		/* The style is stored plus one. After the decrement:
		**   0 -> normal meta name
		**   1 -> doc property name only
		**   2 -> both
		*/
		uncompress(style,fp);
		style--;
		isDocProp = (style > 0) ? 1 : 0;
		isOnlyDocProp = (style == 1) ? 1 : 0;
		metaFile = addMetaMerge(metaFile, word, isDocProp, isOnlyDocProp, &nextIndex);

		uncompress(namelen,fp);
	}
	return metaFile;
}

/* Adds an entry to the list of meta names for one index, 
** setting the new index to 0 - it will then be set by 
** createMetaMerge.
**
** metaFile      - list to append to (NULL for an empty list)
** metaWord      - meta name; it is lower-cased IN PLACE before being
**                 duplicated into the new entry
** isDocProp, isOnlyDocProp - flags copied into the entry
** counter       - running index for this file; the entry's oldIndex is
**                 taken from it and it is then advanced
**
** Indexes start at 2, and the value 1 and multiples of 128 are skipped.
** Returns the head of the list.
*/

struct metaMergeEntry* addMetaMerge(metaFile, metaWord, isDocProp, isOnlyDocProp, counter)
struct metaMergeEntry* metaFile;
char* metaWord;
int isDocProp, isOnlyDocProp;
int* counter;
{
	int i;
	struct metaMergeEntry* newEntry;
	struct metaMergeEntry* tmpEntry;
	
	if (*counter == 0)
		*counter = 2;
	else if ((*counter) == 1 ||  (!((*counter) % 128)) )
		(*counter)++;
	/* tolower() is only defined for unsigned char values (and EOF), so
	** characters above 0x7f must not be passed as negative chars. */
	for( i=0; metaWord[i]; i++)
		metaWord[i] = (char) tolower((unsigned char) metaWord[i]);
	newEntry = (struct metaMergeEntry*) emalloc(sizeof(struct metaMergeEntry));
	newEntry->metaName = (char*)estrdup(metaWord);
	newEntry->isDocProperty = isDocProp;
	newEntry->isOnlyDocProperty = isOnlyDocProp;
	newEntry->oldIndex = (*counter)++;
	newEntry->newIndex = 0;
	newEntry->next = NULL;
	if (metaFile)
    {
		/* append at the end so the file's order is kept */
		for(tmpEntry=metaFile;tmpEntry->next!=NULL;tmpEntry=tmpEntry->next)
			;
		tmpEntry->next = newEntry;
    }
	else
		metaFile = newEntry;
	
	return metaFile;
}

/* Creates the list of all the meta names found in the two indexes.
**
** Every entry of metaFile1, then every entry of metaFile2, is passed
** to addMetaMergeList(), which also stores the merged index in the
** entry's newIndex. The global metaEntryList is reset and rebuilt;
** its new value is returned as well.
*/
struct metaEntry* createMetaMerge(metaFile1, metaFile2)
struct metaMergeEntry* metaFile1;
struct metaMergeEntry* metaFile2;
{
	struct metaMergeEntry* sources[2];
	struct metaMergeEntry* cur;
	int nextIndex, which;

	sources[0] = metaFile1;
	sources[1] = metaFile2;

	metaEntryList = NULL;
	nextIndex = 0;
	for (which = 0; which < 2; which++) {
		for (cur = sources[which]; cur != NULL; cur = cur->next)
			metaEntryList = addMetaMergeList(metaEntryList, cur, &nextIndex);
	}

	return metaEntryList;
}

/* Adds an entry to the merged meta names list and changes the
** new index in the individual file entry.
**
** metaEntryList - merged list so far (NULL for an empty list)
** metaFileEntry - meta name entry of one input index; its name is
**                 lower-cased in place and its newIndex is set
** count         - running index of the merged list; indexes start at
**                 2, and the value 1 and multiples of 128 are skipped
**
** If the name is already in the merged list, the existing index is
** reused and the property flags are combined; otherwise a new entry is
** appended at the end and *count is advanced.
** Returns the head of the merged list.
*/

struct metaEntry* addMetaMergeList(metaEntryList,metaFileEntry,count)
struct metaEntry* metaEntryList;
struct metaMergeEntry* metaFileEntry;
int* count;
{
	int i;
	struct metaEntry* newEntry;
	struct metaEntry* tmpEntry;
	struct metaEntry* last;
	char *metaWord;

	/* The counter is adjusted even when the name turns out to exist. */
	if ((*count) == 0)
		*count = 2;
	else if ((*count) == 1 ||  (!((*count) % 128)) )
		(*count)++;

	metaWord = metaFileEntry->metaName;
	/* tolower() is only defined for unsigned char values (and EOF), so
	** characters above 0x7f must not be passed as negative chars. */
	for( i=0; metaWord[i]; i++)
		metaWord[i] = (char) tolower((unsigned char) metaWord[i]);

	last = NULL;
	for(tmpEntry=metaEntryList;tmpEntry;tmpEntry=tmpEntry->next)
	{
		if (!strcmp(tmpEntry->metaName,metaWord) )
		{
			/*
			 * Keep the docProperties fields in synch.
			 * The semantics we want for the metaEntry are:
			 *	isDocProperty = 1 if either index is using as PropertyName
			 *	isOnlyDocProperty = 1 if neither index is using as MetaName
			 */
			if (metaFileEntry->isDocProperty)	/* new entry is docProp, so assert it */
				tmpEntry->isDocProperty = 1;
			if (!metaFileEntry->isOnlyDocProperty)	/* new entry is not *only* docProp, so unassert that */
				tmpEntry->isOnlyDocProperty = 0;
			metaFileEntry->newIndex = tmpEntry->index;
			return metaEntryList;
		}
		last = tmpEntry;
	}

	/* Name not seen yet: append a new entry with the next index. */
	newEntry = (struct metaEntry*) emalloc(sizeof(struct metaEntry));
	newEntry->metaName = (char*)estrdup(metaWord);
	newEntry->index = *count;
	newEntry->next = NULL;
	newEntry->isDocProperty = metaFileEntry->isDocProperty;
	newEntry->isOnlyDocProperty = metaFileEntry->isOnlyDocProperty;
	metaFileEntry->newIndex = (*count)++;
	if (last != NULL)
		last->next = newEntry;
	else
		metaEntryList = newEntry;	/* list was empty */

	return metaEntryList;
}



  
