#****************************************************************************
#  ##   ##         #####   #####  ##     **        NoSQL RDBMS - merge      *
#  ###  ##        ####### ####### ##     **        $Revision: 2.1 $			*
#  #### ##        ###     ##   ## ##     ************************************
#  #######  ####  #####   ##   ## ##     **      Carlo Strozzi (c) 1998     *
#  ####### ######   ##### ## # ## ##     ************************************
#  ## #### ##  ##     ### ##  ### ##     **           Adapted by            *
#  ##  ### ###### ####### ######  ###### **          Carlo Strozzi          *
#  ##   ##  ####   #####   #### # ###### **     e-mail: carlos@linux.it     *
#****************************************************************************
#   NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.                          *
#   This program comes with ABSOLUTELY NO WARRANTY; for details             *
#   refer to the GNU General Public License.                                *
#****************************************************************************
# Original code: mergetbl,v 2.6 1993/08/24 14:52:45 hobbs
#****************************************************************************

# ---- Command line handling -------------------------------------------------
# Expected arguments after the options: one or more key column names followed
# by the merge_tbl file name.  The old_tbl itself is read from STDIN.
#
# Options recognised (any argument starting with the short letter matches):
#   -a, --add             set $ADD   (matching old rows are printed as well)
#   -d[STR], --delete[=STR]
#                         set $DEL; STR, when given, replaces the default
#                         delete string held in $DSTG
#   -n, --strip-header    set $NHDR  (no header is printed)
#   -r, --reverse         set $REV   (swap which table is emitted first when
#                                     the keys differ)
#   -s, --subset          set $SUB   (merge_tbl holds a subset of the columns)
#   -x, --debug           set $XBUG
$0 =~ s-.*/-- ;				# keep only the program's base name
$DSTG = "..DEL.." ;	# default delete string
# -sub: definition not clear for multiple key values in old_table.
while ( $ARGV[0] =~ /^-/ ){				# Get args
    $_ = shift ;
    if( /^-a.*/ || /^--add$/ ){ $ADD++ ; next ; }
    if( /^-d(.*)/ || /^--delete=?(.*)/ ){
	$DEL++ ;
	# Test the length, not the truth value, so that a delete string
	# of "0" is honoured instead of being silently ignored.
	$DSTG = $1 if length $1 ;
	next ; }
    if( /^-n.*/ || /^--strip-header$/ ){ $NHDR++ ; next ; }
    if( /^-r.*/ || /^--reverse$/ ){ $REV++ ; next ; }
    if( /^-s.*/ || /^--subset$/ ){ $SUB++ ; next ; }
    if( /^-x.*/ || /^--debug$/ ){ $XBUG++ ; next ; }
    die "\n$0: unknown option: $_\n" ; 
}
# At least one key column plus the merge_tbl name must remain.
die "\n$0: not enough info given, check the documentation\n"
    unless @ARGV >= 2 ;
die "\n$0: can't use both add and delete options\n" if $DEL && $ADD ;
$mgtbl = pop(@ARGV) ;			# last argument is the merge_tbl file
# Three-argument open: the name is always treated as a plain file to read,
# so an argument such as "cmd |" or ">file" can no longer start a command
# or truncate a file.
open( MT, '<', $mgtbl ) || die "\n$0: can't open merge_tbl: $mgtbl\n" ;
# ---- old_tbl header (STDIN) ------------------------------------------------
# Comment lines are skipped.  The first non-comment line gives the column
# names (@CN, count in $NC); the second gives the column definitions, of
# which only the first word of each is kept in @CD.  Every line read here is
# echoed when running in subset mode with headers enabled.
while( <STDIN> ){
    print if $SUB && ! $NHDR ;
    next if /^\s*#/ ;			# comment
    chop ;
    if( ++$lln != 1 ){
	# second header line: column definitions, first word only
	@CD = map { /(\S+)/ ? $1 : undef } split( /\t/, $_ ) ;
	last ;
    }
    @CN = split( /\t/, $_ ) ;		# first header line: column names
    $NC = @CN ;
}
# ---- merge_tbl header (MT) -------------------------------------------------
# Same layout; only the column names (@MCN) are kept, the definition line is
# read and discarded.  Lines are echoed unless subset mode or header
# stripping is active.
while( <MT> ){
    print unless $SUB || $NHDR ;
    next if /^\s*#/ ;			# comment
    chop ;
    last if ++$mln != 1 ;		# definition line: not used
    @MCN = split( /\t/, $_ ) ;		# column names
}
# Outside subset mode both tables must have the same number of columns.
die "\n$0: different column count in merge_tbl, old_tbl\n"
    if ! $SUB && @CN != @MCN ;
# ---- Key column resolution -------------------------------------------------
# Every remaining command line argument names a key column.  For each one:
#   - its index in the old_tbl header is appended to @KEY,
#   - @numcmp gets 1 when the column definition contains 'N' (numeric
#     comparison) and 0 otherwise (string comparison),
#   - the merge_tbl must carry the same column name at the same index.
# Dies when a name is unknown or when the two tables disagree on a key column.
for $col (@ARGV){
    for( $k=$i=0 ; $i < @CN ; $i++ ){
	next unless $col eq $CN[$i] ;
	$k++ ;
	push( @KEY, $i ) ;
	push( @numcmp, ( $CD[$i] =~ /N/i ? 1 : 0 ) ) ;
	unless( $MCN[$i] eq $CN[$i] ){	# chk key col same both tbls
	    die "$0: key column not same in tables: $col\n" ; }
    }
    die "\n$0: column name no match: $col\n" unless $k ;
}
# $delx is the index of the column inspected for the delete string: the first
# column that is not a key column.  It has to be derived from the complete
# @KEY list; working it out per key (as was done before) could select another
# key column when several keys are given.  Stays -1 if every column is a key.
$delx = -1 ;
{
    my %is_key ;
    @is_key{@KEY} = (1) x @KEY ;
    for $i ( 0 .. $#CN ){
	next if $is_key{$i} ;
	$delx = $i ;
	last ;
    }
}
# ---- Subset Translation Index ----------------------------------------------
# In subset mode, %STX maps the index of every non-key merge_tbl column to
# the index of the old_tbl column carrying the same name.  Dies when a
# merge_tbl column has no counterpart in old_tbl.
if( $SUB ){
    MERGE_COL:
    for my $mcol ( 0 .. $#MCN ){
	my $name = $MCN[$mcol] ;
	# key columns are compared, never copied
	next MERGE_COL if grep { $name eq $CN[$_] } @KEY ;
	# first old_tbl column with that name
	my ($target) = grep { $name eq $CN[$_] } 0 .. $#CN ;
	die "\n$0: merge column name no match: $name\n"
	    unless defined $target ;
	$STX{$mcol} = $target ;
    }
}
# ---- Main merge loop -------------------------------------------------------
# Both inputs are walked in parallel, one row at a time ($a/@F from old_tbl,
# $b/@G from merge_tbl).  cmp_key decides which side is emitted next; rows
# with equal keys are handed to do_replace.
read_old() ; read_merge() ;			# prime both inputs
for(;;){
    if( $eof ){
	# One input is exhausted: flush whatever is left of the other.
	until( $eofa ){
	    print $a, "\n" ;
	    read_old() ;
	}
	unless( $eofb ){
	    # leftover merge rows are an error in subset mode
	    die "\n$0: bad merge_tbl order or format\n" if $SUB ;
	    until( $eofb ){
		print $b, "\n" ;
		read_merge() ;
	    }
	}
	exit 0 ;
    }
    $c = cmp_key() ;
    if( $c == 0 ){			# old == merge
	do_replace() ;
	next ;
    }
    # old > merge: in subset mode every merge row must match an old row
    die "\n$0: merge_tbl key column NON match: $G[$KEY[0]]\n"
	if $c > 0 && $SUB ;
    # Normally the row with the smaller key goes out first; the reverse
    # option swaps the choice.
    if( ( $c < 0 ) xor $REV ){
	print $a, "\n" ;
	read_old() ;
    }
    else{
	print $b, "\n" ;
	read_merge() ;
    }
}
# do_replace: handle a run of rows whose keys are equal in both tables.
# First every merge_tbl row of the run is emitted (unless the delete option
# is on and the row carries the delete string); in subset mode the merge
# values are first copied into the current old_tbl row via %STX.  Then the
# matching old_tbl rows are consumed, and printed only with the add option.
sub do_replace {				# replace or delete row(s)
    # merge_tbl side of the run
    while( 1 ){
	if( ! $DEL || ! del_stg() ){
	    if( $SUB ){		# merge line b into line a
		@N = @F ;
		# keys and values come back in matching order
		@N[ values %STX ] = @G[ keys %STX ] ;
		@G = @N ;
		$b = join( "\t", @G ) ;
	    }
	    print $b, "\n" ;
	}
	@PG = @G ;		# remember this row for same_keyb
	read_merge() ;
	last if $eofb || ! same_keyb() ;
    }
    # old_tbl side of the run
    while( 1 ){
	print $a, "\n" if $ADD ;
	@PF = @F ;		# remember this row for same_keya
	read_old() ;
	last if $eofa || ! same_keya() ;
    }
}
# same_keya: returns 1 when every key column of the current old_tbl row (@F)
# equals the same column of the previous one (@PF), 0 otherwise.  Columns
# flagged in @numcmp are compared numerically, the others as strings.
sub same_keya {
    for my $n ( 0 .. $#KEY ){
	my $col = $KEY[$n] ;
	my $differs = $numcmp[$n]
	    ? ( $F[$col] != $PF[$col] )
	    : ( $F[$col] ne $PF[$col] ) ;
	return 0 if $differs ;
    }
    return 1 ;
}
# same_keyb: returns 1 when every key column of the current merge_tbl row
# (@G) equals the same column of the previous one (@PG), 0 otherwise.
# Columns flagged in @numcmp are compared numerically, the others as strings.
sub same_keyb {
    for my $n ( 0 .. $#KEY ){
	my $col = $KEY[$n] ;
	my $differs = $numcmp[$n]
	    ? ( $G[$col] != $PG[$col] )
	    : ( $G[$col] ne $PG[$col] ) ;
	return 0 if $differs ;
    }
    return 1 ;
}
# del_stg: returns 1 when column $delx of the current merge_tbl row (@G)
# holds exactly the delete string $DSTG, 0 otherwise.
sub del_stg {
    return ( $G[$delx] eq $DSTG ) ? 1 : 0 ;
}
# cmp_key: compares the key columns of the current old_tbl row (@F) with
# those of the current merge_tbl row (@G), in the order the keys were given.
# Returns -1, 0 or 1 for old < merge, old == merge, old > merge.  Columns
# flagged in @numcmp use numeric comparison, the others string comparison.
sub cmp_key {
    for my $n ( 0 .. $#KEY ){
	my $col = $KEY[$n] ;
	my $order = $numcmp[$n]
	    ? ( $F[$col] <=> $G[$col] )		# numeric comparison
	    : ( $F[$col] cmp $G[$col] ) ;	# string comparison
	return $order if $order ;		# first differing key decides
    }
    return 0 ;
}
# read_old: read the next row of old_tbl from STDIN.
# On success $a holds the line without its trailing newline and @F holds its
# tab separated fields (at most $NC of them).  At end of input both $eof and
# $eofa are incremented and @F is left untouched.
sub read_old {			# read next line from old_tbl into $a & @F
    $a = <STDIN> ;
    # Test for definedness: a final line consisting of "0" without a
    # newline is data, not end of file.
    if( defined $a ){
	# chomp only strips a line terminator, so a last line lacking
	# one keeps its final character.
	chomp $a ;
	@F = split( /\t/, $a, $NC ) ; }
    else{
	$eof++ ; $eofa++ ; }
}
# read_merge: read the next row of merge_tbl from the MT file handle.
# On success $b holds the line without its trailing newline and @G holds its
# tab separated fields (at most $NC of them).  At end of input both $eof and
# $eofb are incremented and @G is left untouched.
sub read_merge {		# read next line from merge_tbl into $b & @G
    $b = <MT> ;
    # Test for definedness: a final line consisting of "0" without a
    # newline is data, not end of file.
    if( defined $b ){
	# chomp only strips a line terminator, so a last line lacking
	# one keeps its final character.
	chomp $b ;
	@G = split( /\t/, $b, $NC ) ; }
    else{
	$eof++ ; $eofb++ ; }
}
