/*
 * Copyright 1995,96 Thierry Bousch
 * Licensed under the Gnu Public License, Version 2
 *
 * $Id: Float.c,v 2.2 1996/08/18 09:24:12 bousch Exp $
 *
 * Floating-point numbers
 */

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "saml.h"
#include "saml-errno.h"
#include "mnode.h"
#include "builtin.h"

#define FL(mnode)  ((big_float*)(mnode))
#define OFFSET_EXP (1 << 30)

typedef struct {
	struct mnode_header hdr;
	int blocks;
	int exponent;
	unsigned short d[0];
} big_float;

static s_mnode* float_build (const char*);
static gr_string* float_stringify (s_mnode*);
static s_mnode* float_add (s_mnode*, s_mnode*);
static s_mnode* float_sub (s_mnode*, s_mnode*);
static s_mnode* float_mul (s_mnode*, s_mnode*);
static int float_notzero (s_mnode*);
static int float_isneg (s_mnode*);
static int float_differ (s_mnode*, s_mnode*);
static int float_lessthan (s_mnode*, s_mnode*);
static s_mnode* float_zero (s_mnode*);
static s_mnode* float_negate (s_mnode*);
static s_mnode* float_one (s_mnode*);
static s_mnode* float_invert (s_mnode*);
static s_mnode* float_sqrt (s_mnode*);
static s_mnode* float2float (s_mnode*, s_mnode*);
static s_mnode* integer2float (s_mnode*, s_mnode*);

static unsafe_s_mtype MathType_Float = {
	"Float",
	free, float_build, float_stringify,
	NULL, NULL,
	float_add, float_sub, float_mul, mn_std_div, NULL,
	float_notzero, float_isneg, NULL, float_differ, float_lessthan,
	float_zero, float_negate, float_one, float_invert, float_sqrt
};

void init_MathType_Float (void)
{
	register_mtype(ST_FLOAT, &MathType_Float);
	register_CV_routine(ST_FLOAT, ST_FLOAT, float2float);
	register_CV_routine(ST_INTEGER, ST_FLOAT, integer2float);
}

static inline s_mnode* float_new (int blocks)
{
	return __mnalloc(ST_FLOAT,sizeof(big_float)+blocks*sizeof(short));
}

static s_mnode* float_short (short n, int shift, int blocks)
{
	int negative;
	s_mnode *mn;

	if (blocks < 4)
		blocks = 4;
	if (n == 0) {
		mn = float_new(0);
		FL(mn)->blocks = blocks;
		FL(mn)->exponent = 0;
		return mn;
	}
	mn = float_new(blocks);
	FL(mn)->blocks = blocks;
	negative = (n < 0);
	n = abs(n);
	while ((n & 32768) == 0)
		n += n, --shift;
	memset(FL(mn)->d, 0, blocks * sizeof(short));
	FL(mn)->d[0] = n;
	shift += 16 + OFFSET_EXP;
	FL(mn)->exponent = negative? -shift : shift;
	return mn;
}

static s_mnode* float_zero (s_mnode* model)
{
	return float_short(0, 0, FL(model)->blocks);
}

static s_mnode* float_one (s_mnode* model)
{
	return float_short(1, 0, FL(model)->blocks);
}

static void bump_mantissa (big_float* f)
{
	unsigned short *f_high, *f_low, *p;

	/* Add epsilon to the mantissa */
	f_high = &f->d[0];
	f_low  = &f->d[f->blocks - 1];
	for (p = f_low; p >= f_high; p--)
		if (++(*p) != 0)
			return;
	/* The mantissa has overflowed! Correct it and bump the exponent */
	if (f->exponent > 0)
		++(f->exponent);
	else
		--(f->exponent);
	*f_high = 0x8000;
}

static s_mnode* change_precision (s_mnode* n, int blocks)
{
	int iblocks = FL(n)->blocks;
	s_mnode *mn;

	if (iblocks != blocks) {
		mn = float_new(blocks);
		FL(mn)->blocks = blocks;
		FL(mn)->exponent = FL(n)->exponent;
		if (blocks > iblocks) {
			/* Pad with zeroes */
			memset(FL(mn)->d, 0, blocks * sizeof(short));
			memcpy(FL(mn)->d, FL(n)->d, iblocks * sizeof(short));
		} else {
			/* Truncate number to "blocks" 16-bit blocks,
			   rounding up if appropriate */
			memcpy(FL(mn)->d, FL(n)->d, blocks * sizeof(short));
			if (FL(n)->d[blocks] & 0x8000)
				bump_mantissa(FL(mn));
		}
		return mn;
	}
	/* No change, return the original mnode */
	return copy_mnode(n);
}

static s_mnode* promote_integer (s_mnode* n, int blocks)
{
	int iblocks, sign, exponent;
	int tmp, carry, empty, bits2shift;
	s_mnode *mn, *n1, *n2, *n3, *n4, *divisor;
	unsigned short fblock, *tbl, *tblcurr;
	gr_string *grs;

	grs = mnode_stringify(n);
	iblocks = 1 + (grs->len)/4; free(grs);
	if (blocks == 0)
		blocks = iblocks > 4 ? iblocks : 4;
	if (!mnode_notzero(n))
		return float_short(0, 0, blocks);
	/* Get the absolute value into n1 */
	sign = (mnode_isneg(n) ? -1 : 1);
	n1 = (sign > 0) ? copy_mnode(n) : mnode_negate(n);
	tbl = alloca(iblocks * sizeof(short));
	memset(tbl, 0, iblocks * sizeof(short));
	tblcurr = tbl + iblocks;
	divisor = mnode_build(ST_INTEGER, "65536");
	while (mnode_notzero(n1)) {
		n2 = mnode_div(n1, divisor);
		n3 = mnode_mul(n2, divisor);
		n4 = mnode_sub(n1, n3);
		grs = mnode_stringify(n4);
		grs = grs_append1(grs,'\0');
		*--tblcurr = atoi(grs->s);
		assert(tblcurr >= tbl);
		free(grs);
		unlink_mnode(n1);
		unlink_mnode(n3); unlink_mnode(n4);
		n1 = n2;
	}
	unlink_mnode(n1); unlink_mnode(divisor);
	for (empty = 0; !tbl[empty]; empty++)
		;
	assert(empty < iblocks);
	exponent = 16 * (iblocks - empty);
	memmove(tbl, tbl+empty, (iblocks-empty)*sizeof(short));
	memset(tbl+iblocks-empty, 0, empty*sizeof(short));
	fblock = tbl[0];
	assert(fblock != 0);
	for (bits2shift = 0; (fblock & (1U<<15)) == 0; bits2shift++)
		fblock <<= 1;
	if (bits2shift) {
		exponent -= bits2shift;
		carry = 0;
		tblcurr = tbl + iblocks;
		while (--tblcurr >= tbl) {
			tmp = *tblcurr >> (16 - bits2shift);
			*tblcurr = (*tblcurr << bits2shift) | carry;
			carry = tmp;
		}
		assert(carry == 0);
	}
	mn = float_new(blocks);
	FL(mn)->blocks = blocks;
	FL(mn)->exponent = (exponent + OFFSET_EXP) * sign;
	memset(FL(mn)->d, 0, blocks * sizeof(short));
	memcpy(FL(mn)->d, tbl, iblocks * sizeof(short));
	return mn;
}

static s_mnode* float2float (s_mnode* n, s_mnode* model)
{
	if (!model)
		return copy_mnode(n);
	return change_precision(n, FL(model)->blocks);
}

static s_mnode* integer2float (s_mnode* n, s_mnode* model)
{
	int bl = model? FL(model)->blocks : 0;
	return promote_integer(n, bl);
}

static s_mnode* float_build (const char *str)
{
	s_mnode *m1, *m2, *m3, *ten;
	char *mant, *p, c;
	int places, exponent, blocks, bl0;

	p = mant = alloca(strlen(str)+1);
	places = -INT_MAX;
	exponent = 0;
	/* Copy the mantissa into "mant" */
	while ((c = *str++) != '\0') {
	  switch(c) {
	  case '0': case '1': case '2': case '3': case '4':
	  case '5': case '6': case '7': case '8': case '9':
	  case '-': case '+':
	  	*p++ = c;
  		++places;
	  	continue;
	  case '.':
	  	places = 0;
	  	continue;
	  default:
	  	/* break */
	  }
	  break;
	}
	*p = '\0';
	m1 = mnode_build(ST_INTEGER, mant);
	if (m1->type == ST_VOID)
		return m1;
	m2 = integer2float(m1, NULL);
	unlink_mnode(m1);
	bl0 = FL(m2)->blocks;
	/* Was there a digital point in the mantissa? */
	if (places < 0)
		places = 0;
	/* Now read the exponent */
	if (c == 'e' || c == 'E') {
		exponent = strtol(str, (char**)(&str), 10);
		c = *str++;
	}
	/* And the precision, in blocks */
	if (c == 'p' || c == 'P')
		bl0 = strtol(str, NULL, 10);
	/* Round to the nearest power of two */
	for (blocks = 4; blocks < bl0; blocks += blocks)
		;
	ten = float_short(10, 0, blocks);
	m1 = mnode_power(ten, exponent - places);
	unlink_mnode(ten);
	/* Fix excess of precision in the mantissa */
	m3 = change_precision(m2, blocks);
	unlink_mnode(m2);
	m2 = mnode_mul(m1, m3);
	unlink_mnode(m1); unlink_mnode(m3);
	return m2;
}

static s_mnode* float_truncate (s_mnode *n)
{
	big_float *fl = FL(n);
	s_mnode *so_far, *ptwo, *tmp1, *tmp2;
	int i, binexp;
	char buffer[10];

	so_far = mnode_build(ST_INTEGER, "0");
	if (fl->exponent == 0)
		return so_far;
	binexp = abs(fl->exponent) - OFFSET_EXP;
	if (binexp <= 0)
		return so_far;
	ptwo = mnode_build(ST_INTEGER, "65536");
	for (i = 0; i < fl->blocks; i++) {
		if (binexp >= 16) {
			tmp1 = mnode_mul(so_far, ptwo);
			unlink_mnode(so_far);
			sprintf(buffer, "%d", fl->d[i]);
			tmp2 = mnode_build(ST_INTEGER, buffer);
			so_far = mnode_add(tmp1, tmp2);
			unlink_mnode(tmp1); unlink_mnode(tmp2);
			binexp -= 16;
		} else if (binexp > 0) {
			unlink_mnode(ptwo);
			sprintf(buffer, "%d", 1<<binexp);
			ptwo = mnode_build(ST_INTEGER, buffer);
			tmp1 = mnode_mul(so_far, ptwo);
			unlink_mnode(so_far);
			sprintf(buffer, "%d", fl->d[i] >> (16-binexp));
			tmp2 = mnode_build(ST_INTEGER, buffer);
			so_far = mnode_add(tmp1, tmp2);
			unlink_mnode(tmp1); unlink_mnode(tmp2);
			break;
		} else
			break;
	}
	unlink_mnode(ptwo);

	/* Now check the sign */
	if (fl->exponent > 0)
		return so_far;
	else {
		tmp1 = mnode_negate(so_far);
		unlink_mnode(so_far);
		return tmp1;
	}
}

static s_mnode* float_round (s_mnode *n)
{
	s_mnode *one_half, *n_corr, *result;
	big_float *fl = FL(n);

	if (fl->exponent == 0)
		return mnode_build(ST_INTEGER, "0");

	one_half = float_short(1, -1, fl->blocks);
	if (fl->exponent > 0)
		n_corr = mnode_add(n, one_half);
	else
		n_corr = mnode_sub(n, one_half);
	unlink_mnode(one_half);
	result = float_truncate(n_corr); unlink_mnode(n_corr);
	return result;
}

static gr_string* float_stringify (s_mnode *n)
{
	int digits, mlen, dec_exp;
	big_float *fl;
	s_mnode *ten, *powerten, *n2, *n2i, *n3;
	gr_string *grs, *mantissa;
	char buffer[30];

	grs = new_gr_string(0);
	fl = FL(n);
	if (fl->exponent == 0)
		return grs_append1(grs, '0');
	if (fl->exponent > 0) {
		grs = grs_append1(grs, '+');
		n3 = copy_mnode(n);
	} else {
		grs = grs_append1(grs, '-');
		n3 = mnode_negate(n);
	}
	/* How many decimal digits should we print? */
	digits = (int)(fl->blocks * 16 * 0.30103);

	/* Divide our number by an appropriate power of ten */
	dec_exp = (int)((abs(fl->exponent)-OFFSET_EXP) * 0.30103 - digits);
	ten = float_short(10, 0, fl->blocks);
	powerten = mnode_power(ten, -dec_exp); unlink_mnode(ten);
	n2 = mnode_mul(n3, powerten); unlink_mnode(powerten);
	unlink_mnode(n3);

	/* Now round to the nearest integer */
	n2i = float_round(n2); unlink_mnode(n2);
	mantissa = mnode_stringify(n2i);
	mlen = mantissa->len;
	free(mantissa);

	/* Keep three guard digits */
	digits -= 3;
	assert(mlen > digits);
	/*
	 * We cannot simply take the "digits" first digits of the mantissa
	 * because we want the guard digits to cause the correct rounding.
	 * So we have to increase n2i by 0.5 x 10^(mlen-digits), but we
	 * must be careful because this could cause the mantissa to have
	 * one digit more, e.g. 999,999,940 -> 1,000,000,440. So we must
	 * recompute mlen after adjustment.
	 */
	sprintf(buffer, "5000000000");	/* should be enough! */
	buffer[mlen-digits] = '\0';
	n3 = mnode_build(ST_INTEGER, buffer);
	n2 = mnode_add(n2i, n3);
	unlink_mnode(n2i); unlink_mnode(n3);
	mantissa = mnode_stringify(n2); unlink_mnode(n2);
	mlen = mantissa->len;
	dec_exp += mlen - 1;

	grs = grs_append1(grs, '.');
	grs = grs_append(grs, mantissa->s, digits);
	free(mantissa);
	/* Put the decimal dot after the first digit */
	grs->s[1] = grs->s[2];
	grs->s[2] = '.';
	/* Append the exponent and the precision */
	sprintf(buffer, "e%d", dec_exp);
	grs = grs_append(grs, buffer, strlen(buffer));
	sprintf(buffer, "p%d", fl->blocks);
	grs = grs_append(grs, buffer, strlen(buffer));
	return grs;
}
	
static int float_acompare (s_mnode* n1, s_mnode* n2)
{
	big_float *fl1 = FL(n1), *fl2 = FL(n2);
	unsigned short *d1 = fl1->d, *d2 = fl2->d;
	int bl1 = fl1->blocks, bl2 = fl2->blocks, blocks, i, diff;
	int e1 = abs(fl1->exponent), e2 = abs(fl2->exponent);

	if (e1 == 0 || e2 == 0 || e1 != e2)
		return e1 - e2;
	blocks = (bl1 > bl2) ? bl1 : bl2;
	if (bl1 < blocks) {
		d1 = alloca(blocks*sizeof(short));
		memset(d1, 0, blocks*sizeof(short));
		memcpy(d1, fl1->d, bl1*sizeof(short));
	}
	if (bl2 < blocks) {
		d2 = alloca(blocks*sizeof(short));
		memset(d2, 0, blocks*sizeof(short));
		memcpy(d2, fl2->d, bl2*sizeof(short));
	}
	for (i = 0; i < blocks; i++)
		if ((diff = (int)(*d1++) - (int)(*d2++)) != 0)
			return diff;
	return 0;
}

static s_mnode* float_aadd (s_mnode* n1, s_mnode* n2)
{
	big_float *fl1 = FL(n1), *fl2 = FL(n2), *fl;
	int exp1, exp2, bl1, bl2, blocks;
	int bits2shift, words2shift, carry, tmp, i;
	unsigned short *d1, *d2, *d;
	s_mnode *n;

	exp1 = abs(fl1->exponent) - OFFSET_EXP;
	exp2 = abs(fl2->exponent) - OFFSET_EXP;
	if (exp1 < exp2) {
		/* Swap n1 and n2 */
		fl = fl1; fl1 = fl2; fl2 = fl;
		tmp = exp1; exp1 = exp2; exp2 = tmp;
	}
	assert(exp1 >= exp2);
	bits2shift  = (exp1 - exp2) & 15;
	words2shift = (exp1 - exp2) >> 4;
	d1 = fl1->d;
	d2 = fl2->d;
	bl1 = fl1->blocks;
	bl2 = fl2->blocks;
	blocks = (bl1 > bl2) ? bl1 : bl2;
	if (bl1 < blocks) {
		d1 = alloca(blocks*sizeof(short));
		memset(d1, 0, blocks*sizeof(short));
		memcpy(d1, fl1->d, bl1*sizeof(short));
	}
	if (bl2 < blocks || bits2shift || words2shift) {
		d2 = alloca(blocks*sizeof(short));
		memset(d2, 0, blocks*sizeof(short));
		memcpy(d2, fl2->d, bl2*sizeof(short));
	}
	if (bits2shift) {
		carry = 0;
		for (i = 0; i < blocks; i++) {
			tmp = d2[i] & ((1 << bits2shift) - 1);
			d2[i] = (carry << (16-bits2shift)) |
				(d2[i] >> bits2shift);
			carry = tmp;
		}
	}
	if (words2shift) {
		if (words2shift < blocks) {
			memmove(d2+words2shift, d2,
				(blocks - words2shift) * sizeof(short));
			memset(d2, 0, words2shift * sizeof(short));
		} else
			memset(d2, 0, blocks * sizeof(short));
	}
	/* Now we can add the things together */
	n = float_new(blocks);
	d = FL(n)->d;
	carry = 0;
	for (i = blocks-1; i >= 0; i--) {
		carry += d1[i] + d2[i];
		d[i] = carry;
		carry = carry >> 16;
	}
	if (carry) {
		exp1++;
		for (i = 0; i < blocks; i++) {
			tmp = d[i] & 1;
			d[i] = (carry << 15) | (d[i] >> 1);
			carry = tmp;
		}
	}
	FL(n)->blocks = blocks;
	FL(n)->exponent = exp1 + OFFSET_EXP;
	return n;
}

static s_mnode* float_asub (s_mnode* n1, s_mnode* n2)
{
	big_float *fl1 = FL(n1), *fl2 = FL(n2);
	int exp1, exp2, bl1, bl2, blocks;
	int bits2shift, words2shift, carry, tmp, i;
	unsigned short *d1, *d2, *d;
	s_mnode *n;

	exp1 = abs(fl1->exponent) - OFFSET_EXP;
	exp2 = abs(fl2->exponent) - OFFSET_EXP;
	assert(exp1 >= exp2);
	bits2shift  = (exp1 - exp2) & 15;
	words2shift = (exp1 - exp2) >> 4;
	d1 = fl1->d;
	d2 = fl2->d;
	bl1 = fl1->blocks;
	bl2 = fl2->blocks;
	blocks = (bl1 > bl2)? bl1 : bl2;
	if (bl1 < blocks) {
		d1 = alloca(blocks*sizeof(short));
		memset(d1, 0, blocks*sizeof(short));
		memcpy(d1, fl1->d, bl1*sizeof(short));
	}
	if (bl2 < blocks || bits2shift || words2shift) {
		d2 = alloca(blocks*sizeof(short));
		memset(d2, 0, blocks*sizeof(short));
		memcpy(d2, fl2->d, bl2*sizeof(short));
	}
	if (bits2shift) {
		carry = 0;
		for (i = 0; i < blocks; i++) {
			tmp = d2[i] & ((1 << bits2shift) - 1);
			d2[i] = (carry << (16-bits2shift)) |
				(d2[i] >> bits2shift);
			carry = tmp;
		}
	}
	if (words2shift) {
		if (words2shift < blocks) {
			memmove(d2+words2shift, d2,
				(blocks - words2shift) * sizeof(short));
			memset(d2, 0, words2shift * sizeof(short));
		} else
			memset(d2, 0, blocks * sizeof(short));
	}
	/* Now we can substract the things together */
	n = float_new(blocks);
	d = FL(n)->d;
	carry = 0;
	for (i = blocks-1; i >= 0; i--) {
		d[i] = tmp = d1[i] - carry - d2[i];
		carry = (tmp < 0);
	}
	assert(carry == 0);
	/* Is the mantissa smaller than 0.5 ? */
	for (i = 0; i < blocks; i++)
		if (d[i]) break;
	assert(i < blocks);	/* The difference can't be zero */
	words2shift = i;
	tmp = d[i];
	for (bits2shift = 0; (tmp & (1<<15)) == 0; bits2shift++)
		tmp <<= 1;
	if (bits2shift) {
		carry = 0;
		for (i = blocks-1; i >= 0; i--) {
			tmp = d[i] >> (16-bits2shift);
			d[i] = (d[i] << bits2shift) | carry;
			carry = tmp;
		}
		assert(carry == 0);
	}
	if (words2shift) {
		memmove(d, d+words2shift, (blocks-words2shift)*sizeof(short));
		memset(d+blocks-words2shift, 0, words2shift*sizeof(short));
	}
	assert(d[0] & (1<<15));
	FL(n)->blocks = blocks;
	FL(n)->exponent = exp1 - bits2shift - 16*words2shift + OFFSET_EXP;
	return n;
}

static inline int float_sign (s_mnode *n)
{
	int x = FL(n)->exponent;
	return x ? (x > 0 ? 1 : -1) : 0;
}

static int float_notzero (s_mnode *n)
{
	return (FL(n)->exponent != 0);
}

static int float_isneg (s_mnode *n)
{
	return (FL(n)->exponent < 0);
}

static s_mnode* float_add (s_mnode* n1, s_mnode* n2)
{
	int s1 = float_sign(n1), s2 = float_sign(n2);
	int diff;
	s_mnode *n;

	if (s1 == 0)
		return copy_mnode(n2);
	if (s2 == 0)
		return copy_mnode(n1);
	if (s1 == s2) {
		n = float_aadd(n1, n2);
		if (s1 < 0)
			FL(n)->exponent *= -1;
		return n;
	}
	/* Here n1 and n2 have different signs */
	diff = float_acompare(n1, n2);
	if (diff == 0)
		return float_zero(n1);
	if (diff > 0) {
		/* n1 has greater magnitude than n2 */
		n = float_asub(n1, n2);
		if (s1 < 0)
			FL(n)->exponent *= -1;
	} else {
		n = float_asub(n2, n1);
		if (s2 < 0)
			FL(n)->exponent *= -1;
	}
	return n;
}

static s_mnode* float_sub (s_mnode* n1, s_mnode* n2)
{
	int s1 = float_sign(n1), s2 = float_sign(n2);
	int diff;
	s_mnode *n;

	if (s1 == 0)
		return float_negate(n2);
	if (s2 == 0)
		return copy_mnode(n1);
	if (s1 != s2) {
		n = float_aadd(n1, n2);
		if (s1 < 0)
			FL(n)->exponent *= -1;
			return n;
	}
	/* Here n1 and n2 have the same sign */
	diff = float_acompare(n1, n2);
	if (diff == 0)
		return float_zero(n1);
	if (diff > 0) {
		n = float_asub(n1, n2);
		if (s1 < 0)
			FL(n)->exponent *= -1;
	} else {
		n = float_asub(n2, n1);
		if (s2 > 0)
			FL(n)->exponent *= -1;
	}
	return n;
}

static inline void fl_subs (int n, unsigned short *d1, unsigned short *d2,
	unsigned short *d)
{
	int diff, carry;

	carry = 0;
	d1 += n; d2 += n; d += n;
	
	while (n--) {
		*--d = diff = carry + *--d1 - *--d2;
		carry = -(diff < 0);
	}
	if (carry)
		while (--(*--d) == 65535)
			;
}

static inline void fl_add (int n, unsigned short *d1, unsigned short *d2,
	unsigned short *d)
{
	int sum, carry;

	carry = 0;
	d1 += n; d2 += n; d += n;

	while (n--) {
		*--d = sum = carry + *--d1 + *--d2;
		carry = (sum > 65535);
	}
	if (carry)
		while (++(*--d) == 0)
			;
}

static void fl_fastmul_pow2 (int n, unsigned short *d1, unsigned short *d2,
	unsigned short *d)
{
	unsigned int prod, carry;
	int i, j, k;
	unsigned short *d1a, *d2a, *d12, *d12a, *du1, *du2, *du12;
	int n2, sdu1, sdu2;

	assert((n & (n-1)) == 0);
	if (n <= 8) {
		memset(d, 0, 2 * n * sizeof(short));
		for (i = n-1; i >= 0; i--) {
			if (d1[i] == 0)
				continue;
			carry = 0;
			k = 0;	/* To avoid a spurious warning from GCC */
			for (j = n-1; j >= 0; j--) {
				prod = carry + d[k = i + j + 1] +
					(unsigned int)d1[i]*d2[j];
				d[k] = prod;
				carry = prod >> 16;
			}
			while (carry) {
				prod = carry + d[--k];
				d[k] = prod;
				carry = prod >> 16;
			}
		}
		return;
	}
	/* For bigger numbers, we use Karatsuba's method */
	n2 = n / 2;
	d1a = d1 + n2;
	d2a = d2 + n2;
	d12  = alloca(n * sizeof(short));
	d12a = alloca(n * sizeof(short));
	fl_fastmul_pow2(n2, d1, d2, d12);
	fl_fastmul_pow2(n2, d1a, d2a, d12a);
	sdu1 = 1;	/* sign(d1-d1a) */
	for (i = 0; i < n2; i++) {
		if (d1[i] > d1a[i])
			break;
		if (d1[i] < d1a[i]) {
			sdu1 = -1;
			break;
		}
	}
	sdu2 = 1;
	for (i = 0; i < n2; i++) {
		if (d2[i] > d2a[i])
			break;
		if (d2[i] < d2a[i]) {
			sdu2 = -1;
			break;
		}
	}
	du1 = alloca(n2 * sizeof(short));
	if (sdu1 > 0)
		fl_subs(n2, d1, d1a, du1);
	else
		fl_subs(n2, d1a, d1, du1);
	du2 = alloca(n2 * sizeof(short));
	if (sdu2 > 0)
		fl_subs(n2, d2, d2a, du2);
	else
		fl_subs(n2, d2a, d2, du2);
	du12 = alloca(n * sizeof(short));
	fl_fastmul_pow2(n2, du1, du2, du12);
	memcpy(d, d12, n * sizeof(short));
	memcpy(d+n, d12a, n * sizeof(short));
	fl_add(n, d12,  d+n2, d+n2);
	fl_add(n, d12a, d+n2, d+n2);
	if (sdu1 != sdu2)
		fl_add(n, du12, d+n2, d+n2);
	else
		fl_subs(n, d+n2, du12, d+n2);
}

static void fl_fastmul_pow2p1 (int n, unsigned short *d1, unsigned short *d2,
	unsigned short *d)
{
	unsigned int carry, tmp, f, ccar;
	int i;
	
	fl_fastmul_pow2(n-1, d1+1, d2+1, d+2);
	f = d1[0];
	carry = 0;
	for (i = n-1; i > 0; i--) {
		d[i+1] = tmp = carry + d[i+1] + f * d2[i];
		carry = tmp >> 16;
	}
	ccar = carry + f * d2[0];
	f = d2[0];
	carry = 0;
	for (i = n-1; i > 0; i--) {
		d[i+1] = tmp = carry + d[i+1] + f * d1[i];
		carry = tmp >> 16;
	}
	ccar += carry;
	d[1] = ccar;
	d[0] = ccar >> 16;
}

static s_mnode* float_mul (s_mnode* n1, s_mnode* n2)
{
	s_mnode* n;
	big_float *fl1 = FL(n1), *fl2 = FL(n2);
	int exp1, exp2, exp, bl1, bl2, i, blocks, carry, tmp;
	unsigned short *d1, *d2, *d;

	if (fl1->exponent == 0)
		return copy_mnode(n1);
	if (fl2->exponent == 0)
		return copy_mnode(n2);
	exp1 = abs(fl1->exponent) - OFFSET_EXP;
	exp2 = abs(fl2->exponent) - OFFSET_EXP;
	d1 = fl1->d;
	d2 = fl2->d;
	bl1 = fl1->blocks;
	bl2 = fl2->blocks;
	for (blocks = 1; blocks < bl1 || blocks < bl2; blocks <<= 1)
		;
	if (bl1 < blocks) {
		d1 = alloca(blocks * sizeof(short));
		memset(d1, 0, blocks * sizeof(short));
		memcpy(d1, fl1->d, bl1 * sizeof(short));
	}
	if (bl2 < blocks) {
		d2 = alloca(blocks * sizeof(short));
		memset(d2, 0, blocks * sizeof(short));
		memcpy(d2, fl2->d, bl2 * sizeof(short));
	}
	d = alloca(2 * blocks * sizeof(short));
	fl_fastmul_pow2(blocks, d1, d2, d);
	assert(16384U <= d[0]);
	exp = exp1 + exp2 + OFFSET_EXP;
#if 0
	blocks = (bl1 > bl2)? bl1 : bl2;
#endif
	if (d[0] < 32768U) {
		carry = 0;
		for (i = blocks; i >= 0; i--) {
			tmp = d[i] >> 15;
			d[i] = (d[i] << 1) | carry;
			carry = tmp;
		}
		assert(carry == 0);
		--exp;
	}
	assert(d[0] >= 32768U);
	n = float_new(blocks);
	FL(n)->blocks = blocks;
	FL(n)->exponent = ((fl1->exponent < 0) ^ (fl2->exponent < 0)) ?
		-exp : exp;
	memcpy(FL(n)->d, d, blocks * sizeof(short));
	/* Round up if appropriate */
	if (d[blocks] & 0x8000)
		bump_mantissa(FL(n));
	return n;
}

static int float_differ (s_mnode* mn1, s_mnode* mn2)
{
	if (float_sign(mn1) != float_sign(mn2))
		return 1;
	return (float_acompare(mn1,mn2) != 0);
}

static int float_lessthan (s_mnode* mn1, s_mnode* mn2)
{
	int s1 = float_sign(mn1), s2 = float_sign(mn2);

	if (s1 != s2)
		return (s1 < s2);
	switch(s1) {
	    default:	/* Can't happen */
	    case  0:	return 0;
	    case -1:	return (float_acompare(mn1,mn2) > 0);
	    case  1:	return (float_acompare(mn1,mn2) < 0);
	}
}

static s_mnode* float_negate (s_mnode* mn1)
{
	big_float *fl1 = FL(mn1);
	s_mnode *n;

	if (fl1->exponent == 0)
		return copy_mnode(mn1);
	n = float_new(fl1->blocks);
	FL(n)->blocks = fl1->blocks;
	FL(n)->exponent = -(fl1->exponent);
	memcpy(FL(n)->d, fl1->d, (fl1->blocks)*sizeof(short));
	return n;
}

static s_mnode* float_invert (s_mnode* n1)
{
	int s1, words, blocks, bl1, carry, tmp, i, expo1, expo;
	unsigned short *z, *z2, *V, *Vz2;
	unsigned int vest, zest;
	s_mnode *mn;

	if ((s1 = float_sign(n1)) == 0)
		return mnode_error(SE_DIVZERO, "float_invert");
	bl1 = FL(n1)->blocks;
	expo1 = abs(FL(n1)->exponent) - OFFSET_EXP;
	for (blocks = 1; blocks < bl1; blocks <<= 1)
		;
	z  = alloca((blocks + 1) * sizeof(short));
	z2 = alloca((blocks + 2) * sizeof(short));
	V  = alloca((blocks + 1) * sizeof(short));
	Vz2= alloca((2*blocks+2) * sizeof(short));
	/*
	 * Initialize V and z
	 */
	memset(V, 0, (blocks+1) * sizeof(short));
	memcpy(V+1, FL(n1)->d, bl1 * sizeof(short));
	memset(z, 0, (blocks+1) * sizeof(short));
	vest = ((unsigned int)V[1] << 16) + V[2];
	zest = (1LL << 49) / vest;
	z[1] = zest;
	z[0] = zest >> 16;
	words = 1;
	carry = 0;
	/* Shift V three bits left */
	for (i = blocks; i >= 0; i--) {
		tmp = V[i] >> 13;
		V[i] = (V[i] << 3) + carry;
		carry = tmp;
	}
	assert(carry == 0);
	while (words < blocks) {
		fl_fastmul_pow2p1(1+words, z, z, z2);
		fl_fastmul_pow2p1(1+2*words, z2+1, V, Vz2);
		/* Double z */
		carry = 0;
		for (i = words; i >= 0; i--) {
			tmp = z[i] >> 15;
			z[i] = (z[i] << 1) + carry;
			carry = tmp;
		}
		assert(carry == 0);
		/* Shift Vz^2 four bits right */
		for (i = 1; i < 2*words+2; i++) {
			tmp = Vz2[i] << 12;
			Vz2[i] = (Vz2[i] >> 4) + carry;
			carry = tmp;
		}
		/* And substract it from 2z */
		fl_subs(1+2*words, z, Vz2+1, z);
		words *= 2;
	}
	/* Round z to the nearest multiple of 4 before shifting */
	V[0] = 2;
	fl_add(1, z+blocks, V, z+blocks);
	expo = 1 - expo1;
	if (z[0] == 4) {
		/* special case: the inverse is exactly 2 */
		z[0] = 2;
		++expo;
	}
	/* Shift z two bits right */
	carry = 0;
	for (i = 0; i <= blocks; i++) {
		tmp = z[i] << 14;
		z[i] = (z[i] >> 2) + carry;
		carry = tmp;
	}
	mn = float_new(blocks);
	FL(mn)->exponent = s1 * (expo + OFFSET_EXP);
	FL(mn)->blocks = blocks;
	memcpy(FL(mn)->d, z+1, blocks * sizeof(short));
	return mn;
}

static s_mnode* float_sqrt (s_mnode *n)
{
	return mnode_error(SE_NOTRDY, "float_sqrt");
}
