/* 
  mxTextTools -- Fast text manipulation routines

  (c) Marc-Andre Lemburg; all rights reserved
*/

/* We want all our symbols to be exported */
#define MX_BUILDING_MXTEXTTOOLS

/* Logging file used by debugging facility */
#ifndef MAL_DEBUG_OUTPUTFILE
# define MAL_DEBUG_OUTPUTFILE "mxTextTools.log"
#endif

#include "mx.h"
#include "mxTextTools.h"
#include <ctype.h>

#define VERSION "1.1.1"

/* Initial list size used by e.g. setsplit(), setsplitx(),... Since
   modern OSes only reserve memory instead of actually allocating it
   at malloc() time, this should be set high enough to accommodate most
   average cases reasonably well. */
#define INITIAL_LIST_SIZE 500

/* Define this to enable the copy-protocol (__copy__, __deepcopy__) */
#define COPY_PROTOCOL

/* Type code to use for parsing input data. "s#" will use the getreadbuffer
   interface, "t#" uses the getcharbuffer interface. Both have their
   advantages and faults... */
#ifndef TYPECODE_TEXT_AND_LENGTH
# if PY_VERSION_HEX >= 0x010502A1
#  define TYPECODE_TEXT_AND_LENGTH "s#"
# else
 /* Older Python versions don't have the "t#" type code. */
#  define TYPECODE_TEXT_AND_LENGTH "s#"
# endif
#endif

/* --- module doc-string -------------------------------------------------- */

static char *Module_docstring = 

 MXTEXTTOOLS_MODULE" -- Tools for fast text processing. Version "VERSION"\n\n"

 "(c) Copyright Marc-Andre Lemburg, 1997-1999, mal@lemburg.com,\n\n"
 "                 All Rights Reserved\n\n"
 "See the documentation for further information on copyrights,\n"
 "or contact the author."
;

/* --- internal macros ---------------------------------------------------- */

#define _mxBMS_Check(v) \
        (((mxBMSObject *)(v))->ob_type == &mxBMS_Type)

#ifdef MXFASTSEARCH
#define _mxFS_Check(v) \
        (((mxFSObject *)(v))->ob_type == &mxFS_Type)
#endif

/* --- module globals ----------------------------------------------------- */

/* Translation strings */
static PyObject *mxTo_Upper;
static PyObject *mxTo_Lower;

/* --- forward declarations ----------------------------------------------- */

/* --- module helper ------------------------------------------------------ */

/* Build the 256-byte translation string that maps every byte value to
   its upper-case counterpart (according to toupper()).

   Returns a new Python string object, or NULL if the string could not
   be created. */
static
PyObject *mxTo_Upper_New()
{
    char tr[256];
    int i;
    
    /* The index is handed to toupper() as a non-negative int. Casting
       it to plain char first would yield negative arguments for
       128..255 on platforms where char is signed, which <ctype.h>
       functions are not required to handle. */
    for (i = 0; i < 256; i++)
	tr[i] = (char)toupper(i);
    return PyString_FromStringAndSize(tr,sizeof(tr));
}

/* Build the 256-byte translation string that maps every byte value to
   its lower-case counterpart (according to tolower()).

   Returns a new Python string object, or NULL if the string could not
   be created. */
static
PyObject *mxTo_Lower_New()
{
    char tr[256];
    int i;
    
    /* The index is handed to tolower() as a non-negative int. Casting
       it to plain char first would yield negative arguments for
       128..255 on platforms where char is signed, which <ctype.h>
       functions are not required to handle. */
    for (i = 0; i < 256; i++)
	tr[i] = (char)tolower(i);
    return PyString_FromStringAndSize(tr,sizeof(tr));
}

/* --- module interface --------------------------------------------------- */

/* --- Boyer Moore Substring Search Object -----------------------------*/

staticforward PyMethodDef mxBMS_Methods[];

/* allocation */

/* Constructor for Boyer-Moore search objects.

   Arguments are parsed and validated *before* the object is allocated,
   so that the error path never has to deallocate an object whose
   fields are still uninitialized. Once the object exists it owns its
   references to the match string and the translate table; on failure
   a single Py_DECREF of the object releases them through the
   deallocator. */

Py_C_Function( mxBMS_new,
	       "BMS(match[,translate=None])\n\n"
	       "Create a substring search object for the string match;\n"
	       "translate is an optional translate-string like the one used\n"
	       "in the module re.")
{
    PyObject *s = 0;
    PyObject *t = 0;
    mxBMSObject *so = 0;

    /* s and t are borrowed references while they are being checked */
    Py_Get2Args("O|O:BMS.__init__",s,t);
    
    Py_Assert(PyString_Check(s),
	      PyExc_TypeError,
	      "argument must be a string");

    if (t == Py_None)
	t = 0;
    else if (t) {
	Py_Assert(PyString_Check(t),
		  PyExc_TypeError,
		  "translate table must be a string");
	Py_Assert(PyString_GET_SIZE(t) == 256,
		  PyExc_TypeError,
		  "translate string must have exactly 256 chars");
    }

    so = PyObject_NEW(mxBMSObject,&mxBMS_Type);
    if (so == NULL)
	goto onError;

    /* From here on the object owns the references */
    Py_XINCREF(t);
    so->tr = t;
    Py_INCREF(s);
    so->match = s;
    
    so->c = bm_init(PyString_AS_STRING(s),
		    PyString_GET_SIZE(s));

    Py_Assert(so->c != NULL,
	      PyExc_TypeError,
	      "error initializing the search object");

    return (PyObject *)so;

 onError:
    /* so is either still NULL or fully set up (with so->c == NULL), so
       the deallocator takes care of match and tr; s and t must not be
       released separately. */
    Py_XDECREF(so);
    return NULL;
}

/* Deallocator for Boyer-Moore search objects: hands the search data to
   bm_free(), drops the references held on the translate table and the
   match string (either may be NULL) and finally frees the object
   itself. */
static void
mxBMS_Free(mxBMSObject *so)
{
    bm_free(so->c);

    Py_XDECREF(so->tr);
    Py_XDECREF(so->match);

    PyMem_DEL(so);
}

/* methods */

#define so ((mxBMSObject *)self)

/* BMS.search(): run the search over text[start:stop] and return the
   slice of the match as a 2-tuple. The search routines report "not
   found" by returning start itself, in which case (start,start) is
   returned; negative results are treated as internal errors. */

Py_C_Function( mxBMS_search,
	       "BMS.search(text,start=0,stop=len(text))\n\n"
	       "Search for the substring in text, looking only at the\n"
	       "slice [start:stop] and return the slice (l,r)\n"
	       "where the substring was found, (start,start) otherwise.")
{
    char *buffer;
    int buffer_len;
    int start = 0;
    int stop = INT_MAX;
    int hit;

    Py_Get4Args(TYPECODE_TEXT_AND_LENGTH"|ii:BMS.search",
		buffer,buffer_len,start,stop);

    Py_CheckBufferSlice(buffer_len,start,stop);

    /* Use the translating search only if a translate table was given */
    hit = so->tr
	? bm_tr_search(so->c,buffer,start,stop,PyString_AS_STRING(so->tr))
	: bm_search(so->c,buffer,start,stop);

    Py_Assert(hit >= 0,
	      PyExc_SystemError,
	      "internal error");

    /* hit is the index right after the match; step back to its start */
    if (hit != start)
	start = hit - so->c->len_match;

    Py_Return2("ii",start,hit);

 onError:
    return NULL;
}

/* BMS.find(): like BMS.search(), but only the left index of the match
   is returned, or -1 if the search routine signalled "not found" by
   returning start. Negative results are treated as internal errors. */

Py_C_Function( mxBMS_find,
	       "BMS.find(text,start=0,stop=len(text))\n\n"
	       "Search for the substring in text, looking only at the\n"
	       "slice [start:stop] and return the index\n"
	       "where the substring was found, -1 otherwise.")
{
    char *buffer;
    int buffer_len;
    int start = 0;
    int stop = INT_MAX;
    int hit;

    Py_Get4Args(TYPECODE_TEXT_AND_LENGTH"|ii:BMS.find",
		buffer,buffer_len,start,stop);

    Py_CheckBufferSlice(buffer_len,start,stop);

    /* Use the translating search only if a translate table was given */
    hit = so->tr
	? bm_tr_search(so->c,buffer,start,stop,PyString_AS_STRING(so->tr))
	: bm_search(so->c,buffer,start,stop);

    Py_Assert(hit >= 0,
	      PyExc_SystemError,
	      "internal error");

    /* hit is the index right after the match */
    return PyInt_FromLong(hit == start ? -1 : hit - so->c->len_match);

 onError:
    return NULL;
}

/* BMS.findall(): repeatedly search text[start:stop], restarting right
   after each match, and collect the (l,r) slices of all matches in a
   list.

   The loop ends when the remaining slice is shorter than the match
   string, or when the search routine returns start ("not found") or a
   negative value (reported as internal error). Every failure while
   building the result releases the partially built list. */

Py_C_Function( mxBMS_findall,
	       "BMS.findall(text,start=0,stop=len(text))\n\n"
	       "Search for the substring in text, looking only at the\n"
	       "slice [start:stop] and return a list of all\n"
	       "non overlapping slices (l,r) in text where the match\n"
	       "string can be found.")
{
    PyObject *list = 0;
    char *tr;
    int start = 0;
    int index = 0;
    char *buffer;
    int buffer_len;
    int stop = INT_MAX;

    Py_Get4Args(TYPECODE_TEXT_AND_LENGTH"|ii:BMS.findall",
		buffer,buffer_len,start,stop);

    Py_CheckBufferSlice(buffer_len,start,stop);
    
    list = PyList_New(0);
    if (!list)
	goto onError;

    /* NULL selects the exact search */
    tr = so->tr ? PyString_AS_STRING(so->tr) : NULL;

    while (start <= stop - so->c->len_match) {
	PyObject *t;
	int rc;

	if (tr)
	    /* search with translate table */
	    index = bm_tr_search(so->c,
				 buffer,
				 start,
				 stop,
				 tr);
	else
	    /* exact search */
	    index = bm_search(so->c,
			      buffer,
			      start,
			      stop);
	if (index < 0 || index == start) /* Error or Not found */
	    break;

	/* Build the slice tuple and append it to the list; the list
	   takes its own reference, so ours is dropped in any case */
	t = Py_BuildValue("(ii)",index - so->c->len_match,index);
	if (!t)
	    goto onError;
	rc = PyList_Append(list,t);
	Py_DECREF(t);
	if (rc)
	    goto onError;

	/* Continue right after the match */
	start = index;
    }
    
    Py_Assert(index >= 0,
	      PyExc_SystemError,
	      "internal error");

    /* return list of slices */
    return list;

 onError:
    Py_XDECREF(list);
    return NULL;
}

#ifdef COPY_PROTOCOL
/* Copy-protocol hook: registered for both __copy__ and __deepcopy__ in
   the method table. The optional memo argument is parsed but not
   used; the object itself is returned with a new reference. */
Py_C_Function( mxBMS_copy,
	       "copy([memo])\n\n"
	       "Return a new reference for the instance. This function\n"
	       "is used for the copy-protocol. Real copying doesn't take\n"
	       "place, since the instances are immutable.")
{
    PyObject *memo;
    PyObject *same = (PyObject *)so;

    Py_GetArg("|O",memo);

    Py_INCREF(same);
    return same;

 onError:
    return NULL;
}
#endif

#undef so

/* --- slots --- */

/* tp_print slot: writes "<Boyer-Moore Search object for MATCH at ADDR>"
   to fp, where MATCH is printed via PyObject_Print() using the given
   flags.

   Returns 0 on success and -1 on error (self is NULL or printing the
   match string failed, in which case the exception set by
   PyObject_Print() is passed on instead of being ignored). */
static int
mxBMS_Print(mxBMSObject *self,
	    FILE *fp, 
	    int flags)
{
    if (self == NULL) return -1;
    fprintf(fp,"<Boyer-Moore Search object for ");
    if (PyObject_Print(self->match,fp,flags))
	return -1;
    fprintf(fp," at %lx>",(long)self);
    return 0;
}

/* tp_getattr slot: serves the attributes "match" (the match string),
   "translate" (the translate table or None) and "__members__" (list
   of these attribute names); everything else is looked up in the
   method table. Returns a new reference. */
static PyObject
*mxBMS_Getattr(mxBMSObject *self,
	       char *name)
{
    if (Py_WantAttr(name,"match")) {
	Py_INCREF(self->match);
	return self->match;
    }

    if (Py_WantAttr(name,"translate")) {
	/* A missing translate table is reported as None */
	PyObject *table = self->tr ? self->tr : Py_None;

	Py_INCREF(table);
	return table;
    }

    if (Py_WantAttr(name,"__members__"))
	return Py_BuildValue("[ss]","match","translate");

    return Py_FindMethod(mxBMS_Methods, (PyObject *)self, (char *)name);
}

/* Python Type Table */

/* Type object for Boyer-Moore search objects. Only the dealloc, print
   and getattr slots are filled in; all other slots are left empty. */
PyTypeObject mxBMS_Type = {
        PyObject_HEAD_INIT(0)		/* init at startup ! */
	0,			  	/*ob_size*/
	"Boyer Moore Search",	  	/*tp_name*/
	sizeof(mxBMSObject),	  	/*tp_basicsize*/
	0,			  	/*tp_itemsize*/
	/* methods */
	(destructor)mxBMS_Free,		/*tp_dealloc*/
	(printfunc)mxBMS_Print,	  	/*tp_print*/
	(getattrfunc)mxBMS_Getattr,  	/*tp_getattr*/
	(setattrfunc)0,		  	/*tp_setattr*/
	(cmpfunc)0,		  	/*tp_compare*/
	(reprfunc)0,		  	/*tp_repr*/
        0,			  	/*tp_as_number*/
	0,				/*tp_as_sequence*/
	0,				/*tp_as_mapping*/
	(hashfunc)0,			/*tp_hash*/
	(ternaryfunc)0,			/*tp_call*/
	(reprfunc)0,			/*tp_str*/
	(getattrofunc)0, 		/*tp_getattro*/
	(setattrofunc)0, 		/*tp_setattro*/
};

/* Python Method Table */

/* Methods of Boyer-Moore search objects; this table is searched by
   mxBMS_Getattr() for all names that are not data attributes. */
statichere
PyMethodDef mxBMS_Methods[] =
{   
    Py_MethodListEntry("search",mxBMS_search),
    Py_MethodListEntry("find",mxBMS_find),
    Py_MethodListEntry("findall",mxBMS_findall),
#ifdef COPY_PROTOCOL
    /* Both copy hooks map to the same function */
    Py_MethodListEntry("__deepcopy__",mxBMS_copy),
    Py_MethodListEntry("__copy__",mxBMS_copy),
#endif
    {NULL,NULL} /* end of list */
};

#ifdef MXFASTSEARCH

/* --- Fast Search Object --------------------------------------*/

staticforward PyMethodDef mxFS_Methods[];

/* allocation */

/* Constructor for Fast Search objects.

   Arguments are parsed and validated *before* the object is allocated,
   so that the error path never has to deallocate an object whose
   fields are still uninitialized. Once the object exists it owns its
   references to the match string and the translate table; on failure
   a single Py_DECREF of the object releases them through the
   deallocator. */

Py_C_Function( mxFS_new,
	       "FS(match[,translate=None])\n\n"
	       "Create a substring search object for the string match;\n"
	       "translate is an optional translate-string like the one used\n"
	       "in the module re.")
{
    PyObject *s = 0;
    PyObject *t = 0;
    mxFSObject *so = 0;

    /* s and t are borrowed references while they are being checked */
    Py_Get2Args("O|O:FS.__init__",s,t);
    
    Py_Assert(PyString_Check(s),
	      PyExc_TypeError,
	      "argument must be a string");

    if (t == Py_None)
	t = 0;
    else if (t) {
	Py_Assert(PyString_Check(t),
		  PyExc_TypeError,
		  "translate table must be a string");
	Py_Assert(PyString_GET_SIZE(t) == 256,
		  PyExc_TypeError,
		  "translate string must have exactly 256 chars");
    }

    so = PyObject_NEW(mxFSObject,&mxFS_Type);
    if (so == NULL)
	goto onError;

    /* From here on the object owns the references */
    Py_XINCREF(t);
    so->tr = t;
    Py_INCREF(s);
    so->match = s;
    
    so->c = fs_init(PyString_AS_STRING(s),
		    PyString_GET_SIZE(s));

    Py_Assert(so->c != NULL,
	      PyExc_TypeError,
	      "error initializing the search object");

    return (PyObject *)so;

 onError:
    /* so is either still NULL or fully set up (with so->c == NULL), so
       the deallocator takes care of match and tr; s and t must not be
       released separately. */
    Py_XDECREF(so);
    return NULL;
}

/* Deallocator for Fast Search objects: hands the search data to
   fs_free(), drops the references held on the translate table and the
   match string (either may be NULL) and finally frees the object
   itself. */
static void
mxFS_Free(mxFSObject *so)
{
    fs_free(so->c);

    Py_XDECREF(so->tr);
    Py_XDECREF(so->match);

    PyMem_DEL(so);
}

/* methods */

#define so ((mxFSObject *)self)

/* FS.search(): run the search over text[start:stop] and return the
   slice of the match as a 2-tuple. The search routines report "not
   found" by returning start itself, in which case (start,start) is
   returned; negative results are treated as internal errors. */

Py_C_Function( mxFS_search,
	       "FS.search(text,start=0,stop=len(text))\n\n"
	       "Search for the substring in text, looking only at the\n"
	       "slice [start:stop] and return the slice (l,r)\n"
	       "where the substring was found, (start,start) otherwise.")
{
    char *buffer;
    int buffer_len;
    int start = 0;
    int stop = INT_MAX;
    int hit;

    Py_Get4Args(TYPECODE_TEXT_AND_LENGTH"|ii:FS.search",
		buffer,buffer_len,start,stop);

    Py_CheckBufferSlice(buffer_len,start,stop);

    /* Use the translating search only if a translate table was given */
    hit = so->tr
	? fs_tr_search(so->c,buffer,start,stop,PyString_AS_STRING(so->tr))
	: fs_search(so->c,buffer,start,stop);

    Py_Assert(hit >= 0,
	      PyExc_SystemError,
	      "internal error");

    /* hit is the index right after the match; step back to its start */
    if (hit != start)
	start = hit - so->c->len_match;

    Py_Return2("ii",start,hit);

 onError:
    return NULL;
}

/* FS.find(): like FS.search(), but only the left index of the match
   is returned, or -1 if the search routine signalled "not found" by
   returning start. Negative results are treated as internal errors. */

Py_C_Function( mxFS_find,
	       "FS.find(text,start=0,stop=len(text))\n\n"
	       "Search for the substring in text, looking only at the\n"
	       "slice [start:stop] and return the index\n"
	       "where the substring was found, -1 otherwise.")
{
    char *buffer;
    int buffer_len;
    int start = 0;
    int stop = INT_MAX;
    int hit;

    Py_Get4Args(TYPECODE_TEXT_AND_LENGTH"|ii:FS.find",
		buffer,buffer_len,start,stop);

    Py_CheckBufferSlice(buffer_len,start,stop);

    /* Use the translating search only if a translate table was given */
    hit = so->tr
	? fs_tr_search(so->c,buffer,start,stop,PyString_AS_STRING(so->tr))
	: fs_search(so->c,buffer,start,stop);

    Py_Assert(hit >= 0,
	      PyExc_SystemError,
	      "internal error");

    /* hit is the index right after the match */
    return PyInt_FromLong(hit == start ? -1 : hit - so->c->len_match);

 onError:
    return NULL;
}

/* FS.findall(): repeatedly search text[start:stop], restarting right
   after each match, and collect the (l,r) slices of all matches in a
   list.

   The loop ends when the remaining slice is shorter than the match
   string, or when the search routine returns start ("not found") or a
   negative value (reported as internal error). Every failure while
   building the result releases the partially built list. */

Py_C_Function( mxFS_findall,
	       "FS.findall(text,start=0,stop=len(text))\n\n"
	       "Search for the substring in text, looking only at the\n"
	       "slice [start:stop] and return a list of all\n"
	       "non overlapping slices (l,r) in text where the match\n"
	       "string can be found.")
{
    PyObject *list = 0;
    char *tr;
    int start = 0;
    int index = 0;
    char *buffer;
    int buffer_len;
    int stop = INT_MAX;

    Py_Get4Args(TYPECODE_TEXT_AND_LENGTH"|ii:FS.findall",
		buffer,buffer_len,start,stop);

    Py_CheckBufferSlice(buffer_len,start,stop);
    
    list = PyList_New(0);
    if (!list)
	goto onError;

    /* NULL selects the exact search */
    tr = so->tr ? PyString_AS_STRING(so->tr) : NULL;

    while (start <= stop - so->c->len_match) {
	PyObject *t;
	int rc;

	if (tr)
	    /* search with translate table */
	    index = fs_tr_search(so->c,
				 buffer,
				 start,
				 stop,
				 tr);
	else
	    /* exact search */
	    index = fs_search(so->c,
			      buffer,
			      start,
			      stop);
	if (index < 0 || index == start) /* Error or Not found */
	    break;

	/* Build the slice tuple and append it to the list; the list
	   takes its own reference, so ours is dropped in any case */
	t = Py_BuildValue("(ii)",index - so->c->len_match,index);
	if (!t)
	    goto onError;
	rc = PyList_Append(list,t);
	Py_DECREF(t);
	if (rc)
	    goto onError;

	/* Continue right after the match */
	start = index;
    }
    
    Py_Assert(index >= 0,
	      PyExc_SystemError,
	      "internal error");

    /* return list of slices */
    return list;

 onError:
    Py_XDECREF(list);
    return NULL;
}

#ifdef COPY_PROTOCOL
/* Copy-protocol hook: registered for both __copy__ and __deepcopy__ in
   the method table. The optional memo argument is parsed but not
   used; the object itself is returned with a new reference. */
Py_C_Function( mxFS_copy,
	       "copy([memo])\n\n"
	       "Return a new reference for the instance. This function\n"
	       "is used for the copy-protocol. Real copying doesn't take\n"
	       "place, since the instances are immutable.")
{
    PyObject *memo;
    PyObject *same = (PyObject *)so;

    Py_GetArg("|O",memo);

    Py_INCREF(same);
    return same;

 onError:
    return NULL;
}
#endif

#undef so

/* --- slots --- */

/* tp_print slot: writes "<Fast Search object for MATCH at ADDR>" to
   fp, where MATCH is printed via PyObject_Print() using the given
   flags.

   Returns 0 on success and -1 on error (self is NULL or printing the
   match string failed, in which case the exception set by
   PyObject_Print() is passed on instead of being ignored). */
static int
mxFS_Print(mxFSObject *self,
	    FILE *fp, 
	    int flags)
{
    if (self == NULL) return -1;
    fprintf(fp,"<Fast Search object for ");
    if (PyObject_Print(self->match,fp,flags))
	return -1;
    fprintf(fp," at %lx>",(long)self);
    return 0;
}

/* tp_getattr slot: serves the attributes "match" (the match string),
   "translate" (the translate table or None) and "__members__" (list
   of these attribute names); everything else is looked up in the
   method table. Returns a new reference. */
static PyObject
*mxFS_Getattr(mxFSObject *self,
	       char *name)
{
    if (Py_WantAttr(name,"match")) {
	Py_INCREF(self->match);
	return self->match;
    }

    if (Py_WantAttr(name,"translate")) {
	/* A missing translate table is reported as None */
	PyObject *table = self->tr ? self->tr : Py_None;

	Py_INCREF(table);
	return table;
    }

    if (Py_WantAttr(name,"__members__"))
	return Py_BuildValue("[ss]","match","translate");

    return Py_FindMethod(mxFS_Methods, (PyObject *)self, (char *)name);
}

/* Python Type Table */

/* Type object for Fast Search objects. Only the dealloc, print and
   getattr slots are filled in; all other slots are left empty. */
PyTypeObject mxFS_Type = {
        PyObject_HEAD_INIT(0)		/* init at startup ! */
	0,			  	/*ob_size*/
	"Fast Search",		  	/*tp_name*/
	sizeof(mxFSObject),	  	/*tp_basicsize*/
	0,			  	/*tp_itemsize*/
	/* methods */
	(destructor)mxFS_Free,		/*tp_dealloc*/
	(printfunc)mxFS_Print,	  	/*tp_print*/
	(getattrfunc)mxFS_Getattr,  	/*tp_getattr*/
	(setattrfunc)0,		  	/*tp_setattr*/
	(cmpfunc)0,		  	/*tp_compare*/
	(reprfunc)0,		  	/*tp_repr*/
        0,			  	/*tp_as_number*/
	0,				/*tp_as_sequence*/
	0,				/*tp_as_mapping*/
	(hashfunc)0,			/*tp_hash*/
	(ternaryfunc)0,			/*tp_call*/
	(reprfunc)0,			/*tp_str*/
	(getattrofunc)0, 		/*tp_getattro*/
	(setattrofunc)0, 		/*tp_setattro*/
};

/* Python Method Table */

/* Methods of Fast Search objects; this table is searched by
   mxFS_Getattr() for all names that are not data attributes. */
statichere
PyMethodDef mxFS_Methods[] =
{   
    Py_MethodListEntry("search",mxFS_search),
    Py_MethodListEntry("find",mxFS_find),
    Py_MethodListEntry("findall",mxFS_findall),
#ifdef COPY_PROTOCOL
    /* Both copy hooks map to the same function */
    Py_MethodListEntry("__deepcopy__",mxFS_copy),
    Py_MethodListEntry("__copy__",mxFS_copy),
#endif
    {NULL,NULL} /* end of list */
};

#endif

/* --- Internal functions ----------------------------------------------*/

/* Resolve one joinlist entry o into the character data it stands for.

   o must either be a string (taken as a whole) or a tuple of the form
   (string,l,r[,...]) denoting string[l:r]. Indices beyond the string
   are clipped; negative indices count from the position *after* the
   last character (-1 is len(string)).

   On success *st and *len_st describe the data. The pointer refers to
   memory owned by o (or by the string inside the tuple o), so the
   caller has to keep o alive while using it.

   Returns 1 if there is data to copy, 0 if a tuple entry denotes an
   empty slice (such entries are skipped by the callers) and -1 with
   an exception set if the entry is malformed. */
static
int mxTextTools_JoinListEntry(PyObject *o,
			      char **st,
			      int *len_st)
{
    if (PyTuple_Check(o)) {
	/* Tuple entry: (string,l,r,[...]) */
	PyObject *s;
	int len;
	int l,r;

	/* parse tuple */
	Py_Assert((PyTuple_GET_SIZE(o) >= 3) &&
		  PyString_Check(PyTuple_GET_ITEM(o,0)) &&
		  PyInt_Check(PyTuple_GET_ITEM(o,1)) && 
		  PyInt_Check(PyTuple_GET_ITEM(o,2)),
		  PyExc_TypeError,
		  "tuples must be of the format (string,l,r[,...])");
	s = PyTuple_GET_ITEM(o,0);
	len = PyString_GET_SIZE(s);
	l = PyInt_AS_LONG(PyTuple_GET_ITEM(o,1));
	r = PyInt_AS_LONG(PyTuple_GET_ITEM(o,2));

	/* compute slice */
	if (r > len) r = len;
	else if (r < 0) {
	    r += len + 1;
	    if (r < 0)
		r = 0;
	}
	if (l > len) l = len;
	else if (l < 0) {
	    l += len + 1;
	    if (l < 0)
		l = 0;
	}

	/* empty ? */
	if (l >= r)
	    return 0;

	*st = PyString_AS_STRING(s) + l;
	*len_st = r - l;
	return 1;
    }
    else if (PyString_Check(o)) {
	/* String entry: take the whole string */
	*st = PyString_AS_STRING(o);
	*len_st = PyString_GET_SIZE(o);
	return 1;
    }

    Py_Error(PyExc_TypeError,
	     "list must contain tuples or strings as entries");

 onError:
    return -1;
}

/* Join the entries seq[start:stop] into a new string, inserting the
   separator sep (len_sep characters) between consecutive entries.

   Entries are strings or (string,l,r[,...]) tuples as described for
   mxTextTools_JoinListEntry(); tuple entries denoting an empty slice
   are skipped together with their separator. len_sep may be 0, in
   which case sep is not accessed.

   Returns a new string object or NULL with an exception set. */
static
PyObject *mxTextTools_JoinSequenceWithSeparator(PyObject *seq,
						int start,
						int stop,
						char *sep,
						int len_sep)
{
    PyObject *newstring = 0;
    PyObject *o = 0;
    int len_newstring,current_len = 0;
    char *p;
    int i;
    
    /* Create an empty new string; the size is only a first guess and
       is kept above a small minimum so that the growth loop below
       always makes progress */
    len_newstring = (10 + len_sep) * (stop - start);
    if (len_newstring < 10)
	len_newstring = 10;
    newstring = PyString_FromStringAndSize((char*)NULL,len_newstring);
    if (newstring == NULL) 
	goto onError;
    p = PyString_AS_STRING(newstring);

    /* Join with separator */
    for (i = start; i < stop; i++) {
	char *st;
	int len_st;
	int rc;

	o = PySequence_GetItem(seq,i);
	if (o == NULL)
	    goto onError;

	rc = mxTextTools_JoinListEntry(o,&st,&len_st);
	if (rc < 0)
	    goto onError;
	if (rc == 0) {
	    /* empty slice: nothing to copy */
	    Py_DECREF(o);
	    o = 0;
	    continue;
	}

	/* Resize the new string if needed */
	while (current_len + len_st + len_sep >= len_newstring) {
	    len_newstring += len_newstring >> 1;
	    if (_PyString_Resize(&newstring, len_newstring))
		goto onError;
	    p = PyString_AS_STRING(newstring) + current_len;
	}

	/* Insert separator, but not in front of the first entry of
	   the slice */
	if (len_sep > 0 && i > start) {
	    memcpy(p, sep, len_sep);
	    p += len_sep;
	    current_len += len_sep;
	}

	/* Copy snippet into new string */
	memcpy(p,st,len_st);
	p += len_st;
	current_len += len_st;

	/* st points into memory owned by o, so o is only released
	   after the data has been copied */
	Py_DECREF(o);
	o = 0;
    }
    
    /* Resize new string to the actual length */
    if (_PyString_Resize(&newstring,current_len))
	goto onError;

    return newstring;

 onError:
    Py_XDECREF(o);
    Py_XDECREF(newstring);
    return NULL;
}

/* Join the entries seq[start:stop] into a new string without any
   separator; see mxTextTools_JoinSequenceWithSeparator() for the
   entry format.

   Returns a new string object or NULL with an exception set. */
static
PyObject *mxTextTools_JoinSequence(PyObject *seq,
				   int start,
				   int stop) 
{
    /* Same as joining with an empty separator */
    return mxTextTools_JoinSequenceWithSeparator(seq,start,stop,
						 (char*)NULL,0);
}

/* Convert the len bytes at str into a Python string of 2*len
   lower-case hex digits (high nibble first).

   Returns a new string object or NULL if it could not be created. */
static
PyObject *mxTextTools_HexStringFromString(char *str,
					  int len) 
{
    static const char hexdigits[] = "0123456789abcdef";
    PyObject *result;
    char *out;
    int pos;

    result = PyString_FromStringAndSize(NULL,2*len);
    if (result == NULL)
	return NULL;

    /* Each input byte yields two output characters */
    out = PyString_AS_STRING(result);
    for (pos = 0; pos < len; pos++) {
	unsigned char byte = (unsigned char)str[pos];

	out[2*pos] = hexdigits[byte >> 4];
	out[2*pos + 1] = hexdigits[byte & 0x0F];
    }
    return result;
}

/* Return the value (0..15) of the hex digit c, accepting both upper
   and lower case letters, or -1 if c is not a hex digit. */
static
int mxTextTools_HexDigitValue(char c)
{
    static const char hexdigits[] = "0123456789abcdef";
    /* <ctype.h> functions need a non-negative argument */
    int lower = tolower((unsigned char)c);
    int j;

    /* Only the 16 digits are compared; the terminating NUL byte of
       the table must not be treated as a digit */
    for (j = 0; j < 16; j++)
	if (lower == hexdigits[j])
	    return j;
    return -1;
}

/* Convert the len hex digits at hex into a Python string of len/2
   bytes; each pair of digits (high nibble first) gives one byte.

   Returns a new string object, or NULL with an exception set:
   TypeError if len is odd, ValueError if hex contains a character
   that is not a hex digit. */
static
PyObject *mxTextTools_StringFromHexString(char *hex,
					  int len)
{
    PyObject *w = 0;
    int i;
    char *str;

    /* Convert to string */
    Py_Assert(len % 2 == 0,
	      PyExc_TypeError,
	      "need 2-digit hex string argument");
    len >>= 1;
    w = PyString_FromStringAndSize(NULL,len);
    if (!w)
	goto onError;
    str = PyString_AS_STRING(w);
    for (i = 0; i < len; i++) {
	char c;
	int high,low;

	/* high nibble */
	c = hex[2*i];
	high = mxTextTools_HexDigitValue(c);
	if (high < 0) {
	  DPRINTF("Failed: '%c' (%u) at %i\n",c,(unsigned int)c,i);
	  Py_Error(PyExc_ValueError,
		   "argument contains non-hex characters");
	}

	/* low nibble */
	c = hex[2*i + 1];
	low = mxTextTools_HexDigitValue(c);
	if (low < 0) {
	  DPRINTF("Failed2: '%c' (%u) at %i\n",c,(unsigned int)c,i);
	  Py_Error(PyExc_ValueError,
		   "argument contains non-hex characters");
	}

	str[i] = (char)((high << 4) + low);
    }
    return w;

 onError:
    Py_XDECREF(w);
    return NULL;
}

/* Strips off characters appearing in the character set from text[start:stop]
   and returns the result as a Python string object.

   where indicates the mode:
   where < 0: strip left only
   where = 0: strip left and right
   where > 0: strip right only

   WARNING: No bounds checking is done. text[start:stop] must exist
   and set must also be a valid character set as returned by
   mxTextTools_set().

*/
static
PyObject *mxTextTools_Strip(char *text,
			    char *set,
			    int start,
			    int stop,
			    int where)
{
    int left = start;
    int right = stop;

    /* Strip left: advance past all leading characters in the set */
    if (where <= 0)
	while (left < stop && Py_CharInSet(text[left],set))
	    left++;

    /* Strip right: step back over all trailing characters in the set.
       The scan stops at left rather than at start; otherwise a slice
       consisting only of set characters would end up with
       right < left, i.e. a negative result length. */
    if (where >= 0)
	while (right > left && Py_CharInSet(text[right - 1],set))
	    right--;

    return PyString_FromStringAndSize(text + left, right - left);
}


/* --- Module functions ------------------------------------------------*/

/* Interface to the tagging engine in mxte.c */

/* tag(): checks the arguments and hands them to the tagging engine
   fast_tag(). A result of 0 from fast_tag() is treated as error; any
   other result is decremented by one and returned as success value,
   together with the taglist and the next index reported by
   fast_tag(). */

Py_C_Function( mxTextTools_tag,
	       "tag(text,tagtable,[startindex=0,len_text=len(text),taglist=[]]) \n"
	       "Produce a tag list for a string, given a tag-table\n"
	       "- returns a tuple (success, taglist, nextindex)\n"
	       "- if taglist == None, then no taglist is created\n"
	       "- note: this function does not accept keywords !")
{
    PyObject *pytext = 0;
    PyObject *table = 0;
    int len_text = INT_MAX;
    int start = 0;
    PyObject *taglist = 0;
    int next,result;
    PyObject *res;
    
    Py_Get5Args("OO|iiO:tag",pytext,table,start,len_text,taglist);

    /* In both branches taglist becomes an owned reference */
    if (taglist == NULL) { 
	/* not given, so use default: an empty list */
	taglist = PyList_New(0);
	if (taglist == NULL)
	    goto onError;
    }
    else {
	Py_INCREF(taglist);
	Py_Assert(PyList_Check(taglist) || taglist == Py_None,
		  PyExc_TypeError,
		  "optional fifth argument (taglist) must be a list or None");
    }
    
    Py_Assert(PyString_Check(pytext),
	      PyExc_TypeError,
	      "first argument must be a string");
    Py_Assert(PyTuple_Check(table),
	      PyExc_TypeError,
	      "second argument must be a tuple (the tag table)");

    Py_CheckSlice(pytext,start,len_text);

    /* Call the tagging engine */
    result = fast_tag(pytext,
		      PyString_AS_STRING(pytext),
		      len_text,
		      table,
		      start,
		      taglist,
		      &next);

    if (result == 0)
	goto onError;
    result--;

    /* Build result tuple; Py_BuildValue() reports failures of the
       integer conversions and takes its own reference to taglist */
    res = Py_BuildValue("(iOi)",result,taglist,next);
    if (!res)
	goto onError;
    Py_DECREF(taglist);
    return res;

 onError:
    /* Make sure that NULL is never returned without an exception */
    if (!PyErr_Occurred())
	PyErr_SetString(PyExc_SystemError,
			"NULL result without error in builtin tag()");
    Py_XDECREF(taglist);
    return NULL;
}

/* An extended version of string.join() for taglists: */

/* join(): validates the arguments, normalizes start/stop against the
   length of the joinlist and then delegates to one of the two join
   helpers, depending on whether a non-empty separator was given. */

Py_C_Function( mxTextTools_join,
	       "join(joinlist,sep='',start=0,stop=len(joinlist))\n\n"
	       "Copy snippets from different strings together producing a\n"
	       "new string\n"
	       "The first argument must be a list of tuples or strings;\n"
	       "tuples must be of the form (string,l,r[,...]) and turn out\n"
	       "as string[l:r]\n"
	       "NOTE: the syntax used for negative slices is different\n"
	       "than the Python standard: -1 corresponds to the first\n"
	       "character *after* the string, e.g. ('Example',0,-1) gives\n"
	       "'Example' and not 'Exampl', like in Python\n"
	       "sep is an optional separator string, start and stop\n"
	       "define the slice of joinlist that is taken into accont."
	       )
{
    PyObject *joinlist = 0;
    char *sep;
    int len_sep = 0;
    int start = 0;
    int stop = INT_MAX;
    int count;

    Py_Get5Args("O|"TYPECODE_TEXT_AND_LENGTH"ii:join",
		joinlist,sep,len_sep,start,stop);

    Py_Assert(PySequence_Check(joinlist),
	      PyExc_TypeError,
	      "first argument needs to be a sequence");

    count = PySequence_Length(joinlist);
    Py_Assert(count >= 0,
	      PyExc_TypeError,
	      "first argument needs to have a __len__ method");

    /* Negative indices are relative to the end of the joinlist and
       are clipped at 0; stop is also clipped at the length */
    if (start < 0)
	start = (start + count < 0) ? 0 : start + count;
    if (stop < 0)
	stop = (stop + count < 0) ? 0 : stop + count;
    if (stop > count)
	stop = count;

    /* Nothing to join */
    if (stop <= start)
	return PyString_FromString("");

    if (len_sep != 0)
	return mxTextTools_JoinSequenceWithSeparator(joinlist,start,stop,
						     sep,len_sep);
    return mxTextTools_JoinSequence(joinlist,start,stop);

 onError:
    return NULL;
}

/*
   Special compare function for taglist-tuples, comparing
   the text-slices given:
    - slices starting at a smaller index come first
    - for slices starting at the same index, the longer one
      wins
*/

Py_C_Function( mxTextTools_cmp,
	       "cmp(a,b)\n\n"
	       "Compare two valid taglist tuples w/r to their slice\n"
	       "position; this is useful for sorting joinlists.")
{
    PyObject *v,*w;
    int order;

    Py_Get2Args("OO:cmp",v,w);

    Py_Assert(PyTuple_Check(v) && PyTuple_Check(w) && 
	      PyTuple_GET_SIZE(v) >= 3 && PyTuple_GET_SIZE(w) >= 3,
	      PyExc_TypeError,
	      "invalid taglist-tuple");

    /* Primary key: the left index (entry 1), ascending */
    order = PyObject_Compare(PyTuple_GET_ITEM(v,1),PyTuple_GET_ITEM(w,1));

    /* Tie-break: the right index (entry 2) with inverted sign, so that
       the longer slice comes first */
    if (order == 0)
	order = - PyObject_Compare(PyTuple_GET_ITEM(v,2),
				   PyTuple_GET_ITEM(w,2));

    return PyInt_FromLong(order);

 onError:
    return NULL;
}

Py_C_Function( mxTextTools_joinlist,
	       "joinlist(text,list,start=0,stop=len(text))\n\n"
	       "Takes a list of tuples (replacement,l,r,...) and produces\n"
	       "a taglist suitable for join() which creates a copy\n"
	       "of text where every slice [l:r] is replaced by the\n"
	       "given replacement\n"
	       "- the list must be sorted using cmp() as compare function\n"
	       "- it may not contain overlapping slices\n"
	       "- the slices may not contain negative indices\n"
	       "- if the taglist cannot contain overlapping slices, you can\n"
	       "  give this function the taglist produced by tag() directly\n"
	       "  (sorting is not needed, as the list will already be sorted)\n"
	       "- start and stop set the slice to work in, i.e. text[start:stop]"
)
{
    PyObject *list;
    PyObject *text;
    PyObject *joinlist = 0;
    int len_list;
    int len_text = INT_MAX;
    int pos = 0;
    register int i;
    int listitem = 0;
    int listsize = INITIAL_LIST_SIZE;
    
    Py_Get4Args("OO|ii:joinlist",text,list,pos,len_text);

    Py_Assert(PyString_Check(text),
	      PyExc_TypeError,
	      "first argument needs to be a string");
    Py_Assert(PyList_Check(list),
	      PyExc_TypeError,
	      "second argument needs to be a list");

    if (len_text > PyString_GET_SIZE(text))
	len_text = PyString_GET_SIZE(text);
    
    len_list = PyList_GET_SIZE(list);

    joinlist = PyList_New(listsize);
    if (joinlist == NULL)
	goto onError;

    for (i = 0; i < len_list; i++) {
	register PyObject *t;
	register int left,right;
	
	t = PyList_GET_ITEM(list,i);
	Py_Assert(PyTuple_Check(t) && 
		  (PyTuple_GET_SIZE(t) >= 3) &&
		  PyInt_Check(PyTuple_GET_ITEM(t,1)) &&
		  PyInt_Check(PyTuple_GET_ITEM(t,2)),
		  PyExc_TypeError,
		  "tuples must be of the form (string,int,int,...)");
	left = PyInt_AS_LONG(PyTuple_GET_ITEM(t,1));
	right = PyInt_AS_LONG(PyTuple_GET_ITEM(t,2));

	Py_Assert(left >= pos,
		  PyExc_ValueError,
		  "list is not sorted ascending");

	if (left > pos) { /* joinlist.append((text,pos,left)) */
	    register PyObject *v;
	    register PyObject *w;
	    
	    v = PyTuple_New(3);
	    if (v == NULL)
		goto onError;

	    Py_INCREF(text);
	    PyTuple_SET_ITEM(v,0,text);

	    w = PyInt_FromLong(pos);
	    if (w == NULL)
		goto onError;
	    PyTuple_SET_ITEM(v,1,w);

	    w = PyTuple_GET_ITEM(t,1);
	    Py_INCREF(w);
	    PyTuple_SET_ITEM(v,2,w);

	    if (listitem < listsize)
		PyList_SET_ITEM(joinlist,listitem,v);
	    else {
		PyList_Append(joinlist,v);
		Py_DECREF(v);
	    }
	    listitem++;
	}
	
	/* joinlist.append(string) */
	if (listitem < listsize) {
	    register PyObject *v = PyTuple_GET_ITEM(t,0);
	    Py_INCREF(v);
	    PyList_SET_ITEM(joinlist,listitem,v);
	}
	else
	    PyList_Append(joinlist,PyTuple_GET_ITEM(t,0));
	listitem++;
	
	pos = right;
    }
    
    if (pos < len_text) { /* joinlist.append((text,pos,len_text)) */
	register PyObject *v;
	register PyObject *w;
	    
	v = PyTuple_New(3);
	if (v == NULL)
	    goto onError;

	Py_INCREF(text);
	PyTuple_SET_ITEM(v,0,text);

	w = PyInt_FromLong(pos);
	if (w == NULL)
	    goto onError;
	PyTuple_SET_ITEM(v,1,w);

	w = PyInt_FromLong(len_text);
	if (w == NULL)
	    goto onError;
	PyTuple_SET_ITEM(v,2,w);

	if (listitem < listsize)
	    PyList_SET_ITEM(joinlist,listitem,v);
	else {
	    PyList_Append(joinlist,v);
	    Py_DECREF(v);
	}
	listitem++;
    }

    /* Resize list if necessary */
    if (listitem < listsize)
	PyList_SetSlice(joinlist,listitem,listsize,(PyObject*)NULL);

    return joinlist;

 onError:

    Py_XDECREF(joinlist);
    return NULL;
}

/* charsplit(text,char,start=0,stop=len(text))

   Splits text[start:stop] at every occurrence of the single
   character char and returns the pieces as a list of strings.  The
   separators are not included; empty pieces are kept, so the result
   always has one more entry than there are separators in the slice.

   Raises TypeError if text is not a string or char is not exactly
   one character long.  Failures while building the list are
   propagated. */

Py_C_Function( mxTextTools_charsplit,
               "charsplit(text,char,start=0,stop=len(text))\n\n"
               "Split text[start:stop] into substrings at char and\n"
               "return the result as list of strings."
)
{
    PyObject *text;
    PyObject *list = 0;
    char *separator;
    int sep_len;
    int len_text = INT_MAX;
    int start = 0;
    int x;
    char *tx;
    char sep;
    int listitem = 0;
    int listsize = INITIAL_LIST_SIZE;

    Py_Get5Args("O"TYPECODE_TEXT_AND_LENGTH"|ii:charsplit",
                text,separator,sep_len,start,len_text);

    Py_Assert(PyString_Check(text),
              PyExc_TypeError,
              "first argument needs to be a string");
    Py_Assert(sep_len == 1,
              PyExc_TypeError,
              "second argument needs to be a single character");
    Py_CheckSlice(text,start,len_text);

    /* The list starts out with listsize empty slots which are filled
       in place; unused slots are cut off again before returning */
    list = PyList_New(listsize);
    if (!list)
        goto onError;

    x = start;
    tx = PyString_AS_STRING(text);
    sep = *separator;

    while (1) {
        PyObject *s;
        int z;

        /* Skip to next separator */
        z = x;
        while (x < len_text && tx[x] != sep)
            x++;

        /* Append the slice to list */
        s = PyString_FromStringAndSize(&tx[z], x - z);
        if (!s)
            goto onError;
        if (listitem < listsize)
            PyList_SET_ITEM(list,listitem,s);
        else {
            int rc = PyList_Append(list,s);

            Py_DECREF(s);
            if (rc)
                goto onError;
        }
        listitem++;

        if (x == len_text)
            break;

        /* Skip separator */
        x++;
    }

    /* Cut off the unused preallocated slots */
    if (listitem < listsize &&
        PyList_SetSlice(list,listitem,listsize,(PyObject*)NULL))
        goto onError;

    return list;
    
 onError:
    Py_XDECREF(list);
    return NULL;
}

/* splitat(text,char,nth=1,start=0,stop=len(text))

   Locates the nth occurrence of the single character char in
   text[start:stop] (counting from the left for nth > 0, from the
   right for nth < 0) and returns the 2-tuple
   (text[start:pos], text[pos+1:stop]).

   If there are fewer than |nth| occurrences, the whole slice ends up
   in the first entry (nth > 0) or in the second entry (nth < 0) and
   the other entry is the empty string.

   Raises TypeError for a non-string text or a char that is not one
   character long, ValueError for nth == 0. */

Py_C_Function( mxTextTools_splitat,
               "splitat(text,char,nth=1,start=0,stop=len(text))\n\n"
               "Split text[start:stop] into two substrings at the nth\n"
               "occurance of char and return the result as 2-tuple. If the\n"
               "character is not found, the second string is empty. nth may\n"
               "be negative: the search is then done from the right and the\n"
               "first string is empty in case the character is not found."
)
{
    PyObject *text;
    PyObject *result = 0;
    PyObject *part;
    char *separator;
    int sep_len;
    int len_text = INT_MAX;
    int start = 0;
    int nth = 1;
    int pos;
    char *data;

    Py_Get6Args("O"TYPECODE_TEXT_AND_LENGTH"|iii:splitat",
                text,separator,sep_len,nth,start,len_text);

    Py_Assert(PyString_Check(text),
              PyExc_TypeError,
              "first argument needs to be a string");
    Py_Assert(sep_len == 1,
              PyExc_TypeError,
              "second argument needs to be a single character");
    Py_CheckSlice(text,start,len_text);

    result = PyTuple_New(2);
    if (result == NULL)
        goto onError;

    data = PyString_AS_STRING(text);

    if (nth > 0) {
        /* Walk right, counting separators; stops on the nth one or
           at len_text if there are not enough of them */
        for (pos = start; pos < len_text; pos++) {
            if (data[pos] == *separator && --nth == 0)
                break;
        }
    }
    else if (nth < 0) {
        /* Walk left, counting separators; stops on the nth one or
           at start - 1 if there are not enough of them */
        for (pos = len_text - 1; pos >= start; pos--) {
            if (data[pos] == *separator && ++nth == 0)
                break;
        }
    }
    else {
        Py_Error(PyExc_ValueError,
                 "nth must be non-zero");
    }

    /* First entry: everything left of the split position */
    part = (pos >= start) ?
        PyString_FromStringAndSize(data + start, pos - start) :
        PyString_FromStringAndSize("",0);
    if (part == NULL)
        goto onError;
    PyTuple_SET_ITEM(result,0,part);

    /* Second entry: everything right of the separator */
    pos++;
    part = (pos < len_text) ?
        PyString_FromStringAndSize(data + pos, len_text - pos) :
        PyString_FromStringAndSize("",0);
    if (part == NULL)
        goto onError;
    PyTuple_SET_ITEM(result,1,part);

    return result;
    
 onError:
    Py_XDECREF(result);
    return NULL;
}

/* suffix(text,suffixes,start=0,stop=len(text)[,translate])

   Returns the first entry of the tuple suffixes that text[start:stop]
   ends with, or None if there is no such entry.  Entries longer than
   the slice are skipped.  If the 256 character string translate is
   given, each character of text is mapped through it before being
   compared to the suffix.

   The comparison is done on the raw bytes, so strings containing
   embedded NUL characters are compared in full; an empty suffix
   matches any slice, with or without translate table.

   Raises TypeError if text is not a string, suffixes is not a tuple,
   an inspected tuple entry is not a string or translate does not
   have 256 characters. */

Py_C_Function( mxTextTools_suffix,
               "suffix(text,suffixes,start=0,stop=len(text)[,translate])\n\n"
               "Looks at text[start:stop] and returns the first matching\n"
               "suffix out of the tuple of strings given in suffixes.\n"
               "If no suffix is found to be matching, None is returned.\n"
               "The optional 256 char translate string is used to translate\n"
               "the text prior to comparing it with the given suffixes."
               )
{
    PyObject *text;
    PyObject *suffixes;
    unsigned char *tr = NULL;
    int tr_len;
    int len_text = INT_MAX;
    int start = 0;
    int i;

    Py_Get6Args("OO|ii"TYPECODE_TEXT_AND_LENGTH":suffix",
                text,suffixes,start,len_text,tr,tr_len);

    Py_Assert(PyString_Check(text),
              PyExc_TypeError,
              "first argument needs to be a string");
    Py_Assert(PyTuple_Check(suffixes),
              PyExc_TypeError,
              "second argument needs to be a tuple of strings");
    Py_CheckSlice(text,start,len_text);

    if (tr) {
        Py_Assert(tr_len == 256,
                  PyExc_TypeError,
                  "translate string must have 256 characters");
    }

    for (i = 0; i < PyTuple_GET_SIZE(suffixes); i++) {
        PyObject *suffix = PyTuple_GET_ITEM(suffixes,i);
        int suffix_len;
        int start_cmp;

        Py_AssertWithArg(PyString_Check(suffix),
                         PyExc_TypeError,
                         "tuple entry %i is not a string",i);

        /* The suffix has to fit into text[start:stop] */
        suffix_len = PyString_GET_SIZE(suffix);
        start_cmp = len_text - suffix_len;
        if (start_cmp < start)
            continue;

        if (tr) {
            /* Do the compare using a translate table */
            unsigned char *s;
            unsigned char *t;
            int j;

            s = (unsigned char *)PyString_AS_STRING(suffix);
            t = (unsigned char *)PyString_AS_STRING(text) + start_cmp;
            for (j = 0; j < suffix_len; j++)
                if (s[j] != tr[t[j]])
                    break;
            if (j < suffix_len)
                continue;
        }
        else {
            /* Compare without translate table; memcmp() rather than
               strncmp() because Python strings may contain NULs */
            if (memcmp(PyString_AS_STRING(suffix),
                       PyString_AS_STRING(text) + start_cmp,
                       suffix_len) != 0)
                continue;
        }

        Py_INCREF(suffix);
        return suffix;
    }

    Py_ReturnNone();
    
 onError:
    return NULL;
}

/* prefix(text,prefixes,start=0,stop=len(text)[,translate])

   Returns the first entry of the tuple prefixes that text[start:stop]
   starts with, or None if there is no such entry.  Entries longer
   than the slice are skipped.  If the 256 character string translate
   is given, each character of text is mapped through it before being
   compared to the prefix.

   The comparison is done on the raw bytes, so strings containing
   embedded NUL characters are compared in full; an empty prefix
   matches any slice, with or without translate table.

   Raises TypeError if text is not a string, prefixes is not a tuple,
   an inspected tuple entry is not a string or translate does not
   have 256 characters. */

Py_C_Function( mxTextTools_prefix,
               "prefix(text,prefixes,start=0,stop=len(text)[,translate])\n\n"
               "Looks at text[start:stop] and returns the first matching\n"
               "prefix out of the tuple of strings given in prefixes.\n"
               "If no prefix is found to be matching, None is returned.\n"
               "The optional 256 char translate string is used to translate\n"
               "the text prior to comparing it with the given prefixes."
)
{
    PyObject *text;
    PyObject *prefixes;
    unsigned char *tr = NULL;
    int tr_len;
    int len_text = INT_MAX;
    int start = 0;
    int i;

    Py_Get6Args("OO|ii"TYPECODE_TEXT_AND_LENGTH":prefix",
                text,prefixes,start,len_text,tr,tr_len);

    Py_Assert(PyString_Check(text),
              PyExc_TypeError,
              "first argument needs to be a string");
    Py_Assert(PyTuple_Check(prefixes),
              PyExc_TypeError,
              "second argument needs to be a tuple of strings");
    Py_CheckSlice(text,start,len_text);

    if (tr) {
        Py_Assert(tr_len == 256,
                  PyExc_TypeError,
                  "translate string must have 256 characters");
    }

    for (i = 0; i < PyTuple_GET_SIZE(prefixes); i++) {
        PyObject *prefix = PyTuple_GET_ITEM(prefixes,i);
        int cmp_len;

        Py_AssertWithArg(PyString_Check(prefix),
                         PyExc_TypeError,
                         "tuple entry %i is not a string",i);

        /* The prefix has to fit into text[start:stop] */
        cmp_len = PyString_GET_SIZE(prefix);
        if (start + cmp_len > len_text)
            continue;

        if (tr) {
            /* Do the compare using a translate table */
            unsigned char *s;
            unsigned char *t;
            int j;

            s = (unsigned char *)PyString_AS_STRING(prefix);
            t = (unsigned char *)PyString_AS_STRING(text) + start;
            for (j = 0; j < cmp_len; j++)
                if (s[j] != tr[t[j]])
                    break;
            if (j < cmp_len)
                continue;
        }
        else {
            /* Compare without translate table; memcmp() rather than
               strncmp() because Python strings may contain NULs */
            if (memcmp(PyString_AS_STRING(prefix),
                       PyString_AS_STRING(text) + start,
                       cmp_len) != 0)
                continue;
        }

        Py_INCREF(prefix);
        return prefix;
    }

    Py_ReturnNone();
    
 onError:
    return NULL;
}

/* set(string,logic=1)

   Builds a 32 byte string used as a 256 bit map: the bit for
   character code c is bit (c & 7) of byte (c >> 3).  With a true
   logic the map starts out empty and the bits of all characters in
   string are switched on; with logic 0 it starts out full and these
   bits are switched off. */

Py_C_Function( mxTextTools_set,
               "set(string,logic=1)\n\n"
               "Returns a character set for string: a bit encoded version\n"
               "of the characters occurring in string.\n"
               "- logic can be set to 0 if all characters *not* in string\n"
               "  should go into the set")
{
    PyObject *result;
    char *chars;
    char *bits;
    int len_chars;
    int logic = 1;
    int k;

    Py_Get3Args(TYPECODE_TEXT_AND_LENGTH"|i:set",
                chars,len_chars,logic);

    result = PyString_FromStringAndSize(NULL,32);
    if (result == NULL)
        goto onError;
    bits = PyString_AS_STRING(result);

    /* Start with the empty set (logic) or the full set (!logic) ... */
    memset(bits, logic ? 0x00 : 0xFF, 32);

    /* ... and flip the bit of every character given */
    for (k = 0; k < len_chars; k++) {
        int code = (unsigned char)chars[k];

        if (logic)
            bits[code >> 3] |= 1 << (code & 7);
        else
            bits[code >> 3] &= ~(1 << (code & 7));
    }

    return result;

 onError:
    return NULL;
}

/* setfind(text,set,start=0,stop=len(text))

   Returns the index of the first character in text[start:stop] for
   which Py_CharInSet() reports membership in set, or -1 if there is
   none.  set must be a string of exactly 32 characters.

   Raises TypeError if text is not a string or set does not have the
   required form. */

Py_C_Function( mxTextTools_setfind,
               "setfind(text,set,start=0,stop=len(text))\n\n"
               "Find the first occurence of any character from set in\n"
               "text[start:stop]\n"
               "- set must be a string obtained with set()"
)
{
    PyObject *text;
    PyObject *set;
    int len_text = INT_MAX;
    int start = 0;
    int pos;
    char *data;
    unsigned char *bitmap;
    
    Py_Get4Args("OO|ii:setfind",text,set,start,len_text);

    Py_Assert(PyString_Check(text),
              PyExc_TypeError,
              "first argument needs to be a string");
    Py_Assert(PyString_Check(set) && PyString_GET_SIZE(set) == 32,
              PyExc_TypeError,
              "second argument needs to be a set");
    Py_CheckSlice(text,start,len_text);

    data = PyString_AS_STRING(text);
    bitmap = (unsigned char *)PyString_AS_STRING(set);

    /* Report the first hit right away */
    for (pos = start; pos < len_text; pos++) {
        if (Py_CharInSet(data[pos],bitmap))
            return PyInt_FromLong(pos);
    }

    /* Not found */
    return PyInt_FromLong(-1L);

 onError:
    return NULL;
}

/* setstrip(text,set,start=0,stop=len(text),mode=0)

   Checks the arguments, normalizes the slice with Py_CheckSlice()
   and hands the actual work over to mxTextTools_Strip(), whose
   result is returned unchanged.

   Raises TypeError if text is not a string or set is not a string
   of exactly 32 characters. */

Py_C_Function( mxTextTools_setstrip,
               "setstrip(text,set,start=0,stop=len(text),mode=0)\n\n"
               "Strip all characters in text[start:stop] appearing in set.\n"
               "mode indicates where to strip (<0: left; =0: left and right;\n"
               ">0: right).\n"
               "- set must be a string obtained with set()"
)
{
    PyObject *text;
    PyObject *set;
    int stop = INT_MAX;
    int start = 0;
    int mode = 0;
    
    Py_Get5Args("OO|iii:setstrip",text,set,start,stop,mode);

    Py_Assert(PyString_Check(text),
              PyExc_TypeError,
              "first argument needs to be a string");
    Py_Assert(PyString_Check(set) && PyString_GET_SIZE(set) == 32,
              PyExc_TypeError,
              "second argument needs to be a set");
    Py_CheckSlice(text,start,stop);

    return mxTextTools_Strip(PyString_AS_STRING(text),
                             PyString_AS_STRING(set),
                             start,stop,mode);

 onError:
    return NULL;
}

/* setsplit(text,set,start=0,stop=len(text))

   Splits text[start:stop] at runs of characters whose bit is set in
   the 32 byte bit map set and returns the non-empty pieces in
   between as a list of strings.  The splitting characters themselves
   are dropped.

   Raises TypeError if text is not a string or set is not a string
   of exactly 32 characters.  Failures while building the list are
   propagated. */

Py_C_Function( mxTextTools_setsplit,
               "setsplit(text,set,start=0,stop=len(text))\n\n"
               "Split text[start:stop] into substrings using set,\n"
               "omitting the splitting parts and empty substrings.\n"
               "- set must be a string obtained from set()"
)
{
    PyObject *text;
    PyObject *set;
    PyObject *list = 0;
    int len_text = INT_MAX;
    int start = 0;
    int x;
    unsigned char *tx;
    unsigned char *setstr;
    int listitem = 0;
    int listsize = INITIAL_LIST_SIZE;

    Py_Get4Args("OO|ii:setsplit",text,set,start,len_text);

    Py_Assert(PyString_Check(text),
              PyExc_TypeError,
              "first argument needs to be a string");
    Py_Assert(PyString_Check(set) && PyString_GET_SIZE(set) == 32,
              PyExc_TypeError,
              "second argument needs to be a set");
    Py_CheckSlice(text,start,len_text);

    /* The list starts out with listsize empty slots which are filled
       in place; unused slots are cut off again before returning */
    list = PyList_New(listsize);
    if (!list)
        goto onError;

    x = start;
    tx = (unsigned char *)PyString_AS_STRING(text);
    setstr = (unsigned char *)PyString_AS_STRING(set);

    while (x < len_text) {
        int z;

        /* Skip all text not in set */
        z = x;
        for (;x < len_text; x++) {
            unsigned int c = tx[x];

            if (setstr[c >> 3] & (1 << (c & 7)))
                break;
        }

        /* Append the slice to list if it is not empty */
        if (x > z) {
            PyObject *s;

            s = PyString_FromStringAndSize((char *)&tx[z], x - z);
            if (!s)
                goto onError;
            if (listitem < listsize)
                PyList_SET_ITEM(list,listitem,s);
            else {
                int rc = PyList_Append(list,s);

                Py_DECREF(s);
                if (rc)
                    goto onError;
            }
            listitem++;
        }

        if (x == len_text)
            break;

        /* Skip all text in set */
        for (;x < len_text; x++) {
            unsigned int c = tx[x];

            if (!(setstr[c >> 3] & (1 << (c & 7))))
                break;
        }
    }

    /* Cut off the unused preallocated slots */
    if (listitem < listsize &&
        PyList_SetSlice(list,listitem,listsize,(PyObject*)NULL))
        goto onError;

    return list;
    
 onError:
    Py_XDECREF(list);
    return NULL;
}

/* setsplitx(text,set,start=0,stop=len(text))

   Splits text[start:stop] into alternating runs: entries with even
   index hold characters whose bit is not set in the 32 byte bit map
   set, entries with odd index hold characters whose bit is set.
   Unlike setsplit() nothing is dropped; the first entry is the empty
   string if the slice starts with a character from set.  An empty
   slice gives an empty list.

   Raises TypeError if text is not a string or set is not a string
   of exactly 32 characters.  Failures while building the list are
   propagated. */

Py_C_Function( mxTextTools_setsplitx,
               "setsplitx(text,set,start=0,stop=len(text))\n\n"
               "Split text[start:stop] into substrings using set, so\n"
               "that every second entry consists only of characters in set.\n"
               "- set must be a string obtained with set()"
)
{
    PyObject *text;
    PyObject *set;
    PyObject *list = 0;
    int len_text = INT_MAX;
    int start = 0;
    int x;
    unsigned char *tx;
    unsigned char *setstr;
    int listitem = 0;
    int listsize = INITIAL_LIST_SIZE;

    Py_Get4Args("OO|ii:setsplitx",text,set,start,len_text);

    Py_Assert(PyString_Check(text),
              PyExc_TypeError,
              "first argument needs to be a string");
    Py_Assert(PyString_Check(set) && PyString_GET_SIZE(set) == 32,
              PyExc_TypeError,
              "second argument needs to be a set");
    Py_CheckSlice(text,start,len_text);

    /* The list starts out with listsize empty slots which are filled
       in place; unused slots are cut off again before returning */
    list = PyList_New(listsize);
    if (!list)
        goto onError;

    x = start;
    tx = (unsigned char *)PyString_AS_STRING(text);
    setstr = (unsigned char *)PyString_AS_STRING(set);

    while (x < len_text) {
        PyObject *s;
        int z;

        /* Skip all text not in set */
        z = x;
        for (;x < len_text; x++) {
            unsigned int c = tx[x];

            if (setstr[c >> 3] & (1 << (c & 7)))
                break;
        }

        /* Append the slice to list (even if it is empty) */
        s = PyString_FromStringAndSize((char *)&tx[z], x - z);
        if (!s)
            goto onError;
        if (listitem < listsize)
            PyList_SET_ITEM(list,listitem,s);
        else {
            int rc = PyList_Append(list,s);

            Py_DECREF(s);
            if (rc)
                goto onError;
        }
        listitem++;

        if (x >= len_text)
            break;

        /* Skip all text in set */
        z = x;
        for (;x < len_text; x++) {
            unsigned int c = tx[x];

            if (!(setstr[c >> 3] & (1 << (c & 7))))
                break;
        }

        /* Append the separator run to list; it holds at least the
           character that ended the scan above */
        s = PyString_FromStringAndSize((char *)&tx[z], x - z);
        if (!s)
            goto onError;
        if (listitem < listsize)
            PyList_SET_ITEM(list,listitem,s);
        else {
            int rc = PyList_Append(list,s);

            Py_DECREF(s);
            if (rc)
                goto onError;
        }
        listitem++;
    }

    /* Cut off the unused preallocated slots */
    if (listitem < listsize &&
        PyList_SetSlice(list,listitem,listsize,(PyObject*)NULL))
        goto onError;

    return list;
    
 onError:
    Py_XDECREF(list);
    return NULL;
}

/* upper(text)

   Returns a new string of the same length as text in which every
   character has been replaced by its entry in the module level
   translation string mxTo_Upper.  Raises TypeError if text is not
   a string. */

Py_C_Function( mxTextTools_upper,
               "upper(text)\n\n"
               "Return text converted to upper case.")
{
    PyObject *text;
    PyObject *result;
    unsigned char *table;
    unsigned char *src;
    unsigned char *dst;
    int size;
    int k;
    
    Py_GetArgObject(text);

    Py_Assert(PyString_Check(text),
              PyExc_TypeError,
              "expected a Python string");

    /* Uninitialized string of the same size; filled in below */
    size = PyString_GET_SIZE(text);
    result = PyString_FromStringAndSize(NULL,size);
    if (result == NULL)
        goto onError;
    
    /* Translate */
    table = (unsigned char *)PyString_AS_STRING(mxTo_Upper);
    src = (unsigned char *)PyString_AS_STRING(text);
    dst = (unsigned char *)PyString_AS_STRING(result);
    for (k = 0; k < size; k++)
        dst[k] = table[src[k]];
    
    return result;
    
 onError:
    return NULL;
}

/* lower(text)

   Returns a new string of the same length as text in which every
   character has been replaced by its entry in the module level
   translation string mxTo_Lower.  Raises TypeError if text is not
   a string. */

Py_C_Function( mxTextTools_lower,
               "lower(text)\n\n"
               "Return text converted to lower case.")
{
    PyObject *text;
    PyObject *result;
    unsigned char *table;
    unsigned char *src;
    unsigned char *dst;
    int size;
    int k;
    
    Py_GetArgObject(text);

    Py_Assert(PyString_Check(text),
              PyExc_TypeError,
              "expected a Python string");

    /* Uninitialized string of the same size; filled in below */
    size = PyString_GET_SIZE(text);
    result = PyString_FromStringAndSize(NULL,size);
    if (result == NULL)
        goto onError;
    
    /* Translate */
    table = (unsigned char *)PyString_AS_STRING(mxTo_Lower);
    src = (unsigned char *)PyString_AS_STRING(text);
    dst = (unsigned char *)PyString_AS_STRING(result);
    for (k = 0; k < size; k++)
        dst[k] = table[src[k]];
    
    return result;
    
 onError:
    return NULL;
}

/* str2hex(text)

   Thin wrapper: extracts the character data and length of the
   argument and returns whatever mxTextTools_HexStringFromString()
   produces for them. */

Py_C_Function( mxTextTools_str2hex,
               "str2hex(text)\n\n"
               "Return text converted to a string consisting of two byte\n"
               "HEX values.")
{
    char *data;
    int data_len;
    PyObject *hexstring;
    
    Py_Get2Args(TYPECODE_TEXT_AND_LENGTH,data,data_len);

    hexstring = mxTextTools_HexStringFromString(data,data_len);
    return hexstring;
    
 onError:
    return NULL;
}

/* hex2str(text)

   Thin wrapper: extracts the character data and length of the
   argument and returns whatever mxTextTools_StringFromHexString()
   produces for them. */

Py_C_Function( mxTextTools_hex2str,
               "hex2str(text)\n\n"
               "Return text interpreted as two byte HEX values converted\n"
               "to a string.")
{
    char *hexdata;
    int hexdata_len;
    PyObject *string;
    
    Py_Get2Args(TYPECODE_TEXT_AND_LENGTH,hexdata,hexdata_len);

    string = mxTextTools_StringFromHexString(hexdata,hexdata_len);
    return string;
    
 onError:
    return NULL;
}

/* --- module init --------------------------------------------------------- */

/* Python Method Table */

/* Table of the module level functions; it is handed to
   Py_InitModule4() in initmxTextTools().  Each entry pairs the name
   visible from Python with the C function implementing it. */
static PyMethodDef Module_methods[] =
{   
    /* Tagging and joinlist handling */
    Py_MethodListEntry("tag",mxTextTools_tag),
    Py_MethodListEntry("join",mxTextTools_join),
    Py_MethodListEntry("cmp",mxTextTools_cmp),
    Py_MethodListEntry("joinlist",mxTextTools_joinlist),
    /* Character set functions */
    Py_MethodListEntry("set",mxTextTools_set),
    Py_MethodListEntry("setfind",mxTextTools_setfind),
    Py_MethodListEntry("setsplit",mxTextTools_setsplit),
    Py_MethodListEntry("setsplitx",mxTextTools_setsplitx),
    Py_MethodListEntry("setstrip",mxTextTools_setstrip),
    /* Search object constructors; FS is only available if the module
       is compiled with MXFASTSEARCH defined */
    Py_MethodListEntry("BMS",mxBMS_new),
#ifdef MXFASTSEARCH
    Py_MethodListEntry("FS",mxFS_new),
#endif
    /* Case conversion; these two are registered with a different
       macro and fetch their argument via Py_GetArgObject()
       (presumably they receive the argument object directly instead
       of an argument tuple -- confirm against mx.h) */
    Py_MethodListEntrySingleArg("upper",mxTextTools_upper),
    Py_MethodListEntrySingleArg("lower",mxTextTools_lower),
    /* Splitting and prefix/suffix matching */
    Py_MethodListEntry("charsplit",mxTextTools_charsplit),
    Py_MethodListEntry("splitat",mxTextTools_splitat),
    Py_MethodListEntry("suffix",mxTextTools_suffix),
    Py_MethodListEntry("prefix",mxTextTools_prefix),
    /* HEX conversion */
    Py_MethodListEntry("hex2str",mxTextTools_hex2str),
    Py_MethodListEntry("str2hex",mxTextTools_str2hex),
    {NULL,NULL} /* end of list */
};

/* Cleanup function; initmxTextTools() registers it with Py_AtExit().
   The body is currently empty, i.e. nothing is released here. */
static 
void mxTextToolsModule_Cleanup(void)
{
}

/* Module initialization function.

   Initializes the type objects, creates the module and adds the
   following entries to its dictionary:
    - __version__: the VERSION string
    - to_upper, to_lower: the translation strings created by
      mxTo_Upper_New() and mxTo_Lower_New(); references to them are
      also kept in the module globals mxTo_Upper and mxTo_Lower
    - BMSType (and FSType if MXFASTSEARCH is defined): the type
      objects

   Initialization stops at the first object that cannot be created.
   In any case a pending exception is reported through
   Py_ReportModuleInitError() before returning. */

MX_EXPORT(void) 
     initmxTextTools(void)
{
    PyObject *module, *moddict;
    PyObject *version;
    
    /* Init type objects */
    PyType_Init(mxBMS_Type);
#ifdef MXFASTSEARCH
    PyType_Init(mxFS_Type);
#endif

    /* create module */
    module = Py_InitModule4(MXTEXTTOOLS_MODULE, /* Module name */
                            Module_methods, /* Method list */
                            Module_docstring, /* Module doc-string */
                            (PyObject *)NULL, /* always pass this as *self */
                            PYTHON_API_VERSION); /* API Version */
    if (!module)
        goto onError;

    /* Register cleanup function */
    if (Py_AtExit(mxTextToolsModule_Cleanup)) {
        /* XXX what to do if we can't register that function ??? */
    }

    /* Add some symbolic constants to the module */
    moddict = PyModule_GetDict(module);

    /* PyDict_SetItemString() does not take over our reference, so it
       has to be released again after the string has been stored */
    version = PyString_FromString(VERSION);
    if (version == NULL)
        goto onError;
    PyDict_SetItemString(moddict, 
                         "__version__",
                         version);
    Py_DECREF(version);

    /* The references returned by the constructors stay with the
       module globals; the dictionary gets references of its own */
    mxTo_Upper = mxTo_Upper_New();
    if (mxTo_Upper == NULL)
        goto onError;
    PyDict_SetItemString(moddict, 
                         "to_upper",
                         mxTo_Upper);

    mxTo_Lower = mxTo_Lower_New();
    if (mxTo_Lower == NULL)
        goto onError;
    PyDict_SetItemString(moddict, 
                         "to_lower",
                         mxTo_Lower);
  
    /* Type objects */
    Py_INCREF(&mxBMS_Type);
    PyDict_SetItemString(moddict, "BMSType",
                         (PyObject *)&mxBMS_Type);
#ifdef MXFASTSEARCH
    Py_INCREF(&mxFS_Type);
    PyDict_SetItemString(moddict, "FSType",
                         (PyObject *)&mxFS_Type);
#endif

 onError:
    /* Check for errors and report them; this also catches failures
       of the PyDict_SetItemString() calls above */
    if (PyErr_Occurred())
        Py_ReportModuleInitError(MXTEXTTOOLS_MODULE);
    return;
}
