48 #include "EST_Token.h"
50 #include "EST_cutils.h"
51 #include "EST_error.h"
53 const EST_String EST_Token_Default_WhiteSpaceChars =
" \t\n\r";
54 const EST_String EST_Token_Default_SingleCharSymbols =
"(){}[]";
55 const EST_String EST_Token_Default_PrePunctuationSymbols =
"\"'`({[";
56 const EST_String EST_Token_Default_PunctuationSymbols =
"\"'`.,:;!?]})";
57 const EST_String Token_Origin_FD =
"existing file descriptor";
58 const EST_String Token_Origin_Stream =
"existing istream";
59 const EST_String Token_Origin_String =
"existing string";
61 static EST_Regex RXanywhitespace(
"[ \t\n\r]");
63 static inline char *check_extend_str_in(
char *str,
int pos,
int *max)
75 newstuff =
new char[*max];
76 strncpy(newstuff,str,pos);
84 #define check_extend_str(STR, POS, MAX) \
85 (((POS)>= *(MAX))?check_extend_str_in((STR),(POS),(MAX)):(STR))
87 ostream& operator<<(ostream& s,
const EST_Token &p)
89 s <<
"[TOKEN " << p.pname <<
"]";
98 p_filepos = a.p_filepos;
99 p_quoted = a.p_quoted;
118 EST_TokenStream::EST_TokenStream()
121 tok_wspace =
new char[tok_wspacelen];
123 tok_stuff =
new char[tok_stufflen];
124 tok_prepuncslen = 32;
125 tok_prepuncs =
new char[tok_prepuncslen];
134 cerr <<
"TokenStream: warning passing TokenStream not as reference"
150 void EST_TokenStream::default_values()
154 peeked_charp = FALSE;
159 WhiteSpaceChars = EST_Token_Default_WhiteSpaceChars;
169 if (type != tst_none)
171 delete [] tok_wspace;
173 delete [] tok_prepuncs;
179 s <<
"[TOKENSTREAM ";
183 cerr <<
"UNSET";
break;
185 cerr <<
"FILE";
break;
187 cerr <<
"PIPE";
break;
189 cerr <<
"ISTREAM";
break;
191 cerr <<
"STRING";
break;
193 cerr <<
"UNKNOWN" << endl;
202 if (type != tst_none)
205 fp = fopen(filename,
"rb");
208 cerr <<
"Cannot open file " << filename <<
" as tokenstream"
221 if (type != tst_none)
227 cerr <<
"Cannot absorb NULL filestream as tokenstream" << endl;
230 Origin = Token_Origin_FD;
233 close_at_end = close_when_finished;
241 if (type != tst_none)
245 Origin = Token_Origin_Stream;
255 if (type != tst_none)
258 buf = (
const char *)newbuffer;
259 buffer_length = newbuffer.
length();
260 buffer =
new char[buffer_length+1];
261 memmove(buffer,buf,buffer_length+1);
263 Origin = Token_Origin_String;
269 int EST_TokenStream::seek_end()
272 peeked_charp = FALSE;
278 cerr <<
"EST_TokenStream unset" << endl;
282 fseek(fp,0,SEEK_END);
283 p_filepos = ftell(fp);
286 cerr <<
"EST_TokenStream seek on pipe not supported" << endl;
290 is->seekg(0,is->end);
291 p_filepos = is->tellg();
298 cerr <<
"EST_TokenStream: unknown type" << endl;
307 peeked_charp = FALSE;
313 cerr <<
"EST_TokenStream unset" << endl;
317 p_filepos = position;
318 return fseek(fp,position,SEEK_SET);
320 cerr <<
"EST_TokenStream seek on pipe not supported" << endl;
324 p_filepos = position;
325 is->seekg(position, is->beg);
341 cerr <<
"EST_TokenStream: unknown type" << endl;
349 static int stdio_fread(
void *buff,
int size,
int nitems,FILE *fp)
352 return fread(buff,size,nitems,fp);
364 <<
" peeked into binary data" << endl;
368 peeked_charp = FALSE;
374 cerr <<
"EST_TokenStream unset" << endl;
378 items_read = stdio_fread(buff,(
size_t)size,(
size_t)nitems,fp);
379 p_filepos += items_read*size;
382 cerr <<
"EST_TokenStream fread pipe not yet supported" << endl;
386 is->read((
char*)buff, (
size_t) size*nitems);
387 return is->gcount()/size;
390 if ((buffer_length-pos)/size < nitems)
391 items_read = (buffer_length-pos)/size;
394 memcpy(buff,&buffer[pos],items_read*size);
395 pos += items_read*size;
398 cerr <<
"EST_TokenStream: unknown type" << endl;
427 cerr <<
"EST_TokenStream: unknown type" << endl;
432 peeked_charp = FALSE;
446 fp = freopen(Origin,
"rb",fp);
450 cerr <<
"EST_TokenStream: can't rewind pipe" << endl;
454 cerr <<
"EST_TokenStream: can't rewind istream" << endl;
460 cerr <<
"EST_TokenStream: unknown type" << endl;
465 peeked_charp = FALSE;
503 result += t.whitespace() + t.prepunctuation() +
504 t.string() + t.punctuation();
507 cerr <<
"EST_TokenStream: end of file when looking for \"" <<
526 result += t.whitespace() + t.prepunctuation();
529 result += quote_string(t.string());
531 result += t.string();
533 result += t.punctuation();
546 char *w = wstrdup(
peek().whitespace());
548 for (i=0; w[i] != 0; i++)
573 EST_error(
"Expected '%s' got '%s' at %s",
574 (
const char *)expected,
584 void EST_TokenStream::build_table()
590 for (i=0; i<256; ++i)
593 for (p=WhiteSpaceChars; *p; ++p)
594 if (p_table[c=(
unsigned char)*p])
595 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
600 for (p=SingleCharSymbols; *p; ++p)
601 if (p_table[c=(
unsigned char)*p])
602 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
603 *p, p_table[c],
'!');
607 for (p=PunctuationSymbols; *p; ++p)
608 if (p_table[c=(
unsigned char)*p] ==
'@')
611 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
612 *p, p_table[c],
'.');
616 for(p=PrePunctuationSymbols; *p; ++p)
617 if (p_table[c=(
unsigned char)*p] ==
'@')
619 else if (p_table[c] ==
'.')
622 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
623 *p, p_table[c],
'$');
630 inline int EST_TokenStream::getpeeked_internal(
void)
632 peeked_charp = FALSE;
637 int EST_TokenStream::getch_internal()
640 if (EST_TokenStream::peeked_charp)
642 return getpeeked_internal();
648 cerr <<
"EST_TokenStream unset" << endl;
655 if (stdio_fread(&lc,1,1,fp) == 0)
662 cerr <<
"EST_TokenStream pipe not yet supported" << endl;
669 if (pos < buffer_length)
672 return buffer[pos++];
677 cerr <<
"EST_TokenStream: unknown type" << endl;
684 int EST_TokenStream::getch(
void)
686 return getch_internal();
689 inline int EST_TokenStream::peekch_internal()
694 peeked_char = getch_internal();
700 int EST_TokenStream::peekch(
void)
702 return peekch_internal();
706 #define CLASS(C,CL) (p_table[(unsigned char)(C)]==(CL))
708 #define CLASS2(C,CL1,CL2) (p_table[(unsigned char)(C)]==(CL1)||p_table[(unsigned char)(C)]==(CL2))
724 for (i=0; (CLASS(c=getch_internal(),
' ') &&
727 if (c ==
'\n') linepos++;
728 tok_wspace = check_extend_str(tok_wspace,i,&tok_wspacelen);
731 tok_wspace[i] =
'\0';
743 ((c = getch_internal()) != EOF)
748 tok_stuff = check_extend_str(tok_stuff,i,&tok_stufflen);
750 c = getch_internal();
757 for (i=0,tok_stuff[i++]=c;
760 !CLASS(c=peekch_internal(),
' ') &&
764 tok_stuff = check_extend_str(tok_stuff,i,&tok_stufflen);
766 tok_stuff[i++] = getpeeked_internal();
772 ((j < i) && CLASS2(tok_stuff[j],
'$',
'"'));
774 if ((j > 0) && (j < i))
776 tok_prepuncs = check_extend_str(tok_prepuncs,j+1,&tok_prepuncslen);
777 memmove(tok_prepuncs,tok_stuff,j);
778 tok_prepuncs[j] =
'\0';
790 ((j > 0) && CLASS2(word[j],
'.',
'"'));
792 if (word[j+1] !=
'\0')
801 if (tok_wspace[0] ==
'\0')
823 if ((
peek().whitespace().contains(
"\n")) ||
eof())
849 quoted[0] = quote(0);
850 for (i=1,j=0; j < s.
length(); j++,i++)
852 if (s(j) == quote(0))
853 quoted[i++] = escape(0);
854 else if (s(j) == escape(0))
855 quoted[i++] = escape(0);
858 quoted[i++] = quote(0);
860 quoted_form = quoted;
~EST_TokenStream()
will close file if appropriate for type
const EST_String pos_description() const
A string describing current position, suitable for error messages.
void set_prepunctuation(const EST_String &p)
Set the prepunctuation of the token.
A Regular expression class to go with the CSTR EST_String class.
void close(void)
Close stream.
const EST_String filename() const
The originating filename (if there is one)
EST_String itoString(int n)
Make an EST_String object from an integer.
void set_punctuation(const EST_String &p)
set (post) punctuation of token.
int fread(void *buff, int size, int nitems)
Reading binary data, (don't use peek() immediately beforehand)
int open(const EST_String &filename)
Open an EST_TokenStream for a file.
int open_string(const EST_String &newbuffer)
Open an EST_TokenStream for a string rather than a file.
void set_token(const EST_String &p)
set token from a string
int restart(void)
Reset to start of file/string.
void set_filepos(int c)
Set file position in original EST_TokenStream.
EST_Token & peek(void)
peek at next token
int length(void) const
Length of string (not the length of the underlying chunk)
EST_Token & get()
get next token in stream
EST_Token get_upto(const EST_String &s)
get up to s in stream as a single token.
int contains(const char *s, int pos=-1) const
Does it contain this substring?
const EST_String pos_description()
A string describing current position, suitable for error messages.
EST_Token get_upto_eoln(void)
Get everything up to the end of the line as a single token.
int seek(int position)
seek, reposition file pointer
void set_whitespace(const EST_String &p)
set whitespace of token.
static const EST_String Empty
Constant empty string.
void set_quoted(int q)
Note that this token was quoted (or not)
Utility EST_String Functions header file.