Edinburgh Speech Tools  2.1-release
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
XML_Parser.cc
1  /************************************************************************/
2  /* */
3  /* Centre for Speech Technology Research */
4  /* University of Edinburgh, UK */
5  /* Copyright (c) 1996,1997 */
6  /* All Rights Reserved. */
7  /* */
8  /* Permission is hereby granted, free of charge, to use and distribute */
9  /* this software and its documentation without restriction, including */
10  /* without limitation the rights to use, copy, modify, merge, publish, */
11  /* distribute, sublicense, and/or sell copies of this work, and to */
12  /* permit persons to whom this work is furnished to do so, subject to */
13  /* the following conditions: */
14  /* 1. The code must retain the above copyright notice, this list of */
15  /* conditions and the following disclaimer. */
16  /* 2. Any modifications must be clearly marked as such. */
17  /* 3. Original authors' names are not deleted. */
18  /* 4. The authors' names are not used to endorse or promote products */
19  /* derived from this software without specific prior written */
20  /* permission. */
21  /* */
22  /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23  /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24  /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25  /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26  /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27  /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28  /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29  /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30  /* THIS SOFTWARE. */
31  /* */
32  /*************************************************************************/
33  /* */
34  /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
35  /* -------------------------------------------------------------------- */
36  /* Recursive descent parsing skeleton. */
37  /* */
38  /*************************************************************************/
39 
40 #include "EST_error.h"
41 #include "XML_Parser.h"
42 #include "rxp.h"
43 
45 {
46 }
47 
49 {
50  known_ids.add_item(id_pattern, directory);
51 }
52 
54 {
55  EST_Litem *p;
56 
57  for(p=known_ids.head(); p != 0; p= p->next())
58  {
59  EST_String re(known_ids.key(p).tostring());
60  EST_String &pattern = known_ids.val(p);
61 
62  list.append(re);
63  list.append(pattern);
64  }
65 }
66 
67 XML_Parser *XML_Parser_Class::make_parser(InputSource source, Entity ent, void *data)
68 {
69  return new XML_Parser(*this, source, ent, data);
70 }
71 
72 XML_Parser *XML_Parser_Class::make_parser(InputSource source, void *data)
73 {
74  return new XML_Parser(*this, source, NULL, data);
75 }
76 
77 
79  const EST_String desc,
80  void *data)
81 {
82  Entity ent = NewExternalEntity(0,0,strdup8(desc),0,0);
83 
84  FILE16 *input16=MakeFILE16FromFILE(input, "r");
85 
86  if (input16==NULL)
87  EST_sys_error("Can't open 16 bit '%s'", (const char *)desc);
88 
89  SetCloseUnderlying(input16, 0);
90 
91  return make_parser(NewInputSource(ent, input16), ent, data);
92 }
93 
94 
96  void *data)
97 {
98  return make_parser(input, "<ANONYMOUS>", data);
99 }
100 
101 
103  void *data)
104 {
105  if ( filename == "-" )
106  return make_parser(stdin, data);
107 
108  FILE *input = fopen(filename, "r");
109 
110  if (input==NULL)
111  EST_sys_error("Can't open '%s'", (const char *)filename);
112 
113  Entity ent = NewExternalEntity(0,0,strdup8(filename),0,0);
114 
115  FILE16 *input16=MakeFILE16FromFILE(input, "r");
116 
117  if (input16==NULL)
118  EST_sys_error("Can't open 16 bit '%s'", (const char *)filename);
119 
120  SetCloseUnderlying(input16, 1);
121 
122  return make_parser(NewInputSource(ent, input16), data);
123 }
124 
125 InputSource XML_Parser_Class::try_and_open(Entity ent)
126 
127 {
128  EST_String id = ent->publicid?ent->publicid:ent->systemid;
129  EST_Litem *p;
130 
131  int starts[EST_Regex_max_subexpressions];
132  int ends[EST_Regex_max_subexpressions];
133  for (p = known_ids.head(); p != 0; p = p->next())
134  {
135  EST_Regex &re = known_ids.key(p);
136  EST_String pattern(known_ids.val(p));
137 
138  if (id.matches(re, 0, starts, ends))
139  {
140  EST_String res(pattern);
141  res.subst(id, starts, ends);
142 
143  FILE *f;
144  FILE16 *f16;
145  if((f = fopen(res, "r")))
146  {
147  if(!(f16 = MakeFILE16FromFILE(f, "r")))
148  return 0;
149  SetCloseUnderlying(f16, 1);
150 
151  return NewInputSource(ent, f16);
152  }
153  }
154  }
155 
156  return EntityOpen(ent);
157 }
158 
159 
160 InputSource XML_Parser_Class::open_entity(Entity ent, void *arg)
161 {
162  XML_Parser *parser = (XML_Parser *)arg;
163 
164  return parser->open(ent);
165 }
166 
167 // Default do-nothing callbacks.
168 
170  XML_Parser &p,
171  void *data)
172 { (void)c; (void)p; (void)data; }
173 
175  XML_Parser &p,
176  void *data)
177 { (void)c; (void)p; (void)data; }
178 
180  XML_Parser &p,
181  void *data,
182  const char *name,
183  XML_Attribute_List &attributes)
184 { (void)c; (void)p; (void)data; (void)name; (void)attributes; }
185 
187  XML_Parser &p,
188  void *data,
189  const char *name,
190  XML_Attribute_List &attributes)
191 { (void)c; (void)p; (void)data; (void)name; (void)attributes;
192  element_open(c, p, data, name, attributes);
193  element_close(c, p, data, name);
194 }
195 
197  XML_Parser &p,
198  void *data,
199  const char *name)
200 { (void)c; (void)p; (void)data; (void)name; }
201 
203  XML_Parser &p,
204  void *data,
205  const char *chars)
206 { (void)c; (void)p; (void)data; (void)chars; }
207 
209  XML_Parser &p,
210  void *data,
211  const char *chars)
212 { (void)c; (void)p; (void)data; (void)chars; }
213 
215  XML_Parser &p,
216  void *data,
217  const char *instruction)
218 { (void)c; (void)p; (void)data; (void)instruction; }
219 
221  XML_Parser &p,
222  void *data)
223 { (void)c; (void)p; (void)data; }
224 
226 {
227  return p.get_error();
228 }
229 
231  XML_Parser &p,
232  void *data,
233  EST_String message)
234 {
235  if (p.current_bit != NULL)
236  p.current_bit->error_message = message;
237  error(c, p, data);
238 }
239 
240  /*************************************************************************/
241  /* */
242  /* An actual parser. */
243  /* */
244  /*************************************************************************/
245 
247  InputSource s,
248  Entity ent,
249  void *d)
250 {
251  pclass=&pc;
252  source=s;
253  initial_entity=ent;
254  data=d;
255  p = NewParser();
256  ParserSetEntityOpener(p, XML_Parser_Class::open_entity);
257  ParserSetFlag(p, ReturnDefaultedAttributes, 1);
258  ParserSetCallbackArg(p, (void *)this);
259 }
260 
262 {
263  if (initial_entity)
264  FreeEntity(initial_entity);
265  FreeDtd(p->dtd);
266  FreeParser(p);
267 }
268 
269 InputSource XML_Parser::open(Entity ent)
270 {
271  return pclass->try_and_open(ent);
272 }
273 
275 {
276 
277  if (p_track_context)
278  p_context.clear();
279 
280  if (ParserPush(p, source) == -1)
281  EST_error("XML Parser error in push");
282 
283  pclass->document_open(*pclass, *this, data);
284 
285  XBit bit;
286  while (1)
287  {
288  current_bit = bit = ReadXBit(p);
289  if (bit->type == XBIT_eof)
290  break;
291  else if (bit->type == XBIT_start || bit->type == XBIT_empty)
292  {
293  Attribute b;
294  XML_Attribute_List att(10);
295 
296  for (b=bit->attributes; b; b=b->next)
297  {
298  att.add_item(EST_String(b->definition->name), EST_String(b->value));
299  }
300 
301  if (bit->type == XBIT_start)
302  {
304  *this,
305  data,
306  bit->element_definition->name,
307  att
308  );
309  if (p_track_context)
310  {
311  EST_String nm(bit->element_definition->name);
312  p_context.push(nm);
313  }
314 
315  }
316  else
317  pclass->element(*pclass,
318  *this,
319  data,
320  bit->element_definition->name,
321  att
322  );
323  }
324  else if (bit->type == XBIT_end)
325  {
326  if (p_track_context)
327  p_context.pop();
328 
330  *this,
331  data,
332  bit->element_definition->name
333  );
334  }
335  else if (bit->type == XBIT_pcdata)
336  {
337  pclass->pcdata(*pclass,
338  *this,
339  data,
340  bit->pcdata_chars
341  );
342  }
343  else if (bit->type == XBIT_cdsect)
344  {
345  pclass->cdata(*pclass,
346  *this,
347  data,
348  bit->cdsect_chars
349  );
350  }
351  else if (bit->type == XBIT_pi)
352  {
354  *this,
355  data,
356  bit->pi_chars
357  );
358  }
359  else if (bit->type == XBIT_error)
360  {
361  pclass->error(*pclass,
362  *this,
363  data);
364  break;
365  }
366  else
367  {
368  // ignore it
369  }
370  FreeXBit(bit);
371  current_bit=NULL;
372  }
373 
374  if (current_bit!=NULL)
375  {
376  FreeXBit(bit);
377  current_bit=NULL;
378  }
379 
380  pclass->document_close(*pclass, *this, data);
381 }
382 
384 {
385  p_track_context=flag;
386 }
387 
389 {
390  p_track_contents=flag;
391 }
392 
393 
394 // Stolen from xmlparser.c, will need to be tweaked for internal rxp changes.
396 {
397  int linenum, charnum;
398  InputSource s;
399  XBit bit = current_bit;
400 
401  if (!bit)
402  return "No Parse In Progress";
403 
404  p_error_message =
406  bit->type == XBIT_error ? "Error" : "Warning",
407  ": ",
408  bit->error_message?bit->error_message:"non XML error"
409  );
410 
411  for(s=p->source; s; s=s->parent)
412  {
413  if(s->entity->name)
414  {
415  p_error_message += " in entity \"";
416  p_error_message += s->entity->name;
417  p_error_message += "\"";
418  }
419  else
420  p_error_message += " in unnamed entity";
421 
422  switch(SourceLineAndChar(s, &linenum, &charnum))
423  {
424  case 1:
425  p_error_message += EST_String::cat(" at line ",
426  EST_String::Number(linenum+1),
427  " char ",
428  EST_String::Number(charnum+1),
429  " of ");
430  break;
431  case 0:
432  p_error_message += EST_String::cat(" defined at line ",
433  EST_String::Number(linenum+1),
434  " char ",
435  EST_String::Number(charnum+1),
436  " of ");
437  break;
438  case -1:
439  p_error_message += " defined in ";
440  break;
441  }
442 
443  p_error_message += EntityDescription(s->entity);
444  p_error_message += "\n";
445  }
446 
447  return (const char *)p_error_message;
448 }
449 
451 {
452  return p_context.nth(n);
453 }
454 
XBit current_bit
The piece of markup being processed.
Definition: XML_Parser.h:259
void track_contents(bool flag)
Definition: XML_Parser.cc:388
const K & key(EST_Litem *ptr, int m=1) const
find key, reference by ptr
Definition: EST_TKVL.cc:201
int subst(EST_String source, int(&starts)[EST_Regex_max_subexpressions], int(&ends)[EST_Regex_max_subexpressions])
Substitute the result of a match into a string.
Definition: EST_String.cc:467
void clear(void)
Empty it out.
Definition: EST_TDeque.cc:140
static EST_String Number(int i, int base=10)
Build string from an integer.
Definition: EST_String.cc:1197
virtual void element_close(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name)
Definition: XML_Parser.cc:196
virtual void pcdata(XML_Parser_Class &c, XML_Parser &p, void *data, const char *chars)
Definition: XML_Parser.cc:202
virtual void document_close(XML_Parser_Class &c, XML_Parser &p, void *data)
Definition: XML_Parser.cc:174
virtual void element_open(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name, XML_Attribute_List &attributes)
Definition: XML_Parser.cc:179
A Regular expression class to go with the CSTR EST_String class.
Definition: EST_Regex.h:56
void registered_ids(EST_TList< EST_String > &list)
Definition: XML_Parser.cc:53
const char * get_error(XML_Parser &p)
Get the error message for the last error.
Definition: XML_Parser.cc:225
Entity initial_entity
Definition: XML_Parser.h:267
A specialised hash table for when the key is an EST_String.
Definition: EST_THash.h:284
~XML_Parser()
Destructor, may close input if required.
Definition: XML_Parser.cc:261
friend class XML_Parser
Definition: XML_Parser.h:231
Parser p
The RXP parser object.
Definition: XML_Parser.h:273
void register_id(EST_Regex id_pattern, EST_String directory)
Definition: XML_Parser.cc:48
static EST_String cat(const EST_String s1, const EST_String s2=Empty, const EST_String s3=Empty, const EST_String s4=Empty, const EST_String s5=Empty, const EST_String s6=Empty, const EST_String s7=Empty, const EST_String s8=Empty, const EST_String s9=Empty)
Definition: EST_String.cc:1082
void go()
Run the parser.
Definition: XML_Parser.cc:274
InputSource try_and_open(Entity ent)
Definition: XML_Parser.cc:125
static InputSource open_entity(Entity ent, void *arg)
Definition: XML_Parser.cc:160
XML_Parser(XML_Parser_Class &parent, InputSource source, Entity initial_entity, void *data)
Creator used by XML_Parser_Class::make_parser()
Definition: XML_Parser.cc:246
virtual void element(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name, XML_Attribute_List &attributes)
Definition: XML_Parser.cc:186
void track_context(bool flag)
Definition: XML_Parser.cc:383
const char * get_error()
Get the error message for the last error.
Definition: XML_Parser.cc:395
EST_Litem * head() const
Return First key value pair in list.
Definition: EST_TKVL.h:102
virtual void cdata(XML_Parser_Class &c, XML_Parser &p, void *data, const char *chars)
Definition: XML_Parser.cc:208
XML_Parser_Class * pclass
Definition: XML_Parser.h:256
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
EST_String context(int n)
Definition: XML_Parser.cc:450
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:198
int add_item(const K &key, const V &value, int no_search=0)
Add an entry to the table.
Definition: EST_THash.cc:167
InputSource open(Entity ent)
Open. Asks the parser class to do the work.
Definition: XML_Parser.cc:269
XML_Parser * make_parser(InputSource source, void *data)
Create a parser for the RXP InputSource.
Definition: XML_Parser.cc:72
int add_item(const K &rkey, const V &rval, int no_search=0)
add key-val pair to list
Definition: EST_TKVL.cc:248
InputSource source
Where we are reading from.
Definition: XML_Parser.h:262
virtual void error(XML_Parser_Class &c, XML_Parser &p, void *data)
Definition: XML_Parser.cc:220
virtual void processing(XML_Parser_Class &c, XML_Parser &p, void *data, const char *instruction)
Definition: XML_Parser.cc:214
EST_String tostring(void) const
Get the expression as a string.
Definition: EST_Regex.h:97
virtual void document_open(XML_Parser_Class &c, XML_Parser &p, void *data)
Definition: XML_Parser.cc:169
EST_TDeque< EST_String > p_context
If context is being tracked, this is a stack of element names.
Definition: XML_Parser.h:276