| Package pyparsing ::
        Module pyparsing |  | 
   1   
   2   
   3   
   4   
   5   
   6   
   7   
   8   
   9   
  10   
  11   
  12   
  13   
  14   
  15   
  16   
  17   
  18   
  19   
  20   
  21   
  22   
  23   
  24   
  25   
  26  __doc__ = \ 
  27  """ 
  28  pyparsing module - Classes and methods to define and execute parsing grammars 
  29   
  30  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  31  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  32  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  33  provides a library of classes that you use to construct the grammar directly in Python. 
  34   
  35  Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!"):: 
  36   
  37      from pyparsing import Word, alphas 
  38   
  39      # define grammar of a greeting 
  40      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  41   
  42      hello = "Hello, World!" 
  43      print hello, "->", greet.parseString( hello ) 
  44   
  45  The program outputs the following:: 
  46   
  47      Hello, World! -> ['Hello', ',', 'World', '!'] 
  48   
  49  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  50  class names, and the use of '+', '|' and '^' operators. 
  51   
  52  The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an 
  53  object with named attributes. 
  54   
  55  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  56   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  57   - quoted strings 
  58   - embedded comments 
  59  """ 
  60   
  61  __version__ = "1.5.0" 
  62  __versionTime__ = "28 May 2008 10:05" 
  63  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  64   
  65  import string 
  66  from weakref import ref as wkref 
  67  import copy,sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import xml.sax.saxutils 
  72   
  73   
  74  __all__ = [ 
  75  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  76  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  77  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  78  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  79  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  80  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 
  81  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  82  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  83  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  84  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', 
  85  'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 
  86  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  87  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  88  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 
  89  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  90  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  91  'indentedBlock', 
  92  ] 
  93   
  94   
  95  """ 
  96  Detect if we are running version 3.X and make appropriate changes 
  97  Robert A. Clark 
  98  """ 
# Pick the interpreter-specific spellings once, at import time:
#  - _PY3K    flags whether the major version is greater than 2
#  - _MAX_INT is sys.maxsize on 3.x and sys.maxint on 2.x
#  - basestring is aliased to str on 3.x so isinstance(x, basestring)
#    checks elsewhere in the module keep working
if sys.version_info[0] > 2:
    _PY3K = True
    _MAX_INT = sys.maxsize
    basestring = str
else:
    _PY3K = False
    _MAX_INT = sys.maxint
 106   
 107  if not _PY3K: 
 109          """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 
 110             str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 
 111             then < returns the unicode object | encodes it with the default encoding | ... >. 
 112          """ 
 113          try: 
 114               
 115               
 116              return str(obj) 
 117   
 118          except UnicodeEncodeError: 
 119               
 120               
 121               
 122               
 123               
 124              return unicode(obj) 
  125               
 126               
 127               
 128               
 129               
 130               
 131  else: 
 132      _ustr = str 
 133   
 135      return dict( [(c,0) for c in strg] ) 
  136       
 137   
 140   
# Character-set constants (several are exported through __all__).
# The letter set comes from string.lowercase/uppercase on 2.x and from the
# ascii_* constants on 3.x, where the unprefixed names are not used.
if not _PY3K:
    alphas     = string.lowercase + string.uppercase
else:
    alphas     = string.ascii_lowercase + string.ascii_uppercase
nums       = string.digits
hexnums    = nums + "ABCDEFabcdef"
alphanums  = alphas + nums
_bslash = "\\"
# every character of string.printable that is not whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
 150   
 152      """base exception class for all parsing runtime exceptions""" 
 153      __slots__ = ( "loc","msg","pstr","parserElement" ) 
 154       
 155       
 156 -    def __init__( self, pstr, loc=0, msg=None, elem=None ): 
  165   
 167          """supported attributes by name are: 
 168              - lineno - returns the line number of the exception text 
 169              - col - returns the column number of the exception text 
 170              - line - returns the line containing the exception text 
 171          """ 
 172          if( aname == "lineno" ): 
 173              return lineno( self.loc, self.pstr ) 
 174          elif( aname in ("col", "column") ): 
 175              return col( self.loc, self.pstr ) 
 176          elif( aname == "line" ): 
 177              return line( self.loc, self.pstr ) 
 178          else: 
 179              raise AttributeError(aname) 
  180   
 182          return "%s (at char %d), (line:%d, col:%d)" % \ 
 183                  ( self.msg, self.loc, self.lineno, self.column ) 
   196   
 198      """exception thrown when parse expressions don't match class; 
 199         supported attributes by name are: 
 200          - lineno - returns the line number of the exception text 
 201          - col - returns the column number of the exception text 
 202          - line - returns the line containing the exception text 
 203      """ 
 204      pass 
  205   
 207      """user-throwable exception thrown when inconsistent parse content 
 208         is found; stops all parsing immediately""" 
 209      pass 
  210   
 212      """just like ParseFatalException, but thrown internally when an 
 213         ErrorStop indicates that parsing is to stop immediately because 
 214         an unbacktrackable syntax error has been found""" 
  218   
 219   
 220       
 221          
 222           
 223           
 224          
 225          
 226          
 227          
 228       
 229           
 230           
 231   
 233      """exception thrown by validate() if the grammar could be improperly recursive""" 
 234 -    def __init__( self, parseElementList ): 
  235          self.parseElementTrace = parseElementList 
  236   
 238          return "RecursiveGrammarException: %s" % self.parseElementTrace 
   239   
 246          return repr(self.tup) 
  249      """Structured parse results, to provide multiple means of access to the parsed data: 
 250         - as a list (len(results)) 
 251         - by list index (results[0], results[1], etc.) 
 252         - by attribute (results.<resultsName>) 
 253         """ 
 254      __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" ) 
 255 -    def __new__(cls, toklist, name=None, asList=True, modal=True ): 
  256          if isinstance(toklist, cls): 
 257              return toklist 
 258          retobj = object.__new__(cls) 
 259          retobj.__doinit = True 
 260          return retobj 
  261   
 262       
 263       
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        """Set up the token list and, when a results name is given, register the
           tokens under that name.

           Parameters:
            - toklist - a list of tokens (copied) or a single token (wrapped in a list)
            - name - optional results name; an int name is converted with _ustr
            - asList - when true the value stored under name is a ParseResults
              (a copy of toklist, or one built from toklist[0]); otherwise
              toklist[0] is stored, falling back to toklist itself if indexing
              raises KeyError or TypeError
            - modal - when false, name is recorded in __accumNames, which makes
              lookups by that name return every stored value instead of the last

           __new__ may return an already-initialised instance, and __init__ is
           then run on it again; the __doinit flag keeps that second call from
           resetting the existing state.
        """
        if self.__doinit:
            # first-time setup only (see the note on __doinit above)
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not modal:
                # mark this name as accumulating all of its matches
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name)
            self.__name = name
            # nothing is registered under the name for None, '' or [] token values
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    # NOTE(review): -1 is the position recorded for the named value;
                    # presumably a "no fixed position" marker - confirm against the
                    # code that consumes these offsets
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),-1)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1)
                    # tag whatever self[name] returns with the results name
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError):
                        self[name] = toklist
  299   
 301          if isinstance( i, (int,slice) ): 
 302              return self.__toklist[i] 
 303          else: 
 304              if i not in self.__accumNames: 
 305                  return self.__tokdict[i][-1][0] 
 306              else: 
 307                  return ParseResults([ v[0] for v in self.__tokdict[i] ]) 
  308   
 310          if isinstance(v,_ParseResultsWithOffset): 
 311              self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] 
 312              sub = v[0] 
 313          elif isinstance(k,int): 
 314              self.__toklist[k] = v 
 315              sub = v 
 316          else: 
 317              self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 
 318              sub = v 
 319          if isinstance(sub,ParseResults): 
 320              sub.__parent = wkref(self) 
  321   
 323          if isinstance(i,(int,slice)): 
 324              mylen = len( self.__toklist ) 
 325              del self.__toklist[i] 
 326   
 327               
 328              if isinstance(i, int): 
 329                  if i < 0: 
 330                      i += mylen 
 331                  i = slice(i, i+1) 
 332               
 333              removed = list(range(*i.indices(mylen))) 
 334              removed.reverse() 
 335               
 336              for name in self.__tokdict: 
 337                  occurrences = self.__tokdict[name] 
 338                  for j in removed: 
 339                      for k, (value, position) in enumerate(occurrences): 
 340                          occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 
 341          else: 
 342              del self.__tokdict[i] 
  343   
 345          return k in self.__tokdict 
  346   
 347 -    def __len__( self ): return len( self.__toklist ) 
  348 -    def __bool__(self): return len( self.__toklist ) > 0 
  349      __nonzero__ = __bool__ 
 350 -    def __iter__( self ): return iter( self.__toklist ) 
  351 -    def __reversed__( self ): return iter( reversed(self.__toklist) ) 
  353          """Returns all named result keys.""" 
 354          return self.__tokdict.keys() 
  355   
 356 -    def pop( self, index=-1 ): 
  357          """Removes and returns item at specified index (default=last). 
 358             Will work with either numeric indices or dict-key indicies.""" 
 359          ret = self[index] 
 360          del self[index] 
 361          return ret 
  362   
 363 -    def get(self, key, defaultValue=None): 
  364          """Returns named result matching the given key, or if there is no 
 365             such name, then returns the given defaultValue or None if no 
 366             defaultValue is specified.""" 
 367          if key in self: 
 368              return self[key] 
 369          else: 
 370              return defaultValue 
  371   
 372 -    def insert( self, index, insStr ): 
  373          self.__toklist.insert(index, insStr) 
 374           
 375          for name in self.__tokdict: 
 376              occurrences = self.__tokdict[name] 
 377              for k, (value, position) in enumerate(occurrences): 
 378                  occurrences[k] = _ParseResultsWithOffset(value, position + (position > j)) 
  379   
 381          """Returns all named result keys and values as a list of tuples.""" 
 382          return [(k,self[k]) for k in self.__tokdict] 
  383   
 385          """Returns all named result values.""" 
 386          return [ v[-1][0] for v in self.__tokdict.values() ] 
  387   
 389          if name not in self.__slots__: 
 390              if name in self.__tokdict: 
 391                  if name not in self.__accumNames: 
 392                      return self.__tokdict[name][-1][0] 
 393                  else: 
 394                      return ParseResults([ v[0] for v in self.__tokdict[name] ]) 
 395              else: 
 396                  return "" 
 397          return None 
  398   
 400          ret = self.copy() 
 401          ret += other 
 402          return ret 
  403   
 405          if other.__tokdict: 
 406              offset = len(self.__toklist) 
 407              addoffset = ( lambda a: (a<0 and offset) or (a+offset) ) 
 408              otheritems = other.__tokdict.items() 
 409              otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) 
 410                                  for (k,vlist) in otheritems for v in vlist] 
 411              for k,v in otherdictitems: 
 412                  self[k] = v 
 413                  if isinstance(v[0],ParseResults): 
 414                      v[0].__parent = wkref(self) 
 415          self.__toklist += other.__toklist 
 416          self.__accumNames.update( other.__accumNames ) 
 417          del other 
 418          return self 
  419   
 421          return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) 
  422   
 424          out = "[" 
 425          sep = "" 
 426          for i in self.__toklist: 
 427              if isinstance(i, ParseResults): 
 428                  out += sep + _ustr(i) 
 429              else: 
 430                  out += sep + repr(i) 
 431              sep = ", " 
 432          out += "]" 
 433          return out 
  434   
 436          out = [] 
 437          for item in self.__toklist: 
 438              if out and sep: 
 439                  out.append(sep) 
 440              if isinstance( item, ParseResults ): 
 441                  out += item._asStringList() 
 442              else: 
 443                  out.append( _ustr(item) ) 
 444          return out 
  445   
 447          """Returns the parse results as a nested list of matching tokens (token values are returned as stored, without conversion).""" 
 448          out = [] 
 449          for res in self.__toklist: 
 450              if isinstance(res,ParseResults): 
 451                  out.append( res.asList() ) 
 452              else: 
 453                  out.append( res ) 
 454          return out 
  455   
 457          """Returns the named parse results as dictionary.""" 
 458          return dict( self.items() ) 
  459   
 461          """Returns a new copy of a ParseResults object.""" 
 462          ret = ParseResults( self.__toklist ) 
 463          ret.__tokdict = self.__tokdict.copy() 
 464          ret.__parent = self.__parent 
 465          ret.__accumNames.update( self.__accumNames ) 
 466          ret.__name = self.__name 
 467          return ret 
  468   
    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

           Parameters:
            - doctag - tag name for the outermost element; when None, this
              result's own name is used, falling back to "ITEM"
            - namedItemsOnly - when true, tokens without a results name are
              skipped, and an unnamed result with no doctag produces ""
            - indent - text written in front of this element's tags
            - formatted - when false, newlines and indentation are left out

           Returns the XML text as one string.  Token text is escaped with
           xml.sax.saxutils.escape; tag names are written as given.
        """
        nl = "\n"
        out = []
        # map recorded token position -> results name, used to pick tags below
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist ] )
        nextLevelIndent = indent + "  "

        # collapse all layout whitespace when formatting is not wanted
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        # choose the tag for this element: explicit doctag, else own results name
        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                # nested results render themselves, one indent level deeper
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token: tag it with its results name if it has one
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)
  528   
 530          for k,vlist in self.__tokdict.items(): 
 531              for v,loc in vlist: 
 532                  if sub is v: 
 533                      return k 
 534          return None 
  535   
 537          """Returns the results name for this token expression.""" 
 538          if self.__name: 
 539              return self.__name 
 540          elif self.__parent: 
 541              par = self.__parent() 
 542              if par: 
 543                  return par.__lookup(self) 
 544              else: 
 545                  return None 
 546          elif (len(self) == 1 and 
 547                 len(self.__tokdict) == 1 and 
 548                 self.__tokdict.values()[0][0][1] in (0,-1)): 
 549              return self.__tokdict.keys()[0] 
 550          else: 
 551              return None 
  552   
 553 -    def dump(self,indent='',depth=0): 
  554          """Diagnostic method for listing out the contents of a ParseResults. 
 555             Accepts an optional indent argument so that this string can be embedded 
 556             in a nested display of other data.""" 
 557          out = [] 
 558          out.append( indent+_ustr(self.asList()) ) 
 559          keys = self.items() 
 560          keys.sort() 
 561          for k,v in keys: 
 562              if out: 
 563                  out.append('\n') 
 564              out.append( "%s%s- %s: " % (indent,('  '*depth), k) ) 
 565              if isinstance(v,ParseResults): 
 566                  if v.keys(): 
 567                       
 568                      out.append( v.dump(indent,depth+1) ) 
 569                       
 570                  else: 
 571                      out.append(_ustr(v)) 
 572              else: 
 573                  out.append(_ustr(v)) 
 574           
 575          return "".join(out) 
  576   
 577       
 579          return ( self.__toklist, 
 580                   ( self.__tokdict.copy(), 
 581                     self.__parent is not None and self.__parent() or None, 
 582                     self.__accumNames, 
 583                     self.__name ) ) 
  584   
 586          self.__toklist = state[0] 
 587          self.__tokdict, \ 
 588          par, \ 
 589          inAccumNames, \ 
 590          self.__name = state[1] 
 591          self.__accumNames = {} 
 592          self.__accumNames.update(inAccumNames) 
 593          if par is not None: 
 594              self.__parent = wkref(par) 
 595          else: 
 596              self.__parent = None 
   597   
 598   
 600      """Returns current column within a string, counting newlines as line separators. 
 601     The first column is number 1. 
 602   
 603     Note: the default parsing behavior is to expand tabs in the input string 
 604     before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 
 605     on parsing strings containing <TAB>s, and suggested methods to maintain a 
 606     consistent view of the parsed string, the parse location, and line and column 
 607     positions within the parsed string. 
 608     """ 
 609      return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc) 
  610   
 612      """Returns current line number within a string, counting newlines as line separators. 
 613     The first line is number 1. 
 614   
 615     Note: the default parsing behavior is to expand tabs in the input string 
 616     before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 
 617     on parsing strings containing <TAB>s, and suggested methods to maintain a 
 618     consistent view of the parsed string, the parse location, and line and column 
 619     positions within the parsed string. 
 620     """ 
 621      return strg.count("\n",0,loc) + 1 
  622   
 623 -def line( loc, strg ): 
  624      """Returns the line of text containing loc within a string, counting newlines as line separators. 
 625         """ 
 626      lastCR = strg.rfind("\n", 0, loc) 
 627      nextCR = strg.find("\n", loc) 
 628      if nextCR > 0: 
 629          return strg[lastCR+1:nextCR] 
 630      else: 
 631          return strg[lastCR+1:] 
  632   
 634      print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) 
  635   
 637      print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) 
  638   
 640      print ("Exception raised:" + _ustr(exc)) 
  641   
 643      """'Do-nothing' debug action, to suppress debugging output during parsing.""" 
 644      pass 
  645   
 647      """Abstract base level parser element class.""" 
 648      DEFAULT_WHITE_CHARS = " \n\t\r" 
 649   
 654      setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) 
 655   
 657          self.parseAction = list() 
 658          self.failAction = None 
 659           
 660          self.strRepr = None 
 661          self.resultsName = None 
 662          self.saveAsList = savelist 
 663          self.skipWhitespace = True 
 664          self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 
 665          self.copyDefaultWhiteChars = True 
 666          self.mayReturnEmpty = False  
 667          self.keepTabs = False 
 668          self.ignoreExprs = list() 
 669          self.debug = False 
 670          self.streamlined = False 
 671          self.mayIndexError = True  
 672          self.errmsg = "" 
 673          self.modalResults = True  
 674          self.debugActions = ( None, None, None )  
 675          self.re = None 
 676          self.callPreparse = True  
 677          self.callDuringTry = False 
  678   
 680          """Make a copy of this ParserElement.  Useful for defining different parse actions 
 681             for the same parsing pattern, using copies of the original parse element.""" 
 682          cpy = copy.copy( self ) 
 683          cpy.parseAction = self.parseAction[:] 
 684          cpy.ignoreExprs = self.ignoreExprs[:] 
 685          if self.copyDefaultWhiteChars: 
 686              cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 
 687          return cpy 
  688   
 690          """Define name for this expression, for use in debugging.""" 
 691          self.name = name 
 692          self.errmsg = "Expected " + self.name 
 693          if hasattr(self,"exception"): 
 694              self.exception.msg = self.errmsg 
 695          return self 
  696   
 698          """Define name for referencing matching tokens as a nested attribute 
 699             of the returned parse results. 
 700             NOTE: this returns a *copy* of the original ParserElement object; 
 701             this is so that the client can define a basic element, such as an 
 702             integer, and reference it in multiple places with different names. 
 703          """ 
 704          newself = self.copy() 
 705          newself.resultsName = name 
 706          newself.modalResults = not listAllMatches 
 707          return newself 
  708   
 710          """Method to invoke the Python pdb debugger when this element is 
 711             about to be parsed. Set breakFlag to True to enable, False to 
 712             disable. 
 713          """ 
 714          if breakFlag: 
 715              _parseMethod = self._parse 
 716              def breaker(instring, loc, doActions=True, callPreParse=True): 
 717                  import pdb 
 718                  pdb.set_trace() 
 719                  _parseMethod( instring, loc, doActions, callPreParse ) 
  720              breaker._originalParseMethod = _parseMethod 
 721              self._parse = breaker 
 722          else: 
 723              if hasattr(self._parse,"_originalParseMethod"): 
 724                  self._parse = self._parse._originalParseMethod 
 725          return self 
  726   
 728          """Internal method used to decorate parse actions that take fewer than 3 arguments, 
 729             so that all parse actions can be called as f(s,l,t).""" 
 730          STAR_ARGS = 4 
 731   
 732          try: 
 733              restore = None 
 734              if isinstance(f,type): 
 735                  restore = f 
 736                  f = f.__init__ 
 737              if not _PY3K: 
 738                  codeObj = f.func_code 
 739              else: 
 740                  codeObj = f.code 
 741              if codeObj.co_flags & STAR_ARGS: 
 742                  return f 
 743              numargs = codeObj.co_argcount 
 744              if not _PY3K: 
 745                  if hasattr(f,"im_self"): 
 746                      numargs -= 1 
 747              else: 
 748                  if hasattr(f,"__self__"): 
 749                      numargs -= 1 
 750              if restore: 
 751                  f = restore 
 752          except AttributeError: 
 753              try: 
 754                  if not _PY3K: 
 755                      call_im_func_code = f.__call__.im_func.func_code 
 756                  else: 
 757                      call_im_func_code = f.__code__ 
 758   
 759                   
 760                   
 761                  if call_im_func_code.co_flags & STAR_ARGS: 
 762                      return f 
 763                  numargs = call_im_func_code.co_argcount 
 764                  if not _PY3K: 
 765                      if hasattr(f.__call__,"im_self"): 
 766                          numargs -= 1 
 767                  else: 
 768                      if hasattr(f.__call__,"__self__"): 
 769                          numargs -= 0 
 770              except AttributeError: 
 771                  if not _PY3K: 
 772                      call_func_code = f.__call__.func_code 
 773                  else: 
 774                      call_func_code = f.__call__.__code__ 
 775                   
 776                  if call_func_code.co_flags & STAR_ARGS: 
 777                      return f 
 778                  numargs = call_func_code.co_argcount 
 779                  if not _PY3K: 
 780                      if hasattr(f.__call__,"im_self"): 
 781                          numargs -= 1 
 782                  else: 
 783                      if hasattr(f.__call__,"__self__"): 
 784                          numargs -= 1 
 785   
 786   
 787           
 788          if numargs == 3: 
 789              return f 
 790          else: 
 791              if numargs > 3: 
 792                  def tmp(s,l,t): 
 793                      return f(f.__call__.__self__, s,l,t) 
  794              if numargs == 2: 
 795                  def tmp(s,l,t): 
 796                      return f(l,t) 
 797              elif numargs == 1: 
 798                  def tmp(s,l,t): 
 799                      return f(t) 
 800              else:  
 801                  def tmp(s,l,t): 
 802                      return f() 
 803              try: 
 804                  tmp.__name__ = f.__name__ 
 805              except (AttributeError,TypeError): 
 806                   
 807                  pass 
 808              try: 
 809                  tmp.__doc__ = f.__doc__ 
 810              except (AttributeError,TypeError): 
 811                   
 812                  pass 
 813              try: 
 814                  tmp.__dict__.update(f.__dict__) 
 815              except (AttributeError,TypeError): 
 816                   
 817                  pass 
 818              return tmp 
 819      _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs) 
 820   
 822          """Define action to perform when successfully matching parse element definition. 
 823             Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks), 
 824             fn(loc,toks), fn(toks), or just fn(), where: 
 825              - s   = the original string being parsed (see note below) 
 826              - loc = the location of the matching substring 
 827              - toks = a list of the matched tokens, packaged as a ParseResults object 
 828             If the functions in fns modify the tokens, they can return them as the return 
 829             value from fn, and the modified list of tokens will replace the original. 
 830             Otherwise, fn does not need to return any value. 
 831   
 832             Note: the default parsing behavior is to expand tabs in the input string 
 833             before starting the parsing process.  See L{I{parseString}<parseString>} for more information 
 834             on parsing strings containing <TAB>s, and suggested methods to maintain a 
 835             consistent view of the parsed string, the parse location, and line and column 
 836             positions within the parsed string. 
 837             """ 
 838          self.parseAction = list(map(self._normalizeParseActionArgs, list(fns))) 
 839          self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 
 840          return self 
  841   
 843          """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.""" 
 844          self.parseAction += list(map(self._normalizeParseActionArgs, list(fns))) 
 845          self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 
 846          return self 
  847   
 849          """Define action to perform if parsing fails at this expression. 
 850             Fail acton fn is a callable function that takes the arguments 
 851             fn(s,loc,expr,err) where: 
 852              - s = string being parsed 
 853              - loc = location where expression match was attempted and failed 
 854              - expr = the parse expression that failed 
 855              - err = the exception thrown 
 856             The function returns no value.  It may throw ParseFatalException 
 857             if it is desired to stop parsing immediately.""" 
 858          self.failAction = fn 
 859          return self 
  860   
 862          exprsFound = True 
 863          while exprsFound: 
 864              exprsFound = False 
 865              for e in self.ignoreExprs: 
 866                  try: 
 867                      while 1: 
 868                          loc,dummy = e._parse( instring, loc ) 
 869                          exprsFound = True 
 870                  except ParseException: 
 871                      pass 
 872          return loc 
  873   
 875          if self.ignoreExprs: 
 876              loc = self._skipIgnorables( instring, loc ) 
 877   
 878          if self.skipWhitespace: 
 879              wt = self.whiteChars 
 880              instrlen = len(instring) 
 881              while loc < instrlen and instring[loc] in wt: 
 882                  loc += 1 
 883   
 884          return loc 
  885   
 886 -    def parseImpl( self, instring, loc, doActions=True ): 
  888   
 889 -    def postParse( self, instring, loc, tokenlist ): 
  891   
 892       
 893 -    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): 
  894          debugging = ( self.debug )  
 895   
 896          if debugging or self.failAction: 
 897               
 898              if (self.debugActions[0] ): 
 899                  self.debugActions[0]( instring, loc, self ) 
 900              if callPreParse and self.callPreparse: 
 901                  preloc = self.preParse( instring, loc ) 
 902              else: 
 903                  preloc = loc 
 904              tokensStart = loc 
 905              try: 
 906                  try: 
 907                      loc,tokens = self.parseImpl( instring, preloc, doActions ) 
 908                  except IndexError: 
 909                      raise ParseException( instring, len(instring), self.errmsg, self ) 
 910              except ParseBaseException, err: 
 911                   
 912                  if self.debugActions[2]: 
 913                      self.debugActions[2]( instring, tokensStart, self, err ) 
 914                  if self.failAction: 
 915                      self.failAction( instring, tokensStart, self, err ) 
 916                  raise 
 917          else: 
 918              if callPreParse and self.callPreparse: 
 919                  preloc = self.preParse( instring, loc ) 
 920              else: 
 921                  preloc = loc 
 922              tokensStart = loc 
 923              if self.mayIndexError or loc >= len(instring): 
 924                  try: 
 925                      loc,tokens = self.parseImpl( instring, preloc, doActions ) 
 926                  except IndexError: 
 927                      raise ParseException( instring, len(instring), self.errmsg, self ) 
 928              else: 
 929                  loc,tokens = self.parseImpl( instring, preloc, doActions ) 
 930   
 931          tokens = self.postParse( instring, loc, tokens ) 
 932   
 933          retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) 
 934          if self.parseAction and (doActions or self.callDuringTry): 
 935              if debugging: 
 936                  try: 
 937                      for fn in self.parseAction: 
 938                          tokens = fn( instring, tokensStart, retTokens ) 
 939                          if tokens is not None: 
 940                              retTokens = ParseResults( tokens, 
 941                                                        self.resultsName, 
 942                                                        asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 
 943                                                        modal=self.modalResults ) 
 944                  except ParseBaseException, err: 
 945                       
 946                      if (self.debugActions[2] ): 
 947                          self.debugActions[2]( instring, tokensStart, self, err ) 
 948                      raise 
 949              else: 
 950                  for fn in self.parseAction: 
 951                      tokens = fn( instring, tokensStart, retTokens ) 
 952                      if tokens is not None: 
 953                          retTokens = ParseResults( tokens, 
 954                                                    self.resultsName, 
 955                                                    asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 
 956                                                    modal=self.modalResults ) 
 957   
 958          if debugging: 
 959               
 960              if (self.debugActions[1] ): 
 961                  self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) 
 962   
 963          return loc, retTokens 
  964   
 970   
 971       
 972       
 973 -    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): 
  974          lookup = (self,instring,loc,callPreParse,doActions) 
 975          if lookup in ParserElement._exprArgCache: 
 976              value = ParserElement._exprArgCache[ lookup ] 
 977              if isinstance(value,Exception): 
 978                  raise value 
 979              return value 
 980          else: 
 981              try: 
 982                  value = self._parseNoCache( instring, loc, doActions, callPreParse ) 
 983                  ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) 
 984                  return value 
 985              except ParseBaseException, pe: 
 986                  ParserElement._exprArgCache[ lookup ] = pe 
 987                  raise 
  988   
 989      _parse = _parseNoCache 
 990   
 991       
 992      _exprArgCache = {} 
 995      resetCache = staticmethod(resetCache) 
 996   
 997      _packratEnabled = False 
 999          """Enables "packrat" parsing, which adds memoizing to the parsing logic. 
1000             Repeated parse attempts at the same string location (which happens 
1001             often in many complex grammars) can immediately return a cached value, 
1002             instead of re-executing parsing/validating code.  Memoizing is done of 
1003             both valid results and parsing exceptions. 
1004   
1005             This speedup may break existing programs that use parse actions that 
1006             have side-effects.  For this reason, packrat parsing is disabled when 
1007             you first import pyparsing.  To activate the packrat feature, your 
1008             program must call the class method ParserElement.enablePackrat().  If 
1009             your program uses psyco to "compile as you go", you must call 
1010             enablePackrat before calling psyco.full().  If you do not do this, 
1011             Python will crash.  For best results, call enablePackrat() immediately 
1012             after importing pyparsing. 
1013          """ 
1014          if not ParserElement._packratEnabled: 
1015              ParserElement._packratEnabled = True 
1016              ParserElement._parse = ParserElement._parseCache 
 1017      enablePackrat = staticmethod(enablePackrat) 
1018   
1020          """Execute the parse expression with the given string. 
1021             This is the main interface to the client code, once the complete 
1022             expression has been built. 
1023   
1024             If you want the grammar to require that the entire input string be 
1025             successfully parsed, then set parseAll to True (equivalent to ending 
1026             the grammar with StringEnd()). 
1027   
1028             Note: parseString implicitly calls expandtabs() on the input string, 
1029             in order to report proper column numbers in parse actions. 
1030             If the input string contains tabs and 
1031             the grammar uses parse actions that use the loc argument to index into the 
1032             string being parsed, you can ensure you have a consistent view of the input 
1033             string by: 
1034              - calling parseWithTabs on your grammar before calling parseString 
1035                (see L{I{parseWithTabs}<parseWithTabs>}) 
1036              - define your parse action using the full (s,loc,toks) signature, and 
1037                reference the input string using the parse action's s argument 
1038              - explictly expand the tabs in your input string before calling 
1039                parseString 
1040          """ 
1041          ParserElement.resetCache() 
1042          if not self.streamlined: 
1043              self.streamline() 
1044               
1045          for e in self.ignoreExprs: 
1046              e.streamline() 
1047          if not self.keepTabs: 
1048              instring = instring.expandtabs() 
1049          loc, tokens = self._parse( instring, 0 ) 
1050          if parseAll: 
1051              StringEnd()._parse( instring, loc ) 
1052          return tokens 
 1053   
1055          """Scan the input string for expression matches.  Each match will return the 
1056             matching tokens, start location, and end location.  May be called with optional 
1057             maxMatches argument, to clip scanning after 'n' matches are found. 
1058   
1059             Note that the start and end locations are reported relative to the string 
1060             being parsed.  See L{I{parseString}<parseString>} for more information on parsing 
1061             strings with embedded tabs.""" 
1062          if not self.streamlined: 
1063              self.streamline() 
1064          for e in self.ignoreExprs: 
1065              e.streamline() 
1066   
1067          if not self.keepTabs: 
1068              instring = _ustr(instring).expandtabs() 
1069          instrlen = len(instring) 
1070          loc = 0 
1071          preparseFn = self.preParse 
1072          parseFn = self._parse 
1073          ParserElement.resetCache() 
1074          matches = 0 
1075          while loc <= instrlen and matches < maxMatches: 
1076              try: 
1077                  preloc = preparseFn( instring, loc ) 
1078                  nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) 
1079              except ParseException: 
1080                  loc = preloc+1 
1081              else: 
1082                  matches += 1 
1083                  yield tokens, preloc, nextLoc 
1084                  loc = nextLoc 
 1085   
1110   
1112          """Another extension to scanString, simplifying the access to the tokens found 
1113             to match the given parse expression.  May be called with optional 
1114             maxMatches argument, to clip searching after 'n' matches are found. 
1115          """ 
1116          return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) 
 1117   
1119          """Implementation of + operator - returns And""" 
1120          if isinstance( other, basestring ): 
1121              other = Literal( other ) 
1122          if not isinstance( other, ParserElement ): 
1123              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1124                      SyntaxWarning, stacklevel=2) 
1125              return None 
1126          return And( [ self, other ] ) 
 1127   
1129          """Implementation of + operator when left operand is not a ParserElement""" 
1130          if isinstance( other, basestring ): 
1131              other = Literal( other ) 
1132          if not isinstance( other, ParserElement ): 
1133              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1134                      SyntaxWarning, stacklevel=2) 
1135              return None 
1136          return other + self 
 1137   
1139          """Implementation of - operator, returns And with error stop""" 
1140          if isinstance( other, basestring ): 
1141              other = Literal( other ) 
1142          if not isinstance( other, ParserElement ): 
1143              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1144                      SyntaxWarning, stacklevel=2) 
1145              return None 
1146          return And( [ self, And._ErrorStop(), other ] ) 
 1147   
1149          """Implementation of - operator when left operand is not a ParserElement""" 
1150          if isinstance( other, basestring ): 
1151              other = Literal( other ) 
1152          if not isinstance( other, ParserElement ): 
1153              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1154                      SyntaxWarning, stacklevel=2) 
1155              return None 
1156          return other - self 
 1157   
1159          if isinstance(other,int): 
1160              minElements, optElements = other,0 
1161          elif isinstance(other,tuple): 
1162              if len(other)==0: 
1163                  other = (None,None) 
1164              elif len(other)==1: 
1165                  other = (other[0],None) 
1166              if len(other)==2: 
1167                  if other[0] is None: 
1168                      other = (0, other[1]) 
1169                  if isinstance(other[0],int) and other[1] is None: 
1170                      if other[0] == 0: 
1171                          return ZeroOrMore(self) 
1172                      if other[0] == 1: 
1173                          return OneOrMore(self) 
1174                      else: 
1175                          return self*other[0] + ZeroOrMore(self) 
1176                  elif isinstance(other[0],int) and isinstance(other[1],int): 
1177                      minElements, optElements = other 
1178                      optElements -= minElements 
1179                  else: 
1180                      raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) 
1181              else: 
1182                  raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects") 
1183          else: 
1184              raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 
1185   
1186          if minElements < 0: 
1187              raise ValueError("cannot multiply ParserElement by negative value") 
1188          if optElements < 0: 
1189              raise ValueError("second tuple value must be greater or equal to first tuple value") 
1190          if minElements == optElements == 0: 
1191              raise ValueError("cannot multiply ParserElement by 0 or (0,0)") 
1192   
1193          if (optElements): 
1194              def makeOptionalList(n): 
1195                  if n>1: 
1196                      return Optional(self + makeOptionalList(n-1)) 
1197                  else: 
1198                      return Optional(self) 
 1199              if minElements: 
1200                  if minElements == 1: 
1201                      ret = self + makeOptionalList(optElements) 
1202                  else: 
1203                      ret = And([self]*minElements) + makeOptionalList(optElements) 
1204              else: 
1205                  ret = makeOptionalList(optElements) 
1206          else: 
1207              if minElements == 1: 
1208                  ret = self 
1209              else: 
1210                  ret = And([self]*minElements) 
1211          return ret 
1212   
1215   
1217          """Implementation of | operator - returns MatchFirst""" 
1218          if isinstance( other, basestring ): 
1219              other = Literal( other ) 
1220          if not isinstance( other, ParserElement ): 
1221              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1222                      SyntaxWarning, stacklevel=2) 
1223              return None 
1224          return MatchFirst( [ self, other ] ) 
 1225   
1227          """Implementation of | operator when left operand is not a ParserElement""" 
1228          if isinstance( other, basestring ): 
1229              other = Literal( other ) 
1230          if not isinstance( other, ParserElement ): 
1231              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1232                      SyntaxWarning, stacklevel=2) 
1233              return None 
1234          return other | self 
 1235   
1237          """Implementation of ^ operator - returns Or""" 
1238          if isinstance( other, basestring ): 
1239              other = Literal( other ) 
1240          if not isinstance( other, ParserElement ): 
1241              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1242                      SyntaxWarning, stacklevel=2) 
1243              return None 
1244          return Or( [ self, other ] ) 
 1245   
1247          """Implementation of ^ operator when left operand is not a ParserElement""" 
1248          if isinstance( other, basestring ): 
1249              other = Literal( other ) 
1250          if not isinstance( other, ParserElement ): 
1251              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1252                      SyntaxWarning, stacklevel=2) 
1253              return None 
1254          return other ^ self 
 1255   
1257          """Implementation of & operator - returns Each""" 
1258          if isinstance( other, basestring ): 
1259              other = Literal( other ) 
1260          if not isinstance( other, ParserElement ): 
1261              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1262                      SyntaxWarning, stacklevel=2) 
1263              return None 
1264          return Each( [ self, other ] ) 
 1265   
1267          """Implementation of & operator when left operand is not a ParserElement""" 
1268          if isinstance( other, basestring ): 
1269              other = Literal( other ) 
1270          if not isinstance( other, ParserElement ): 
1271              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1272                      SyntaxWarning, stacklevel=2) 
1273              return None 
1274          return other & self 
 1275   
1277          """Implementation of ~ operator - returns NotAny""" 
1278          return NotAny( self ) 
 1279   
1281          """Shortcut for setResultsName, with listAllMatches=default:: 
1282               userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") 
1283             could be written as:: 
1284               userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") 
1285             """ 
1286          return self.setResultsName(name) 
 1287   
1289          """Suppresses the output of this ParserElement; useful to keep punctuation from 
1290             cluttering up returned output. 
1291          """ 
1292          return Suppress( self ) 
 1293   
1295          """Disables the skipping of whitespace before matching the characters in the 
1296             ParserElement's defined pattern.  This is normally only used internally by 
1297             the pyparsing module, but may be needed in some whitespace-sensitive grammars. 
1298          """ 
1299          self.skipWhitespace = False 
1300          return self 
 1301   
1303          """Overrides the default whitespace chars 
1304          """ 
1305          self.skipWhitespace = True 
1306          self.whiteChars = chars 
1307          self.copyDefaultWhiteChars = False 
1308          return self 
 1309   
1311          """Overrides default behavior to expand <TAB>s to spaces before parsing the input string. 
1312             Must be called before parseString when the input grammar contains elements that 
1313             match <TAB> characters.""" 
1314          self.keepTabs = True 
1315          return self 
 1316   
1318          """Define expression to be ignored (e.g., comments) while doing pattern 
1319             matching; may be called repeatedly, to define multiple comment or other 
1320             ignorable patterns. 
1321          """ 
1322          if isinstance( other, Suppress ): 
1323              if other not in self.ignoreExprs: 
1324                  self.ignoreExprs.append( other ) 
1325          else: 
1326              self.ignoreExprs.append( Suppress( other ) ) 
1327          return self 
 1328   
1329 -    def setDebugActions( self, startAction, successAction, exceptionAction ): 
 1330          """Enable display of debugging messages while doing pattern matching.""" 
1331          self.debugActions = (startAction or _defaultStartDebugAction, 
1332                               successAction or _defaultSuccessDebugAction, 
1333                               exceptionAction or _defaultExceptionDebugAction) 
1334          self.debug = True 
1335          return self 
 1336   
1338          """Enable display of debugging messages while doing pattern matching. 
1339             Set flag to True to enable, False to disable.""" 
1340          if flag: 
1341              self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) 
1342          else: 
1343              self.debug = False 
1344          return self 
 1345   
1348   
1351   
1353          self.streamlined = True 
1354          self.strRepr = None 
1355          return self 
 1356   
1359   
1360 -    def validate( self, validateTrace=[] ): 
 1361          """Check defined expressions for valid structure, check for infinite recursive definitions.""" 
1362          self.checkRecursion( [] ) 
 1363   
1365          """Execute the parse expression on the given file or filename. 
1366             If a filename is specified (instead of a file object), 
1367             the entire file is opened, read, and closed before parsing. 
1368          """ 
1369          try: 
1370              file_contents = file_or_filename.read() 
1371          except AttributeError: 
1372              f = open(file_or_filename, "rb") 
1373              file_contents = f.read() 
1374              f.close() 
1375          return self.parseString(file_contents) 
 1376   
1379   
1381          if aname == "myException": 
1382              self.myException = ret = self.getException(); 
1383              return ret; 
1384          else: 
1385              raise AttributeError("no such attribute " + aname) 
 1386   
1396   
1398          return hash(id(self)) 
 1399   
1401          return self == other 
 1402   
1403   
1404 -class Token(ParserElement): 
 1405      """Abstract ParserElement subclass, for defining atomic matching patterns.""" 
1408           
1409   
1411          s = super(Token,self).setName(name) 
1412          self.errmsg = "Expected " + self.name 
1413           
1414          return s 
  1415   
1416   
1418      """An empty token, will always match.""" 
1420          super(Empty,self).__init__() 
1421          self.name = "Empty" 
1422          self.mayReturnEmpty = True 
1423          self.mayIndexError = False 
  1424   
1425   
1427      """A token that will never match.""" 
1429          super(NoMatch,self).__init__() 
1430          self.name = "NoMatch" 
1431          self.mayReturnEmpty = True 
1432          self.mayIndexError = False 
1433          self.errmsg = "Unmatchable token" 
 1434           
1435   
1436 -    def parseImpl( self, instring, loc, doActions=True ): 
 1437          exc = self.myException 
1438          exc.loc = loc 
1439          exc.pstr = instring 
1440          raise exc 
  1441   
1442   
1444      """Token to exactly match a specified string.""" 
1446          super(Literal,self).__init__() 
1447          self.match = matchString 
1448          self.matchLen = len(matchString) 
1449          try: 
1450              self.firstMatchChar = matchString[0] 
1451          except IndexError: 
1452              warnings.warn("null string passed to Literal; use Empty() instead", 
1453                              SyntaxWarning, stacklevel=2) 
1454              self.__class__ = Empty 
1455          self.name = '"%s"' % _ustr(self.match) 
1456          self.errmsg = "Expected " + self.name 
1457          self.mayReturnEmpty = False 
1458           
1459          self.mayIndexError = False 
 1460   
1461       
1462       
1463       
1464       
1465 -    def parseImpl( self, instring, loc, doActions=True ): 
 1466          if (instring[loc] == self.firstMatchChar and 
1467              (self.matchLen==1 or instring.startswith(self.match,loc)) ): 
1468              return loc+self.matchLen, self.match 
1469           
1470          exc = self.myException 
1471          exc.loc = loc 
1472          exc.pstr = instring 
1473          raise exc 
  1474  _L = Literal 
1475   
1477      """Token to exactly match a specified string as a keyword, that is, it must be 
1478         immediately followed by a non-keyword character.  Compare with Literal:: 
1479           Literal("if") will match the leading 'if' in 'ifAndOnlyIf'. 
1480           Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)' 
1481         Accepts two optional constructor arguments in addition to the keyword string: 
1482         identChars is a string of characters that would be valid identifier characters, 
1483         defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive 
1484         matching, default is False. 
1485      """ 
1486      DEFAULT_KEYWORD_CHARS = alphanums+"_$" 
1487   
1489          super(Keyword,self).__init__() 
1490          self.match = matchString 
1491          self.matchLen = len(matchString) 
1492          try: 
1493              self.firstMatchChar = matchString[0] 
1494          except IndexError: 
1495              warnings.warn("null string passed to Keyword; use Empty() instead", 
1496                              SyntaxWarning, stacklevel=2) 
1497          self.name = '"%s"' % self.match 
1498          self.errmsg = "Expected " + self.name 
1499          self.mayReturnEmpty = False 
1500           
1501          self.mayIndexError = False 
1502          self.caseless = caseless 
1503          if caseless: 
1504              self.caselessmatch = matchString.upper() 
1505              identChars = identChars.upper() 
1506          self.identChars = _str2dict(identChars) 
 1507   
1508 -    def parseImpl( self, instring, loc, doActions=True ): 
 1509          if self.caseless: 
1510              if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and 
1511                   (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and 
1512                   (loc == 0 or instring[loc-1].upper() not in self.identChars) ): 
1513                  return loc+self.matchLen, self.match 
1514          else: 
1515              if (instring[loc] == self.firstMatchChar and 
1516                  (self.matchLen==1 or instring.startswith(self.match,loc)) and 
1517                  (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and 
1518                  (loc == 0 or instring[loc-1] not in self.identChars) ): 
1519                  return loc+self.matchLen, self.match 
1520           
1521          exc = self.myException 
1522          exc.loc = loc 
1523          exc.pstr = instring 
1524          raise exc 
 1525   
1530   
1535      setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) 
 1536   
1537   
1539      """Token to match a specified string, ignoring case of letters. 
1540         Note: the matched results will always be in the case of the given 
1541         match string, NOT the case of the input text. 
1542      """ 
1544          super(CaselessLiteral,self).__init__( matchString.upper() ) 
1545           
1546          self.returnString = matchString 
1547          self.name = "'%s'" % self.returnString 
1548          self.errmsg = "Expected " + self.name 
 1549           
1550   
1551 -    def parseImpl( self, instring, loc, doActions=True ): 
 1552          if instring[ loc:loc+self.matchLen ].upper() == self.match: 
1553              return loc+self.matchLen, self.returnString 
1554           
1555          exc = self.myException 
1556          exc.loc = loc 
1557          exc.pstr = instring 
1558          raise exc 
  1559   
1563   
1564 -    def parseImpl( self, instring, loc, doActions=True ): 
 1565          if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and 
1566               (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): 
1567              return loc+self.matchLen, self.match 
1568           
1569          exc = self.myException 
1570          exc.loc = loc 
1571          exc.pstr = instring 
1572          raise exc 
  1573   
1575      """Token for matching words composed of allowed character sets. 
1576         Defined with string containing all allowed initial characters, 
1577         an optional string containing allowed body characters (if omitted, 
1578         defaults to the initial character set), and an optional minimum, 
1579         maximum, and/or exact length.  The default value for min is 1 (a 
1580         minimum value < 1 is not valid); the default values for max and exact 
1581         are 0, meaning no maximum or exact length restriction. 
1582      """ 
1583 -    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ): 
 1584          super(Word,self).__init__() 
1585          self.initCharsOrig = initChars 
1586          self.initChars = _str2dict(initChars) 
1587          if bodyChars : 
1588              self.bodyCharsOrig = bodyChars 
1589              self.bodyChars = _str2dict(bodyChars) 
1590          else: 
1591              self.bodyCharsOrig = initChars 
1592              self.bodyChars = _str2dict(initChars) 
1593   
1594          self.maxSpecified = max > 0 
1595   
1596          if min < 1: 
1597              raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 
1598   
1599          self.minLen = min 
1600   
1601          if max > 0: 
1602              self.maxLen = max 
1603          else: 
1604              self.maxLen = _MAX_INT 
1605   
1606          if exact > 0: 
1607              self.maxLen = exact 
1608              self.minLen = exact 
1609   
1610          self.name = _ustr(self) 
1611          self.errmsg = "Expected " + self.name 
1612           
1613          self.mayIndexError = False 
1614          self.asKeyword = asKeyword 
1615   
1616          if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): 
1617              if self.bodyCharsOrig == self.initCharsOrig: 
1618                  self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 
1619              elif len(self.bodyCharsOrig) == 1: 
1620                  self.reString = "%s[%s]*" % \ 
1621                                        (re.escape(self.initCharsOrig), 
1622                                        _escapeRegexRangeChars(self.bodyCharsOrig),) 
1623              else: 
1624                  self.reString = "[%s][%s]*" % \ 
1625                                        (_escapeRegexRangeChars(self.initCharsOrig), 
1626                                        _escapeRegexRangeChars(self.bodyCharsOrig),) 
1627              if self.asKeyword: 
1628                  self.reString = r"\b"+self.reString+r"\b" 
1629              try: 
1630                  self.re = re.compile( self.reString ) 
1631              except: 
1632                  self.re = None 
 1633   
1634 -    def parseImpl( self, instring, loc, doActions=True ): 
 1635          if self.re: 
1636              result = self.re.match(instring,loc) 
1637              if not result: 
1638                  exc = self.myException 
1639                  exc.loc = loc 
1640                  exc.pstr = instring 
1641                  raise exc 
1642   
1643              loc = result.end() 
1644              return loc,result.group() 
1645   
1646          if not(instring[ loc ] in self.initChars): 
1647               
1648              exc = self.myException 
1649              exc.loc = loc 
1650              exc.pstr = instring 
1651              raise exc 
1652          start = loc 
1653          loc += 1 
1654          instrlen = len(instring) 
1655          bodychars = self.bodyChars 
1656          maxloc = start + self.maxLen 
1657          maxloc = min( maxloc, instrlen ) 
1658          while loc < maxloc and instring[loc] in bodychars: 
1659              loc += 1 
1660   
1661          throwException = False 
1662          if loc - start < self.minLen: 
1663              throwException = True 
1664          if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: 
1665              throwException = True 
1666          if self.asKeyword: 
1667              if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars): 
1668                  throwException = True 
1669   
1670          if throwException: 
1671               
1672              exc = self.myException 
1673              exc.loc = loc 
1674              exc.pstr = instring 
1675              raise exc 
1676   
1677          return loc, instring[start:loc] 
 1678   
1680          try: 
1681              return super(Word,self).__str__() 
1682          except: 
1683              pass 
1684   
1685   
1686          if self.strRepr is None: 
1687   
1688              def charsAsStr(s): 
1689                  if len(s)>4: 
1690                      return s[:4]+"..." 
1691                  else: 
1692                      return s 
 1693   
1694              if ( self.initCharsOrig != self.bodyCharsOrig ): 
1695                  self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) 
1696              else: 
1697                  self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) 
1698   
1699          return self.strRepr 
 1700   
1701   
1703      """Token for matching strings that match a given regular expression. 
1704         Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. 
1705      """ 
1706 -    def __init__( self, pattern, flags=0): 
 1707          """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.""" 
1708          super(Regex,self).__init__() 
1709   
1710          if len(pattern) == 0: 
1711              warnings.warn("null string passed to Regex; use Empty() instead", 
1712                      SyntaxWarning, stacklevel=2) 
1713   
1714          self.pattern = pattern 
1715          self.flags = flags 
1716   
1717          try: 
1718              self.re = re.compile(self.pattern, self.flags) 
1719              self.reString = self.pattern 
1720          except sre_constants.error: 
1721              warnings.warn("invalid pattern (%s) passed to Regex" % pattern, 
1722                  SyntaxWarning, stacklevel=2) 
1723              raise 
1724   
1725          self.name = _ustr(self) 
1726          self.errmsg = "Expected " + self.name 
1727           
1728          self.mayIndexError = False 
1729          self.mayReturnEmpty = True 
 1730   
1731 -    def parseImpl( self, instring, loc, doActions=True ): 
 1732          result = self.re.match(instring,loc) 
1733          if not result: 
1734              exc = self.myException 
1735              exc.loc = loc 
1736              exc.pstr = instring 
1737              raise exc 
1738   
1739          loc = result.end() 
1740          d = result.groupdict() 
1741          ret = ParseResults(result.group()) 
1742          if d: 
1743              for k in d: 
1744                  ret[k] = d[k] 
1745          return loc,ret 
 1746   
1748          try: 
1749              return super(Regex,self).__str__() 
1750          except: 
1751              pass 
1752   
1753          if self.strRepr is None: 
1754              self.strRepr = "Re:(%s)" % repr(self.pattern) 
1755   
1756          return self.strRepr 
  1757   
1758   
1760      """Token for matching strings that are delimited by quoting characters. 
1761      """ 
1762 -    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None): 
 1763          """ 
1764             Defined with the following parameters: 
1765              - quoteChar - string of one or more characters defining the quote delimiting string 
1766              - escChar - character to escape quotes, typically backslash (default=None) 
1767              - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) 
1768              - multiline - boolean indicating whether quotes can span multiple lines (default=False) 
1769              - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) 
1770              - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) 
1771          """ 
1772          super(QuotedString,self).__init__() 
1773   
1774           
1775          quoteChar = quoteChar.strip() 
1776          if len(quoteChar) == 0: 
1777              warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 
1778              raise SyntaxError() 
1779   
1780          if endQuoteChar is None: 
1781              endQuoteChar = quoteChar 
1782          else: 
1783              endQuoteChar = endQuoteChar.strip() 
1784              if len(endQuoteChar) == 0: 
1785                  warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 
1786                  raise SyntaxError() 
1787   
1788          self.quoteChar = quoteChar 
1789          self.quoteCharLen = len(quoteChar) 
1790          self.firstQuoteChar = quoteChar[0] 
1791          self.endQuoteChar = endQuoteChar 
1792          self.endQuoteCharLen = len(endQuoteChar) 
1793          self.escChar = escChar 
1794          self.escQuote = escQuote 
1795          self.unquoteResults = unquoteResults 
1796   
1797          if multiline: 
1798              self.flags = re.MULTILINE | re.DOTALL 
1799              self.pattern = r'%s(?:[^%s%s]' % \ 
1800                  ( re.escape(self.quoteChar), 
1801                    _escapeRegexRangeChars(self.endQuoteChar[0]), 
1802                    (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 
1803          else: 
1804              self.flags = 0 
1805              self.pattern = r'%s(?:[^%s\n\r%s]' % \ 
1806                  ( re.escape(self.quoteChar), 
1807                    _escapeRegexRangeChars(self.endQuoteChar[0]), 
1808                    (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 
1809          if len(self.endQuoteChar) > 1: 
1810              self.pattern += ( 
1811                  '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]), 
1812                                                 _escapeRegexRangeChars(self.endQuoteChar[i])) 
1813                                      for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')' 
1814                  ) 
1815          if escQuote: 
1816              self.pattern += (r'|(?:%s)' % re.escape(escQuote)) 
1817          if escChar: 
1818              self.pattern += (r'|(?:%s.)' % re.escape(escChar)) 
1819              self.escCharReplacePattern = re.escape(self.escChar)+"(.)" 
1820          self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) 
1821   
1822          try: 
1823              self.re = re.compile(self.pattern, self.flags) 
1824              self.reString = self.pattern 
1825          except sre_constants.error: 
1826              warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, 
1827                  SyntaxWarning, stacklevel=2) 
1828              raise 
1829   
1830          self.name = _ustr(self) 
1831          self.errmsg = "Expected " + self.name 
1832           
1833          self.mayIndexError = False 
1834          self.mayReturnEmpty = True 
 1835   
1836 -    def parseImpl( self, instring, loc, doActions=True ): 
 1837          result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None 
1838          if not result: 
1839              exc = self.myException 
1840              exc.loc = loc 
1841              exc.pstr = instring 
1842              raise exc 
1843   
1844          loc = result.end() 
1845          ret = result.group() 
1846   
1847          if self.unquoteResults: 
1848   
1849               
1850              ret = ret[self.quoteCharLen:-self.endQuoteCharLen] 
1851   
1852              if isinstance(ret,basestring): 
1853                   
1854                  if self.escChar: 
1855                      ret = re.sub(self.escCharReplacePattern,"\g<1>",ret) 
1856   
1857                   
1858                  if self.escQuote: 
1859                      ret = ret.replace(self.escQuote, self.endQuoteChar) 
1860   
1861          return loc, ret 
 1862   
1864          try: 
1865              return super(QuotedString,self).__str__() 
1866          except: 
1867              pass 
1868   
1869          if self.strRepr is None: 
1870              self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) 
1871   
1872          return self.strRepr 
  1873   
1874   
1876      """Token for matching words composed of characters *not* in a given set. 
1877         Defined with string containing all disallowed characters, and an optional 
1878         minimum, maximum, and/or exact length.  The default value for min is 1 (a 
1879         minimum value < 1 is not valid); the default values for max and exact 
1880         are 0, meaning no maximum or exact length restriction. 
1881      """ 
1882 -    def __init__( self, notChars, min=1, max=0, exact=0 ): 
 1883          super(CharsNotIn,self).__init__() 
1884          self.skipWhitespace = False 
1885          self.notChars = notChars 
1886   
1887          if min < 1: 
1888              raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") 
1889   
1890          self.minLen = min 
1891   
1892          if max > 0: 
1893              self.maxLen = max 
1894          else: 
1895              self.maxLen = _MAX_INT 
1896   
1897          if exact > 0: 
1898              self.maxLen = exact 
1899              self.minLen = exact 
1900   
1901          self.name = _ustr(self) 
1902          self.errmsg = "Expected " + self.name 
1903          self.mayReturnEmpty = ( self.minLen == 0 ) 
1904           
1905          self.mayIndexError = False 
 1906   
1907 -    def parseImpl( self, instring, loc, doActions=True ): 
 1908          if instring[loc] in self.notChars: 
1909               
1910              exc = self.myException 
1911              exc.loc = loc 
1912              exc.pstr = instring 
1913              raise exc 
1914   
1915          start = loc 
1916          loc += 1 
1917          notchars = self.notChars 
1918          maxlen = min( start+self.maxLen, len(instring) ) 
1919          while loc < maxlen and \ 
1920                (instring[loc] not in notchars): 
1921              loc += 1 
1922   
1923          if loc - start < self.minLen: 
1924               
1925              exc = self.myException 
1926              exc.loc = loc 
1927              exc.pstr = instring 
1928              raise exc 
1929   
1930          return loc, instring[start:loc] 
 1931   
1933          try: 
1934              return super(CharsNotIn, self).__str__() 
1935          except: 
1936              pass 
1937   
1938          if self.strRepr is None: 
1939              if len(self.notChars) > 4: 
1940                  self.strRepr = "!W:(%s...)" % self.notChars[:4] 
1941              else: 
1942                  self.strRepr = "!W:(%s)" % self.notChars 
1943   
1944          return self.strRepr 
  1945   
1947      """Special matching class for matching whitespace.  Normally, whitespace is ignored 
1948         by pyparsing grammars.  This class is included when some whitespace structures 
1949         are significant.  Define with a string containing the whitespace characters to be 
1950         matched; default is " \\t\\n".  Also takes optional min, max, and exact arguments, 
1951         as defined for the Word class.""" 
1952      whiteStrs = { 
1953          " " : "<SPC>", 
1954          "\t": "<TAB>", 
1955          "\n": "<LF>", 
1956          "\r": "<CR>", 
1957          "\f": "<FF>", 
1958          } 
1959 -    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): 
 1960          super(White,self).__init__() 
1961          self.matchWhite = ws 
1962          self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) ) 
1963           
1964          self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) 
1965          self.mayReturnEmpty = True 
1966          self.errmsg = "Expected " + self.name 
1967           
1968   
1969          self.minLen = min 
1970   
1971          if max > 0: 
1972              self.maxLen = max 
1973          else: 
1974              self.maxLen = _MAX_INT 
1975   
1976          if exact > 0: 
1977              self.maxLen = exact 
1978              self.minLen = exact 
 1979   
1980 -    def parseImpl( self, instring, loc, doActions=True ): 
 1981          if not(instring[ loc ] in self.matchWhite): 
1982               
1983              exc = self.myException 
1984              exc.loc = loc 
1985              exc.pstr = instring 
1986              raise exc 
1987          start = loc 
1988          loc += 1 
1989          maxloc = start + self.maxLen 
1990          maxloc = min( maxloc, len(instring) ) 
1991          while loc < maxloc and instring[loc] in self.matchWhite: 
1992              loc += 1 
1993   
1994          if loc - start < self.minLen: 
1995               
1996              exc = self.myException 
1997              exc.loc = loc 
1998              exc.pstr = instring 
1999              raise exc 
2000   
2001          return loc, instring[start:loc] 
  2002   
2003   
2006          super(_PositionToken,self).__init__() 
2007          self.name=self.__class__.__name__ 
2008          self.mayReturnEmpty = True 
2009          self.mayIndexError = False 
 2012      """Token to advance to a specific column of input text; useful for tabular report scraping.""" 
2016   
2018          if col(loc,instring) != self.col: 
2019              instrlen = len(instring) 
2020              if self.ignoreExprs: 
2021                  loc = self._skipIgnorables( instring, loc ) 
2022              while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : 
2023                  loc += 1 
2024          return loc 
 2025   
2026 -    def parseImpl( self, instring, loc, doActions=True ): 
 2027          thiscol = col( loc, instring ) 
2028          if thiscol > self.col: 
2029              raise ParseException( instring, loc, "Text not in expected column", self ) 
2030          newloc = loc + self.col - thiscol 
2031          ret = instring[ loc: newloc ] 
2032          return newloc, ret 
  2033   
2035      """Matches if current position is at the beginning of a line within the parse string""" 
2040           
2041   
2047   
2048 -    def parseImpl( self, instring, loc, doActions=True ): 
 2049          if not( loc==0 or 
2050              (loc == self.preParse( instring, 0 )) or 
2051              (instring[loc-1] == "\n") ):  
2052               
2053              exc = self.myException 
2054              exc.loc = loc 
2055              exc.pstr = instring 
2056              raise exc 
2057          return loc, [] 
  2058   
2060      """Matches if current position is at the end of a line within the parse string""" 
2065           
2066   
2067 -    def parseImpl( self, instring, loc, doActions=True ): 
 2068          if loc<len(instring): 
2069              if instring[loc] == "\n": 
2070                  return loc+1, "\n" 
2071              else: 
2072                   
2073                  exc = self.myException 
2074                  exc.loc = loc 
2075                  exc.pstr = instring 
2076                  raise exc 
2077          elif loc == len(instring): 
2078              return loc+1, [] 
2079          else: 
2080              exc = self.myException 
2081              exc.loc = loc 
2082              exc.pstr = instring 
2083              raise exc 
  2084   
2086      """Matches if current position is at the beginning of the parse string""" 
2090           
2091   
2092 -    def parseImpl( self, instring, loc, doActions=True ): 
 2093          if loc != 0: 
2094               
2095              if loc != self.preParse( instring, 0 ): 
2096                   
2097                  exc = self.myException 
2098                  exc.loc = loc 
2099                  exc.pstr = instring 
2100                  raise exc 
2101          return loc, [] 
  2102   
2104      """Matches if current position is at the end of the parse string""" 
2108           
2109   
2110 -    def parseImpl( self, instring, loc, doActions=True ): 
 2111          if loc < len(instring): 
2112               
2113              exc = self.myException 
2114              exc.loc = loc 
2115              exc.pstr = instring 
2116              raise exc 
2117          elif loc == len(instring): 
2118              return loc+1, [] 
2119          elif loc > len(instring): 
2120              return loc, [] 
2121          else: 
2122              exc = self.myException 
2123              exc.loc = loc 
2124              exc.pstr = instring 
2125              raise exc 
  2126   
2128      """Matches if the current position is at the beginning of a Word, and 
2129         is not preceded by any character in a given set of wordChars 
2130         (default=printables). To emulate the \b behavior of regular expressions, 
2131         use WordStart(alphanums). WordStart will also match at the beginning of 
2132         the string being parsed, or at the beginning of a line. 
2133      """ 
2135          super(WordStart,self).__init__() 
2136          self.wordChars = _str2dict(wordChars) 
2137          self.errmsg = "Not at the start of a word" 
 2138   
2139 -    def parseImpl(self, instring, loc, doActions=True ): 
 2140          if loc != 0: 
2141              if (instring[loc-1] in self.wordChars or 
2142                  instring[loc] not in self.wordChars): 
2143                  exc = self.myException 
2144                  exc.loc = loc 
2145                  exc.pstr = instring 
2146                  raise exc 
2147          return loc, [] 
  2148   
2150      """Matches if the current position is at the end of a Word, and 
2151         is not followed by any character in a given set of wordChars 
2152         (default=printables). To emulate the \b behavior of regular expressions, 
2153         use WordEnd(alphanums). WordEnd will also match at the end of 
2154         the string being parsed, or at the end of a line. 
2155      """ 
2157          super(WordEnd,self).__init__() 
2158          self.wordChars = _str2dict(wordChars) 
2159          self.skipWhitespace = False 
2160          self.errmsg = "Not at the end of a word" 
 2161   
2162 -    def parseImpl(self, instring, loc, doActions=True ): 
 2163          instrlen = len(instring) 
2164          if instrlen>0 and loc<instrlen: 
2165              if (instring[loc] in self.wordChars or 
2166                  instring[loc-1] not in self.wordChars): 
2167                   
2168                  exc = self.myException 
2169                  exc.loc = loc 
2170                  exc.pstr = instring 
2171                  raise exc 
2172          return loc, [] 
  2173   
2174   
2176      """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" 
2177 -    def __init__( self, exprs, savelist = False ): 
 2178          super(ParseExpression,self).__init__(savelist) 
2179          if isinstance( exprs, list ): 
2180              self.exprs = exprs 
2181          elif isinstance( exprs, basestring ): 
2182              self.exprs = [ Literal( exprs ) ] 
2183          else: 
2184              self.exprs = [ exprs ] 
2185          self.callPreparse = False 
 2186   
2188          return self.exprs[i] 
 2189   
2191          self.exprs.append( other ) 
2192          self.strRepr = None 
2193          return self 
 2194   
2196          """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on 
2197             all contained expressions.""" 
2198          self.skipWhitespace = False 
2199          self.exprs = [ e.copy() for e in self.exprs ] 
2200          for e in self.exprs: 
2201              e.leaveWhitespace() 
2202          return self 
 2203   
2205          if isinstance( other, Suppress ): 
2206              if other not in self.ignoreExprs: 
2207                  super( ParseExpression, self).ignore( other ) 
2208                  for e in self.exprs: 
2209                      e.ignore( self.ignoreExprs[-1] ) 
2210          else: 
2211              super( ParseExpression, self).ignore( other ) 
2212              for e in self.exprs: 
2213                  e.ignore( self.ignoreExprs[-1] ) 
2214          return self 
 2215   
2217          try: 
2218              return super(ParseExpression,self).__str__() 
2219          except: 
2220              pass 
2221   
2222          if self.strRepr is None: 
2223              self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) ) 
2224          return self.strRepr 
 2225   
2227          super(ParseExpression,self).streamline() 
2228   
2229          for e in self.exprs: 
2230              e.streamline() 
2231   
2232           
2233           
2234           
2235          if ( len(self.exprs) == 2 ): 
2236              other = self.exprs[0] 
2237              if ( isinstance( other, self.__class__ ) and 
2238                    not(other.parseAction) and 
2239                    other.resultsName is None and 
2240                    not other.debug ): 
2241                  self.exprs = other.exprs[:] + [ self.exprs[1] ] 
2242                  self.strRepr = None 
2243                  self.mayReturnEmpty |= other.mayReturnEmpty 
2244                  self.mayIndexError  |= other.mayIndexError 
2245   
2246              other = self.exprs[-1] 
2247              if ( isinstance( other, self.__class__ ) and 
2248                    not(other.parseAction) and 
2249                    other.resultsName is None and 
2250                    not other.debug ): 
2251                  self.exprs = self.exprs[:-1] + other.exprs[:] 
2252                  self.strRepr = None 
2253                  self.mayReturnEmpty |= other.mayReturnEmpty 
2254                  self.mayIndexError  |= other.mayIndexError 
2255   
2256          return self 
 2257   
2261   
2262 -    def validate( self, validateTrace=[] ): 
 2263          tmp = validateTrace[:]+[self] 
2264          for e in self.exprs: 
2265              e.validate(tmp) 
2266          self.checkRecursion( [] ) 
  2267   
2268 -class And(ParseExpression): 
 2269      """Requires all given ParseExpressions to be found in the given order. 
2270         Expressions may be separated by whitespace. 
2271         May be constructed using the '+' operator. 
2272      """ 
2273   
2276              return And._ErrorStop.instance 
 2280 -    def __init__( self, exprs, savelist = True ): 
 2281          super(And,self).__init__(exprs, savelist) 
2282          self.mayReturnEmpty = True 
2283          for e in self.exprs: 
2284              if not e.mayReturnEmpty: 
2285                  self.mayReturnEmpty = False 
2286                  break 
2287          self.setWhitespaceChars( exprs[0].whiteChars ) 
2288          self.skipWhitespace = exprs[0].skipWhitespace 
2289          self.callPreparse = True 
 2290   
2291 -    def parseImpl( self, instring, loc, doActions=True ): 
 2292           
2293           
2294          loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False ) 
2295          errorStop = False 
2296          for e in self.exprs[1:]: 
2297              if e is And._ErrorStop.instance: 
2298                  errorStop = True 
2299                  continue 
2300              if errorStop: 
2301                  try: 
2302                      loc, exprtokens = e._parse( instring, loc, doActions ) 
2303                  except ParseBaseException, pe: 
2304                      raise ParseSyntaxException(pe) 
2305                  except IndexError, ie: 
2306                      raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) ) 
2307              else: 
2308                  loc, exprtokens = e._parse( instring, loc, doActions ) 
2309              if exprtokens or exprtokens.keys(): 
2310                  resultlist += exprtokens 
2311          return loc, resultlist 
 2312   
2314          if isinstance( other, basestring ): 
2315              other = Literal( other ) 
2316          return self.append( other )  
 2317   
2319          subRecCheckList = parseElementList[:] + [ self ] 
2320          for e in self.exprs: 
2321              e.checkRecursion( subRecCheckList ) 
2322              if not e.mayReturnEmpty: 
2323                  break 
 2324   
2326          if hasattr(self,"name"): 
2327              return self.name 
2328   
2329          if self.strRepr is None: 
2330              self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 
2331   
2332          return self.strRepr 
  2333   
2334   
2335 -class Or(ParseExpression): 
 2336      """Requires that at least one ParseExpression is found. 
2337         If two expressions match, the expression that matches the longest string will be used. 
2338         May be constructed using the '^' operator. 
2339      """ 
2340 -    def __init__( self, exprs, savelist = False ): 
 2341          super(Or,self).__init__(exprs, savelist) 
2342          self.mayReturnEmpty = False 
2343          for e in self.exprs: 
2344              if e.mayReturnEmpty: 
2345                  self.mayReturnEmpty = True 
2346                  break 
 2347   
2348 -    def parseImpl( self, instring, loc, doActions=True ): 
 2349          maxExcLoc = -1 
2350          maxMatchLoc = -1 
2351          maxException = None 
2352          for e in self.exprs: 
2353              try: 
2354                  loc2 = e.tryParse( instring, loc ) 
2355              except ParseException, err: 
2356                  if err.loc > maxExcLoc: 
2357                      maxException = err 
2358                      maxExcLoc = err.loc 
2359              except IndexError: 
2360                  if len(instring) > maxExcLoc: 
2361                      maxException = ParseException(instring,len(instring),e.errmsg,self) 
2362                      maxExcLoc = len(instring) 
2363              else: 
2364                  if loc2 > maxMatchLoc: 
2365                      maxMatchLoc = loc2 
2366                      maxMatchExp = e 
2367   
2368          if maxMatchLoc < 0: 
2369              if maxException is not None: 
2370                  raise maxException 
2371              else: 
2372                  raise ParseException(instring, loc, "no defined alternatives to match", self) 
2373   
2374          return maxMatchExp._parse( instring, loc, doActions ) 
 2375   
2377          if isinstance( other, basestring ): 
2378              other = Literal( other ) 
2379          return self.append( other )  
 2380   
2382          if hasattr(self,"name"): 
2383              return self.name 
2384   
2385          if self.strRepr is None: 
2386              self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 
2387   
2388          return self.strRepr 
 2389   
2391          subRecCheckList = parseElementList[:] + [ self ] 
2392          for e in self.exprs: 
2393              e.checkRecursion( subRecCheckList ) 
  2394   
2395   
2397      """Requires that at least one ParseExpression is found. 
2398         If two expressions match, the first one listed is the one that will match. 
2399         May be constructed using the '|' operator. 
2400      """ 
2401 -    def __init__( self, exprs, savelist = False ): 
 2402          super(MatchFirst,self).__init__(exprs, savelist) 
2403          if exprs: 
2404              self.mayReturnEmpty = False 
2405              for e in self.exprs: 
2406                  if e.mayReturnEmpty: 
2407                      self.mayReturnEmpty = True 
2408                      break 
2409          else: 
2410              self.mayReturnEmpty = True 
 2411   
2412 -    def parseImpl( self, instring, loc, doActions=True ): 
 2413          maxExcLoc = -1 
2414          maxException = None 
2415          for e in self.exprs: 
2416              try: 
2417                  ret = e._parse( instring, loc, doActions ) 
2418                  return ret 
2419              except ParseException, err: 
2420                  if err.loc > maxExcLoc: 
2421                      maxException = err 
2422                      maxExcLoc = err.loc 
2423              except IndexError: 
2424                  if len(instring) > maxExcLoc: 
2425                      maxException = ParseException(instring,len(instring),e.errmsg,self) 
2426                      maxExcLoc = len(instring) 
2427   
2428           
2429          else: 
2430              if maxException is not None: 
2431                  raise maxException 
2432              else: 
2433                  raise ParseException(instring, loc, "no defined alternatives to match", self) 
 2434   
2436          if isinstance( other, basestring ): 
2437              other = Literal( other ) 
2438          return self.append( other )  
 2439   
2441          if hasattr(self,"name"): 
2442              return self.name 
2443   
2444          if self.strRepr is None: 
2445              self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 
2446   
2447          return self.strRepr 
 2448   
2450          subRecCheckList = parseElementList[:] + [ self ] 
2451          for e in self.exprs: 
2452              e.checkRecursion( subRecCheckList ) 
  2453   
2454   
2455 -class Each(ParseExpression): 
 2456      """Requires all given ParseExpressions to be found, but in any order. 
2457         Expressions may be separated by whitespace. 
2458         May be constructed using the '&' operator. 
2459      """ 
def __init__( self, exprs, savelist = True ):
    """Set up an Each over the given expressions.

       The Each may return empty only if every contained expression may
       return empty.  Expression grouping is deferred: initExprGroups is set
       so that parseImpl classifies the expressions on its first call.
    """
    super(Each,self).__init__(exprs, savelist)
    self.initExprGroups = True
    self.skipWhitespace = True
    # any expression that must consume input makes the whole Each non-empty
    nonEmpty = [ sub for sub in self.exprs if not sub.mayReturnEmpty ]
    self.mayReturnEmpty = not nonEmpty
 2469   
def parseImpl( self, instring, loc, doActions=True ):
    """Match all contained expressions, in whatever order they occur.

       Works in two passes: first, tryParse is used repeatedly to discover
       the order in which the expressions match; then the expressions are
       parsed for real in that order (honouring doActions) and their results
       are merged into a single ParseResults.

       Raises ParseException if any required expression was never matched.
    """
    if self.initExprGroups:
        # one-time classification of the contained expressions
        optionals, multiOptionals, multiRequired, plain = [], [], [], []
        for sub in self.exprs:
            if isinstance(sub,Optional):
                optionals.append(sub.expr)
            if isinstance(sub,ZeroOrMore):
                multiOptionals.append(sub.expr)
            if isinstance(sub,OneOrMore):
                multiRequired.append(sub.expr)
            if not isinstance(sub,(Optional,ZeroOrMore,OneOrMore)):
                plain.append(sub)
        self.optionals = optionals
        self.multioptionals = multiOptionals
        self.multirequired = multiRequired
        self.required = plain + multiRequired
        self.initExprGroups = False

    probeLoc = loc
    pendingReqd = self.required[:]
    pendingOpt = self.optionals[:]
    matchOrder = []

    # keep sweeping over the candidates until a full sweep matches nothing
    while True:
        candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
        failCount = 0
        for cand in candidates:
            try:
                probeLoc = cand.tryParse( instring, probeLoc )
            except ParseException:
                failCount += 1
                continue
            matchOrder.append(cand)
            if cand in pendingReqd:
                pendingReqd.remove(cand)
            elif cand in pendingOpt:
                pendingOpt.remove(cand)
        if failCount == len(candidates):
            break

    if pendingReqd:
        missing = ", ".join( [ _ustr(e) for e in pendingReqd ] )
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # Optional wrappers whose expression never matched are parsed as well
    for sub in self.exprs:
        if isinstance(sub,Optional) and sub.expr in pendingOpt:
            matchOrder.append(sub)

    # second pass: parse for real, in the discovered order
    resultlist = []
    for matched in matchOrder:
        loc, results = matched._parse(instring,loc,doActions)
        resultlist.append(results)

    # merge; a name already present keeps its old values with the new ones appended
    finalResults = ParseResults([])
    for partial in resultlist:
        knownNames = finalResults.keys()
        repeated = {}
        for name in partial.keys():
            if name in knownNames:
                accumulated = ParseResults(finalResults[name])
                accumulated += ParseResults(partial[name])
                repeated[name] = accumulated
        finalResults += ParseResults(partial)
        for name, value in repeated.items():
            finalResults[name] = value
    return loc, finalResults
 2525   
2527          if hasattr(self,"name"): 
2528              return self.name 
2529   
2530          if self.strRepr is None: 
2531              self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 
2532   
2533          return self.strRepr 
 2534   
2536          subRecCheckList = parseElementList[:] + [ self ] 
2537          for e in self.exprs: 
2538              e.checkRecursion( subRecCheckList ) 
  2539   
2540   
2542      """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" 
def __init__( self, expr, savelist=False ):
    """Wrap expr (a plain string is converted to a Literal) and, when an
       expression is given, copy its parsing attributes (index-error and
       empty-match flags, whitespace handling, list-saving and preparse
       flags, ignore expressions) onto this element.
    """
    super(ParseElementEnhance,self).__init__(savelist)
    if isinstance( expr, basestring ):
        expr = Literal(expr)
    self.strRepr = None
    self.expr = expr
    if expr is None:
        return

    for attr in ("mayIndexError", "mayReturnEmpty"):
        setattr( self, attr, getattr(expr, attr) )
    # skipWhitespace is copied after the whitespace chars are set, as before
    self.setWhitespaceChars( expr.whiteChars )
    for attr in ("skipWhitespace", "saveAsList", "callPreparse"):
        setattr( self, attr, getattr(expr, attr) )
    self.ignoreExprs.extend(expr.ignoreExprs)
 2557   
def parseImpl( self, instring, loc, doActions=True ):
    """Delegate parsing to the wrapped expression.

       Returns whatever self.expr._parse returns; preparsing is not repeated
       for the wrapped expression (callPreParse=False).

       Raises ParseException when no expression has been assigned.  The
       exception carries the actual input string so that its location can be
       reported against the text being parsed.
    """
    if self.expr is None:
        raise ParseException(instring,loc,self.errmsg,self)
    return self.expr._parse( instring, loc, doActions, callPreParse=False )
 2563   
2565          self.skipWhitespace = False 
2566          self.expr = self.expr.copy() 
2567          if self.expr is not None: 
2568              self.expr.leaveWhitespace() 
2569          return self 
 2570   
2572          if isinstance( other, Suppress ): 
2573              if other not in self.ignoreExprs: 
2574                  super( ParseElementEnhance, self).ignore( other ) 
2575                  if self.expr is not None: 
2576                      self.expr.ignore( self.ignoreExprs[-1] ) 
2577          else: 
2578              super( ParseElementEnhance, self).ignore( other ) 
2579              if self.expr is not None: 
2580                  self.expr.ignore( self.ignoreExprs[-1] ) 
2581          return self 
 2582   
2588   
2590          if self in parseElementList: 
2591              raise RecursiveGrammarException( parseElementList+[self] ) 
2592          subRecCheckList = parseElementList[:] + [ self ] 
2593          if self.expr is not None: 
2594              self.expr.checkRecursion( subRecCheckList ) 
 2595   
def validate( self, validateTrace=[] ):
    """Validate the wrapped expression, passing it a copy of validateTrace
       extended with this element, then run this element's recursion check
       starting from an empty list.  The validateTrace argument is not
       modified.
    """
    trace = validateTrace[:] + [self]
    wrapped = self.expr
    if wrapped is not None:
        wrapped.validate(trace)
    self.checkRecursion( [] )
 2601   
2603          try: 
2604              return super(ParseElementEnhance,self).__str__() 
2605          except: 
2606              pass 
2607   
2608          if self.strRepr is None and self.expr is not None: 
2609              self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) 
2610          return self.strRepr 
  2611   
2612   
2614      """Lookahead matching of the given parse expression.  FollowedBy 
2615      does *not* advance the parsing position within the input string, it only 
2616      verifies that the specified parse expression matches at the current 
2617      position.  FollowedBy always returns a null token list.""" 
2621   
2622 -    def parseImpl( self, instring, loc, doActions=True ): 
  2625   
2626   
2627 -class NotAny(ParseElementEnhance): 
 2628      """Lookahead to disallow matching with the given parse expression.  NotAny 
2629      does *not* advance the parsing position within the input string, it only 
2630      verifies that the specified parse expression does *not* match at the current 
2631      position.  Also, NotAny does *not* skip over leading whitespace. NotAny 
2632      always returns a null token list.  May be constructed using the '~' operator.""" 
2634          super(NotAny,self).__init__(expr) 
2635           
2636          self.skipWhitespace = False   
2637          self.mayReturnEmpty = True 
2638          self.errmsg = "Found unwanted token, "+_ustr(self.expr) 
 2639           
2640   
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, without consuming input, only if the wrapped expression does
       not match at loc.

       Returns (loc, []) when the wrapped expression fails to parse;
       otherwise raises this element's stored exception, updated with the
       current location and input string.
    """
    try:
        self.expr.tryParse( instring, loc )
    except (ParseException,IndexError):
        # the unwanted expression is absent - that is a match for NotAny
        return loc, []

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
 2653   
2655          if hasattr(self,"name"): 
2656              return self.name 
2657   
2658          if self.strRepr is None: 
2659              self.strRepr = "~{" + _ustr(self.expr) + "}" 
2660   
2661          return self.strRepr 
  2662   
2663   
2665      """Optional repetition of zero or more of the given expression.""" 
2669   
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression as many times as possible, possibly zero.

       Returns the location after the last successful match together with the
       accumulated tokens; a ParseException or IndexError from the wrapped
       expression simply ends the repetition.
    """
    tokens = []
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        skipIgnorables = len(self.ignoreExprs) > 0
        while True:
            nextLoc = loc
            if skipIgnorables:
                nextLoc = self._skipIgnorables( instring, loc )
            loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
            # keep results that carry tokens or named values
            if moreTokens or moreTokens.keys():
                tokens += moreTokens
    except (ParseException,IndexError):
        pass

    return loc, tokens
 2687   
2689          if hasattr(self,"name"): 
2690              return self.name 
2691   
2692          if self.strRepr is None: 
2693              self.strRepr = "[" + _ustr(self.expr) + "]..." 
2694   
2695          return self.strRepr 
 2696   
 2701   
2702   
2704      """Repetition of one or more of the given expression.""" 
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression one or more times.

       The first match is mandatory, so any exception it raises propagates to
       the caller.  Further matches are attempted until the wrapped
       expression raises ParseException or IndexError.  Returns the location
       after the last successful match and the accumulated tokens.
    """
    # mandatory first occurrence - deliberately outside the try block
    loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    try:
        skipIgnorables = len(self.ignoreExprs) > 0
        while True:
            nextLoc = loc
            if skipIgnorables:
                nextLoc = self._skipIgnorables( instring, loc )
            loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
            # keep results that carry tokens or named values
            if moreTokens or moreTokens.keys():
                tokens += moreTokens
    except (ParseException,IndexError):
        pass

    return loc, tokens
 2722   
2724          if hasattr(self,"name"): 
2725              return self.name 
2726   
2727          if self.strRepr is None: 
2728              self.strRepr = "{" + _ustr(self.expr) + "}..." 
2729   
2730          return self.strRepr 
 2731   
 2736   
2743   
2744  _optionalNotMatched = _NullToken() 
2746      """Optional matching of the given expression. 
2747         A default return string can also be specified, if the optional expression 
2748         is not found. 
2749      """ 
2751          super(Optional,self).__init__( exprs, savelist=False ) 
2752          self.defaultValue = default 
2753          self.mayReturnEmpty = True 
 2754   
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression if possible.

       On success, returns the wrapped expression's location and tokens.  On
       ParseException or IndexError, returns the unchanged location with
       either an empty token list or, when a default value was configured,
       that default (also stored under the wrapped expression's results name
       when it has one).
    """
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    except (ParseException,IndexError):
        if self.defaultValue is _optionalNotMatched:
            # no default was supplied
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([ self.defaultValue ])
            tokens[self.expr.resultsName] = self.defaultValue
        else:
            tokens = [ self.defaultValue ]
    return loc, tokens
 2768   
2770          if hasattr(self,"name"): 
2771              return self.name 
2772   
2773          if self.strRepr is None: 
2774              self.strRepr = "[" + _ustr(self.expr) + "]" 
2775   
2776          return self.strRepr 
  2777   
2778   
class SkipTo(ParseElementEnhance):
    """Token that skips over arbitrary text until the given expression matches.
       With include set to true the matched expression is consumed as well.
       The ignore argument names an expression (typically quoted strings or
       comments) to be ignored while searching, since it might contain false
       matches.
    """
    def __init__( self, other, include=False, ignore=None ):
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            # work on a copy so the ignore is not attached to the caller's expression
            self.expr = target = self.expr.copy()
            target.ignore(ignore)
        self.includeMatch = include
        self.asList = False
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one position at a time from loc until the target
           expression parses.  Returns the skipped text (plus the target's
           tokens when includeMatch is set); raises this element's stored
           exception if the end of the input is passed without a match.
        """
        beginLoc = loc
        limit = len(instring)
        target = self.expr
        while loc <= limit:
            try:
                loc = target._skipIgnorables( instring, loc )
                # probe only - parse actions are not run here
                target._parse( instring, loc, doActions=False, callPreParse=False )
                if not self.includeMatch:
                    return loc, [ instring[beginLoc:loc] ]
                skipped = instring[beginLoc:loc]
                loc, matched = target._parse(instring,loc,doActions,callPreParse=False)
                if not matched:
                    return loc, [ skipped ]
                combined = ParseResults( skipped )
                combined += matched
                return loc, [ combined ]
            except (ParseException,IndexError):
                loc += 1
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
  2822   
2823 -class Forward(ParseElementEnhance): 
 2824      """Forward declaration of an expression to be defined later - 
2825         used for recursive grammars, such as algebraic infix notation. 
2826         When the expression is known, it is assigned to the Forward variable using the '<<' operator. 
2827   
2828         Note: take care when assigning to Forward not to overlook precedence of operators. 
2829         Specifically, '|' has a lower precedence than '<<', so that:: 
2830            fwdExpr << a | b | c 
2831         will actually be evaluated as:: 
2832            (fwdExpr << a) | b | c 
2833         thereby leaving b and c out as parseable alternatives.  It is recommended that you 
2834         explicitly group the values inserted into the Forward:: 
2835            fwdExpr << (a | b | c) 
2836      """ 
2839   
2841          if isinstance( other, basestring ): 
2842              other = Literal(other) 
2843          self.expr = other 
2844          self.mayReturnEmpty = other.mayReturnEmpty 
2845          self.strRepr = None 
2846          self.mayIndexError = self.expr.mayIndexError 
2847          self.mayReturnEmpty = self.expr.mayReturnEmpty 
2848          self.setWhitespaceChars( self.expr.whiteChars ) 
2849          self.skipWhitespace = self.expr.skipWhitespace 
2850          self.saveAsList = self.expr.saveAsList 
2851          self.ignoreExprs.extend(self.expr.ignoreExprs) 
2852          return None 
 2853   
2855          self.skipWhitespace = False 
2856          return self 
 2857   
2859          if not self.streamlined: 
2860              self.streamlined = True 
2861              if self.expr is not None: 
2862                  self.expr.streamline() 
2863          return self 
 2864   
def validate( self, validateTrace=[] ):
    """Validate the assigned expression unless this element already appears
       in validateTrace, then run this element's recursion check starting
       from an empty list.  The validateTrace argument is not modified.
    """
    alreadyVisited = self in validateTrace
    if not alreadyVisited:
        trace = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(trace)
    self.checkRecursion([])
 2871   
2873          if hasattr(self,"name"): 
2874              return self.name 
2875   
2876          self.__class__ = _ForwardNoRecurse 
2877          try: 
2878              if self.expr is not None: 
2879                  retString = _ustr(self.expr) 
2880              else: 
2881                  retString = "None" 
2882          finally: 
2883              self.__class__ = Forward 
2884          return "Forward: "+retString 
 2885   
2887          if self.expr is not None: 
2888              return super(Forward,self).copy() 
2889          else: 
2890              ret = Forward() 
2891              ret << self 
2892              return ret 
  2893   
2897   
2899      """Abstract subclass of ParseExpression, for converting parsed results.""" 
2900 -    def __init__( self, expr, savelist=False ): 
  2903   
2904 -class Upcase(TokenConverter): 
 2905      """Converter to upper case all matching tokens.""" 
2907          super(Upcase,self).__init__(*args) 
2908          warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", 
2909                         DeprecationWarning,stacklevel=2) 
 2910   
2911 -    def postParse( self, instring, loc, tokenlist ): 
 2912          return list(map( string.upper, tokenlist )) 
  2913   
2914   
2916      """Converter to concatenate all matching tokens to a single string. 
2917         By default, the matching patterns must also be contiguous in the input string; 
2918         this can be disabled by specifying 'adjacent=False' in the constructor. 
2919      """ 
def __init__( self, expr, joinString="", adjacent=True ):
    """Wrap expr so that its tokens are later joined into one string.

       joinString is stored for use when joining; adjacent (the default)
       makes the wrapped expression leave whitespace alone, while this
       element itself still skips leading whitespace.
    """
    super(Combine,self).__init__( expr )
    self.joinString = joinString
    self.adjacent = adjacent
    if adjacent:
        self.leaveWhitespace()
    # must come after leaveWhitespace(), which clears this flag
    self.skipWhitespace = True
 2928   
2935   
def postParse( self, instring, loc, tokenlist ):
    """Replace the matched tokens with a single joined string.

       The tokens of a copy of tokenlist are cleared and replaced by the one
       joined token.  The result is wrapped in a list when this element has
       a results name and the copy holds named values.
    """
    joined = "".join(tokenlist._asStringList(self.joinString))
    combined = tokenlist.copy()
    del combined[:]
    combined += ParseResults([ joined ], modal=self.modalResults)

    if self.resultsName and len(combined.keys()) > 0:
        return [ combined ]
    return combined
  2945   
2946 -class Group(TokenConverter): 
 2947      """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions.""" 
2949          super(Group,self).__init__( expr ) 
2950          self.saveAsList = True 
 2951   
def postParse( self, instring, loc, tokenlist ):
    """Return the matched tokens nested inside a one-element list."""
    grouped = [ tokenlist ]
    return grouped
  2954   
2955 -class Dict(TokenConverter): 
 2956      """Converter to return a repetitive expression as a list, but also as a dictionary. 
2957         Each element can also be referenced using the first token in the expression as its key. 
2958         Useful for tabular report scraping when the first column can be used as a item key. 
2959      """ 
2961          super(Dict,self).__init__( exprs ) 
2962          self.saveAsList = True 
 2963   
def postParse( self, instring, loc, tokenlist ):
    """Add a named entry to tokenlist for every non-empty token group,
       keyed by the group's first token (integer keys are converted to
       stripped strings).

       The stored value is "" for a one-token group, the second token for a
       two-token group whose second token is not a ParseResults, and
       otherwise the rest of the group (unwrapped when it is a single value
       with no named values of its own).  Returns tokenlist, wrapped in a
       list when this element has a results name.
    """
    for offset, entry in enumerate(tokenlist):
        entryLen = len(entry)
        if entryLen == 0:
            continue

        key = entry[0]
        if isinstance(key,int):
            key = _ustr(entry[0]).strip()

        if entryLen == 1:
            value = ""
        elif entryLen == 2 and not isinstance(entry[1],ParseResults):
            value = entry[1]
        else:
            # everything after the key, taken from a copy of the group
            remainder = entry.copy()
            del remainder[0]
            if len(remainder) != 1 or (isinstance(remainder,ParseResults) and remainder.keys()):
                value = remainder
            else:
                value = remainder[0]
        tokenlist[key] = _ParseResultsWithOffset(value,offset)

    if self.resultsName:
        return [ tokenlist ]
    return tokenlist
  2987   
2988   
2990      """Converter for ignoring the results of a parsed expression.""" 
2991 -    def postParse( self, instring, loc, tokenlist ): 
 2993   
 2996   
2997   
2999      """Wrapper for parse actions, to ensure they are only called once.""" 
3001          self.callable = ParserElement._normalizeParseActionArgs(methodCall) 
3002          self.called = False 
 3004          if not self.called: 
3005              results = self.callable(s,l,t) 
3006              self.called = True 
3007              return results 
3008          raise ParseException(s,l,"") 
  3011   
3013      """Decorator for debugging parse actions.""" 
3014      f = ParserElement._normalizeParseActionArgs(f) 
3015      def z(*paArgs): 
3016          thisFunc = f.func_name 
3017          s,l,t = paArgs[-3:] 
3018          if len(paArgs)>3: 
3019              thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc 
3020          sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) 
3021          try: 
3022              ret = f(*paArgs) 
3023          except Exception, exc: 
3024              sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) ) 
3025              raise 
3026          sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) ) 
3027          return ret 
 3028      try: 
3029          z.__name__ = f.__name__ 
3030      except AttributeError: 
3031          pass 
3032      return z 
3033   
3034   
3035   
3036   
3038      """Helper to define a delimited list of expressions - the delimiter defaults to ','. 
3039         By default, the list elements and delimiters can have intervening whitespace, and 
3040         comments, but this can be overridden by passing 'combine=True' in the constructor. 
3041         If combine is set to True, the matching tokens are returned as a single token 
3042         string, with the delimiters included; otherwise, the matching tokens are returned 
3043         as a list of tokens, with the delimiters suppressed. 
3044      """ 
3045      dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." 
3046      if combine: 
3047          return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName) 
3048      else: 
3049          return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName) 
 3050   
3052      """Helper to define a counted list of expressions. 
3053         This helper defines a pattern of the form:: 
3054             integer expr expr expr... 
3055         where the leading integer tells how many expr expressions follow. 
3056         The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed. 
3057      """ 
3058      arrayExpr = Forward() 
3059      def countFieldParseAction(s,l,t): 
3060          n = int(t[0]) 
3061          arrayExpr << (n and Group(And([expr]*n)) or Group(empty)) 
3062          return [] 
 3063      return ( Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True) + arrayExpr ) 
3064   
3066      if type(L) is not list: return [L] 
3067      if L == []: return L 
3068      return _flatten(L[0]) + _flatten(L[1:]) 
 3069   
3071      """Helper to define an expression that is indirectly defined from 
3072         the tokens matched in a previous expression, that is, it looks 
3073         for a 'repeat' of a previous expression.  For example:: 
3074             first = Word(nums) 
3075             second = matchPreviousLiteral(first) 
3076             matchExpr = first + ":" + second 
3077         will match "1:1", but not "1:2".  Because this matches a 
3078         previous literal, will also match the leading "1:1" in "1:10". 
3079         If this is not desired, use matchPreviousExpr. 
3080         Do *not* use with packrat parsing enabled. 
3081      """ 
3082      rep = Forward() 
3083      def copyTokenToRepeater(s,l,t): 
3084          if t: 
3085              if len(t) == 1: 
3086                  rep << t[0] 
3087              else: 
3088                   
3089                  tflat = _flatten(t.asList()) 
3090                  rep << And( [ Literal(tt) for tt in tflat ] ) 
3091          else: 
3092              rep << Empty() 
 3093      expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 
3094      return rep 
3095   
3097      """Helper to define an expression that is indirectly defined from 
3098         the tokens matched in a previous expression, that is, it looks 
3099         for a 'repeat' of a previous expression.  For example:: 
3100             first = Word(nums) 
3101             second = matchPreviousExpr(first) 
3102             matchExpr = first + ":" + second 
3103         will match "1:1", but not "1:2".  Because this matches by 
3104         expressions, will *not* match the leading "1:1" in "1:10"; 
3105         the expressions are evaluated first, and then compared, so 
3106         "1" is compared with "10". 
3107         Do *not* use with packrat parsing enabled. 
3108      """ 
3109      rep = Forward() 
3110      e2 = expr.copy() 
3111      rep << e2 
3112      def copyTokenToRepeater(s,l,t): 
3113          matchTokens = _flatten(t.asList()) 
3114          def mustMatchTheseTokens(s,l,t): 
3115              theseTokens = _flatten(t.asList()) 
3116              if  theseTokens != matchTokens: 
3117                  raise ParseException("",0,"") 
 3118          rep.setParseAction( mustMatchTheseTokens, callDuringTry=True ) 
3119      expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 
3120      return rep 
3121   
3123       
3124      for c in r"\^-]": 
3125          s = s.replace(c,"\\"+c) 
3126      s = s.replace("\n",r"\n") 
3127      s = s.replace("\t",r"\t") 
3128      return _ustr(s) 
 3129   
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list or tuple of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       An argument of any other type produces a SyntaxWarning and is treated as
       an empty set of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        # always work on a list copy: the reordering below deletes and inserts
        # in place, which a tuple (or a tuple slice) does not support
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # nothing usable was given; continue with no alternatives instead of
        # failing later on an undefined name
        symbols = []

    # remove duplicates, and move any symbol that is masked by an earlier
    # prefix of itself ahead of that prefix, so the longer one is tested first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    # an empty symbol list cannot form a valid regex, so skip straight to MatchFirst
    if symbols and not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters - use a character class
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
 3188   
3190      """Helper to easily and clearly define a dictionary by specifying the respective patterns 
3191         for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens 
3192         in the proper order.  The key pattern can include delimiting markers or punctuation, 
3193         as long as they are suppressed, thereby leaving the significant key text.  The value 
3194         pattern can include named results, so that the Dict results can include named token 
3195         fields. 
3196      """ 
3197      return Dict( ZeroOrMore( Group ( key + value ) ) ) 
 3198   
3199   
3200  empty       = Empty().setName("empty") 
3201  lineStart   = LineStart().setName("lineStart") 
3202  lineEnd     = LineEnd().setName("lineEnd") 
3203  stringStart = StringStart().setName("stringStart") 
3204  stringEnd   = StringEnd().setName("stringEnd") 
3205   
3206  _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) 
3207  _printables_less_backslash = "".join([ c for c in printables if c not in  r"\]" ]) 
3208  _escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16))) 
3209  _escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8))) 
3210  _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1) 
3211  _charRange = Group(_singleChar + Suppress("-") + _singleChar) 
3212  _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" 
3213   
3214  _expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p) 
3215   
3217      r"""Helper to easily define string ranges for use in Word construction.  Borrows 
3218         syntax from regexp '[]' string range definitions:: 
3219            srange("[0-9]")   -> "0123456789" 
3220            srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz" 
3221            srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" 
3222         The input string must be enclosed in []'s, and the returned string is the expanded 
3223         character set joined into a single string. 
3224         The values enclosed in the []'s may be:: 
3225            a single character 
3226            an escaped character with a leading backslash (such as \- or \]) 
3227            an escaped hex character with a leading '\0x' (\0x21, which is a '!' character) 
3228            an escaped octal character with a leading '\0' (\041, which is a '!' character) 
3229            a range of any of the above, separated by a dash ('a-z', etc.) 
3230            any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) 
3231      """ 
3232      try: 
3233          return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body]) 
3234      except: 
3235          return "" 
 3236   
3238      """Helper method for defining parse actions that require matching at a specific 
3239         column in the input text. 
3240      """ 
3241      def verifyCol(strg,locn,toks): 
3242          if col(locn,strg) != n: 
3243              raise ParseException(strg,locn,"matched token not at column %d" % n) 
 3244      return verifyCol 
3245   
3247      """Helper method for common parse actions that simply return a literal value.  Especially 
3248         useful when used with transformString(). 
3249      """ 
3250      def _replFunc(*args): 
3251          return [replStr] 
 3252      return _replFunc 
3253   
3255      """Helper parse action for removing quotation marks from parsed quoted strings. 
3256         To use, add this parse action to quoted string using:: 
3257           quotedString.setParseAction( removeQuotes ) 
3258      """ 
3259      return t[0][1:-1] 
 3260   
3262      """Helper parse action to convert tokens to upper case.""" 
3263      return [ tt.upper() for tt in map(_ustr,t) ] 
 3264   
3266      """Helper parse action to convert tokens to lower case.""" 
3267      return [ tt.lower() for tt in map(_ustr,t) ] 
 3268   
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions.

       Empties t in place, refills it with the slice of s that runs from
       startLoc up to the end location reported by getTokensEndLoc(), and
       returns t.

       Raises ParseFatalException if the end location cannot be determined,
       i.e. when this function is not being run as a parse action.
    """
    try:
        endloc = getTokensEndLoc()
    except (ParseException, ParseFatalException):
        # getTokensEndLoc reports misuse by raising ParseFatalException, so
        # that type has to be caught here as well; otherwise the message
        # below, which names keepOriginalText, would never be produced.
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    del t[:]
    t += ParseResults(s[startLoc:endloc])
    return t
 3279   
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       Walks up the call stack, skipping this function's own frame and the
       frame of its direct caller, and returns the value of the local variable
       "loc" from the first frame whose function is named "_parseNoCache".

       Raises ParseFatalException if no such frame is found.
    """
    import inspect
    fstack = inspect.stack()
    try:
        # fstack[0] is this function and fstack[1] its direct caller, so the
        # search for the parsing routine starts two frames up.
        for f in fstack[2:]:
            if f[3] == "_parseNoCache":
                return f[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the frame records explicitly so the frames are not kept alive
        del fstack
 3295   
3323   
3327   
3331   
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.
       If any name-value tuples are given, keyword arguments are ignored.

       NOTE(review): the original documentation states that attribute names are
       matched insensitive to upper/lower case.  This function looks each name up
       in the parsed tokens exactly as given, so any case folding has to come from
       how the tag expression names its results - confirm against makeHTMLTags.

       To verify that the attribute exists, but without specifying a value, pass
       withAttribute.ANY_VALUE as the value.

       The returned parse action raises ParseException when a required attribute
       is missing from the tokens or has a different value; otherwise it returns
       None.
       """
    if args:
        attrs = args[:]
    else:
        attrs = attrDict.items()
    # snapshot as a list of (name, value) pairs; this also rejects entries
    # that are not 2-element sequences at definition time
    attrs = [(k,v) for k,v in attrs]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            # ANY_VALUE is a sentinel object, so it is compared by identity
            if attrValue is not withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# unique sentinel meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
3362   
# Holder for the two associativity markers.  LEFT and RIGHT are distinct
# sentinel objects; an operator definition's rightLeftAssoc entry is compared
# against them (see the "rightLeftAssoc == opAssoc.LEFT" / "opAssoc.RIGHT"
# checks further down in this file).
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
3366   
3368      """Helper method for constructing grammars of expressions made up of 
3369         operators working in a precedence hierarchy.  Operators may be unary or 
3370         binary, left- or right-associative.  Parse actions can also be attached 
3371         to operator expressions. 
3372   
3373         Parameters: 
3374          - baseExpr - expression representing the most basic element for the nested 
3375          - opList - list of tuples, one for each operator precedence level in the 
3376            expression grammar; each tuple is of the form 
3377            (opExpr, numTerms, rightLeftAssoc, parseAction), where: 
3378             - opExpr is the pyparsing expression for the operator; 
3379                may also be a string, which will be converted to a Literal; 
3380                if numTerms is 3, opExpr is a tuple of two expressions, for the 
3381                two operators separating the 3 terms 
3382             - numTerms is the number of terms for this operator (must 
3383                be 1, 2, or 3) 
3384             - rightLeftAssoc is the indicator whether the operator is 
3385                right or left associative, using the pyparsing-defined 
3386                constants opAssoc.RIGHT and opAssoc.LEFT. 
3387             - parseAction is the parse action to be associated with 
3388                expressions matching this operator expression (the 
3389                parse action tuple member may be omitted) 
3390      """ 
3391      ret = Forward() 
3392      lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) 
3393      for i,operDef in enumerate(opList): 
3394          opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] 
3395          if arity == 3: 
3396              if opExpr is None or len(opExpr) != 2: 
3397                  raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") 
3398              opExpr1, opExpr2 = opExpr 
3399          thisExpr = Forward() 
3400          if rightLeftAssoc == opAssoc.LEFT: 
3401              if arity == 1: 
3402                  matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) 
3403              elif arity == 2: 
3404                  if opExpr is not None: 
3405                      matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) 
3406                  else: 
3407                      matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) 
3408              elif arity == 3: 
3409                  matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ 
3410                              Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) 
3411              else: 
3412                  raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 
3413          elif rightLeftAssoc == opAssoc.RIGHT: 
3414              if arity == 1: 
3415                   
3416                  if not isinstance(opExpr, Optional): 
3417                      opExpr = Optional(opExpr) 
3418                  matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) 
3419              elif arity == 2: 
3420                  if opExpr is not None: 
3421                      matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) 
3422                  else: 
3423                      matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) 
3424              elif arity == 3: 
3425                  matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ 
3426                              Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) 
3427              else: 
3428                  raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 
3429          else: 
3430              raise ValueError("operator must indicate right or left associativity") 
3431          if pa: 
3432              matchExpr.setParseAction( pa ) 
3433          thisExpr << ( matchExpr | lastExpr ) 
3434          lastExpr = thisExpr 
3435      ret << lastExpr 
3436      return ret 
 3437   
# Quoted-string expressions.  Inside the quotes each pattern accepts: any
# character other than the quote character, newline, carriage return or
# backslash; a doubled quote character ("" or ''); a \x escape followed by hex
# digits; or a backslash followed by any single character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above, as a single alternation
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a 'u' followed by a quoted string; built on a copy so that quotedString
# itself is not the object embedded in this expression
unicodeString = Combine(_L('u') + quotedString.copy())
3442   
3444      """Helper method for defining nested lists enclosed in opening and closing 
3445         delimiters ("(" and ")" are the default). 
3446   
3447         Parameters: 
3448          - opener - opening character for a nested list (default="("); can also be a pyparsing expression 
3449          - closer - closing character for a nested list (default=")"); can also be a pyparsing expression 
3450          - content - expression for items within the nested lists (default=None) 
3451          - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) 
3452   
3453         If an expression is not provided for the content argument, the nested 
3454         expression will capture all whitespace-delimited content between delimiters 
3455         as a list of separate values. 
3456   
3457         Use the ignoreExpr argument to define expressions that may contain 
3458         opening or closing characters that should not be treated as opening 
3459         or closing characters for nesting, such as quotedString or a comment 
3460         expression.  Specify multiple expressions using an Or or MatchFirst. 
3461         The default is quotedString, but if no expressions are to be ignored, 
3462         then pass None for this argument. 
3463      """ 
3464      if opener == closer: 
3465          raise ValueError("opening and closing strings cannot be the same") 
3466      if content is None: 
3467          if isinstance(opener,basestring) and isinstance(closer,basestring): 
3468              if ignoreExpr is not None: 
3469                  content = (Combine(OneOrMore(~ignoreExpr + 
3470                                  CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 
3471                              ).setParseAction(lambda t:t[0].strip())) 
3472              else: 
3473                  content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t:t[0].strip())) 
3474          else: 
3475              raise ValueError("opening and closing arguments must be strings if no content expression is given") 
3476      ret = Forward() 
3477      if ignoreExpr is not None: 
3478          ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) 
3479      else: 
3480          ret << Group( Suppress(opener) + ZeroOrMore( ret | content )  + Suppress(closer) ) 
3481      return ret 
 3482   
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.

       Side effects: blockStatementExpr.ignore() is called on the caller's
       expression with a backslash followed by a line end, and the parse
       actions created here push to and pop from indentStack while parsing.
    """
    def checkPeerIndent(s,l,t):
        # a statement of the current block must start at the column on top
        # of the stack; nothing is checked at end of input
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # opening a block: the column must be deeper than the current level,
        # and becomes the new top of the stack
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # closing a block: the column must have dropped back to (or past) the
        # enclosing level; nothing is checked at end of input
        if l >= len(s): return
        curCol = col(l,s)
        # Two entries are required before indentStack[-2] may be read; with a
        # one-entry stack the lookup would raise IndexError instead of
        # reporting a normal parse failure.
        if not(len(indentStack) > 1 and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    if indent:
        smExpr = Group( Optional(NL) +
            FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        # left-most block: peers only, no INDENT/UNDENT bracketing
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    blockStatementExpr.ignore("\\" + LineEnd())
    return smExpr
3534   
# Character sets built with srange from \0x.. hex escapes: alphas8bit spans
# 0xc0-0xff except 0xd7 and 0xf7; punc8bit spans 0xa1-0xbf plus those two
# excluded code points.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing tag expressions for any tag name made of a letter followed
# by letters, digits, "_" or ":"
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# "&" + one of the listed entity names + ";", with the name stored under the
# results name "entity"
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
# Replacement character for each entity name, paired by position.  "quot" maps
# to the double-quote character (it was previously paired with an apostrophe);
# "nbsp" maps to a plain space.
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(), '><& "'))

def replaceHTMLEntity(t):
    """Helper parse action that returns the replacement character for the
       entity name held in t.entity, or None when the name is not one of
       the keys of _htmlEntityMap.
    """
    return _htmlEntityMap.get(t.entity)
3542   
3543   
# /* ... */ comment
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# <!-- ... -->, shortest match; [\s\S] lets the body include newlines
htmlComment = Regex(r"<!--[\s\S]*?-->")
# everything matched by ".*" from the current position, without first
# skipping leading whitespace
restOfLine = Regex(r".*").leaveWhitespace()
# "//" followed by any run of characters, where a backslash-newline pair is
# also accepted so the comment can continue onto the next line
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either a /* ... */ comment or a "//" comment
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# alias: the very same expression object as cppStyleComment
javaStyleComment = cppStyleComment
# "#" followed by whatever ".*" matches
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# every character of printables except the comma
_noncomma = "".join( [ c for c in printables if c != "," ] )
# one list item: words of non-comma characters, joined by runs of spaces/tabs
# as long as such a run is not followed by a comma or a line end
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# delimited list whose items are quoted strings or comma-free items; a missing
# item yields the default ""
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
3558   
3559   
3560  if __name__ == "__main__": 
3561   
3562 -    def test( teststring ): 
 3563          try: 
3564              tokens = simpleSQL.parseString( teststring ) 
3565              tokenlist = tokens.asList() 
3566              print (teststring + "->"   + str(tokenlist)) 
3567              print ("tokens = "         + str(tokens)) 
3568              print ("tokens.columns = " + str(tokens.columns)) 
3569              print ("tokens.tables = "  + str(tokens.tables)) 
3570              print (tokens.asXML("SQL",True)) 
3571          except ParseBaseException,err: 
3572              print (teststring + "->") 
3573              print (err.line) 
3574              print (" "*(err.column-1) + "^") 
3575              print (err) 
3576          print() 
 3577   
3578      selectToken    = CaselessLiteral( "select" ) 
3579      fromToken      = CaselessLiteral( "from" ) 
3580   
3581      ident          = Word( alphas, alphanums + "_$" ) 
3582      columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) 
3583      columnNameList = Group( delimitedList( columnName ) ) 
3584      tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) 
3585      tableNameList  = Group( delimitedList( tableName ) ) 
3586      simpleSQL      = ( selectToken + \ 
3587                       ( '*' | columnNameList ).setResultsName( "columns" ) + \ 
3588                       fromToken + \ 
3589                       tableNameList.setResultsName( "tables" ) ) 
3590   
3591      test( "SELECT * from XYZZY, ABC" ) 
3592      test( "select * from SYS.XYZZY" ) 
3593      test( "Select A from Sys.dual" ) 
3594      test( "Select AA,BB,CC from Sys.dual" ) 
3595      test( "Select A, B, C from Sys.dual" ) 
3596      test( "Select A, B, C from Sys.dual" ) 
3597      test( "Xelect A, B, C from Sys.dual" ) 
3598      test( "Select A, B, C frox Sys.dual" ) 
3599      test( "Select" ) 
3600      test( "Select ^^^ frox Sys.dual" ) 
3601      test( "Select A, B, C from Sys.dual, Table2   " ) 
3602