BlockIt
pyparsing.py
Go to the documentation of this file.
00001 # module pyparsing.py
00002 #
00003 # Copyright (c) 2003-2009  Paul T. McGuire
00004 #
00005 # Permission is hereby granted, free of charge, to any person obtaining
00006 # a copy of this software and associated documentation files (the
00007 # "Software"), to deal in the Software without restriction, including
00008 # without limitation the rights to use, copy, modify, merge, publish,
00009 # distribute, sublicense, and/or sell copies of the Software, and to
00010 # permit persons to whom the Software is furnished to do so, subject to
00011 # the following conditions:
00012 #
00013 # The above copyright notice and this permission notice shall be
00014 # included in all copies or substantial portions of the Software.
00015 #
00016 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00017 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00018 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
00019 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
00020 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
00021 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
00022 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00023 #
00024 #from __future__ import generators
00025 
00026 __doc__ = \
00027 """
00028 pyparsing module - Classes and methods to define and execute parsing grammars
00029 
00030 The pyparsing module is an alternative approach to creating and executing simple grammars,
00031 vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
00032 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
00033 provides a library of classes that you use to construct the grammar directly in Python.
00034 
00035 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
00036 
00037     from pyparsing import Word, alphas
00038 
00039     # define grammar of a greeting
00040     greet = Word( alphas ) + "," + Word( alphas ) + "!"
00041 
00042     hello = "Hello, World!"
00043     print hello, "->", greet.parseString( hello )
00044 
00045 The program outputs the following::
00046 
00047     Hello, World! -> ['Hello', ',', 'World', '!']
00048 
00049 The Python representation of the grammar is quite readable, owing to the self-explanatory
00050 class names, and the use of '+', '|' and '^' operators.
00051 
00052 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
00053 object with named attributes.
00054 
00055 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
00056  - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
00057  - quoted strings
00058  - embedded comments
00059 """
00060 
# Release metadata for this copy of the module.
__version__ = "1.5.2"
__versionTime__ = "17 February 2009 19:45"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
00064 
00065 import string
00066 from weakref import ref as wkref
00067 import copy
00068 import sys
00069 import warnings
00070 import re
00071 import sre_constants
00072 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
00073 
# Public names exported by "from pyparsing import *": classes first, then
# helper functions and predefined expressions.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor',
]
00093 
00094 
# Detect whether we are running under Python 3.x and define the
# version-dependent compatibility names (contributed by Robert A. Clark).
_PY3K = sys.version_info[0] > 2
if _PY3K:
    # Python 3 has neither sys.maxint nor a basestring builtin
    basestring = str
    _MAX_INT = sys.maxsize
else:
    _MAX_INT = sys.maxint
00106 
if _PY3K:
    # str is already Unicode-aware on Python 3; unichr was renamed to chr
    _ustr = str
    unichr = chr
else:
    def _ustr(obj):
        """Unicode-friendly stand-in for str(obj).

           unicode objects are returned unchanged.  Otherwise str(obj) is
           tried first, so existing code sees exactly what str() would give;
           only if that raises UnicodeEncodeError is unicode(obj) returned.
        """
        if not isinstance(obj,unicode):
            try:
                return str(obj)
            except UnicodeEncodeError:
                # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
                # say "The return value must be a string object"; a unicode
                # object is returned here rather than picking an encoding and
                # an error policy ('replace', backslash escapes, ...) for the
                # caller.
                return unicode(obj)
        return obj
00137 
if _PY3K:
    # on Python 3 a set serves as the membership container
    _str2dict = set
else:
    def _str2dict(strg):
        """Return a dict keyed by each character of strg, with every value 0."""
        return dict.fromkeys(strg, 0)
00143 
def _xml_escape(data):
    """Return data with the characters &, >, <, " and ' replaced by their
       XML entity references."""

    # '&' has to come first, otherwise the ampersands introduced by the
    # other entities would themselves be escaped
    substitutions = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for raw, entity in substitutions:
        data = data.replace(raw, entity)
    return data
00153 
class _Constants(object):
    """Empty placeholder class; it defines no attributes or methods of its own."""
00156 
# Character-set constants used when building expressions.
if _PY3K:
    alphas     = string.ascii_lowercase + string.ascii_uppercase
else:
    alphas     = string.lowercase + string.uppercase
nums       = string.digits
hexnums    = nums + "ABCDEFabcdef"
alphanums  = alphas + nums
# chr(92) is the backslash character
_bslash = chr(92)
# every printable character that is not whitespace
printables = "".join( [ c for c in string.printable
                          if c not in string.whitespace ] )
00166 
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions

       Instance attributes:
        - loc - character offset at which the exception occurred
        - msg - the error message
        - pstr - the string being parsed ("" when only a message was given)
        - parserElement - the value passed as elem
       Computed attributes (see __getattr__): lineno, col/column, line.
    """
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """With msg omitted, the first argument is taken to be the message
           itself and pstr is stored as the empty string."""
        self.loc = loc
        if msg is None:
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
              (also available as column)
            - line - returns the line containing the exception text
           Any other missing attribute raises AttributeError.
        """
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )
    def __repr__( self ):
        return _ustr(self)
    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
           An empty markerString leaves the line unmarked; the result is
           stripped of leading and trailing whitespace.
        """
        line_str = self.line
        # columns are 1-based, string indexes are 0-based
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()
    def __dir__(self):
        """List the attribute names this exception supports, including the
           ones computed on demand by __getattr__."""
        # "markInputline" must be spelled exactly like the method above, and
        # "column" is listed because __getattr__ accepts it alongside "col"
        return "loc msg pstr parserElement lineno col column line " \
               "markInputline __str__ __repr__".split()
00214 
class ParseException(ParseBaseException):
    """Exception raised when a parse expression does not match.

       The following attributes are supported by name:
        - lineno - the line number of the exception text
        - col - the column number of the exception text
        - line - the line containing the exception text
    """
00223 
class ParseFatalException(ParseBaseException):
    """User-throwable exception, raised when inconsistent parse content
       is found; stops all parsing immediately."""
00228 
class ParseSyntaxException(ParseFatalException):
    """Same as ParseFatalException, but raised internally when an
       ErrorStop indicates that parsing must stop immediately because
       an unbacktrackable syntax error has been found."""
    def __init__(self, pe):
        """Build this exception from the details of the exception pe."""
        text, position = pe.pstr, pe.loc
        message, element = pe.msg, pe.parserElement
        super(ParseSyntaxException, self).__init__(
                                    text, position, message, element)
00236 
00237 #~ class ReparseException(ParseBaseException):
00238     #~ """Experimental class - parse actions can raise this exception to cause
00239        #~ pyparsing to reparse the input string:
00240         #~ - with a modified input string, and/or
00241         #~ - with a modified start location
00242        #~ Set the values of the ReparseException in the constructor, and raise the
00243        #~ exception in a parse action to cause pyparsing to use the new string/location.
00244        #~ Setting the values as None causes no change to be made.
00245        #~ """
00246     #~ def __init_( self, newstring, restartLoc ):
00247         #~ self.newParseText = newstring
00248         #~ self.reparseLoc = restartLoc
00249 
class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive

       The value passed to the constructor is kept unchanged in the
       parseElementTrace attribute.
    """
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: a tuple-valued trace would otherwise be
        # unpacked as the format arguments instead of being shown as one value.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
00257 
class _ParseResultsWithOffset(object):
    """Two-item record kept in the tup attribute; setOffset replaces the
       second item and leaves the first one alone."""
    def __init__(self,p1,p2):
        self.tup = (p1,p2)
    def __getitem__(self,i):
        pair = self.tup
        return pair[i]
    def __repr__(self):
        return repr(self.tup)
    def setOffset(self,i):
        first = self.tup[0]
        self.tup = (first,i)
00267 
00268 class ParseResults(object):
00269     """Structured parse results, to provide multiple means of access to the parsed data:
00270        - as a list (len(results))
00271        - by list index (results[0], results[1], etc.)
00272        - by attribute (results.<resultsName>)
00273        """
00274     __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
00275     def __new__(cls, toklist, name=None, asList=True, modal=True ):
00276         if isinstance(toklist, cls):
00277             return toklist
00278         retobj = object.__new__(cls)
00279         retobj.__doinit = True
00280         return retobj
00281 
00282     # Performance tuning: we construct a *lot* of these, so keep this
00283     # constructor as small and fast as possible
00284     def __init__( self, toklist, name=None, asList=True, modal=True ):
00285         if self.__doinit:
00286             self.__doinit = False
00287             self.__name = None
00288             self.__parent = None
00289             self.__accumNames = {}
00290             if isinstance(toklist, list):
00291                 self.__toklist = toklist[:]
00292             else:
00293                 self.__toklist = [toklist]
00294             self.__tokdict = dict()
00295 
00296         if name:
00297             if not modal:
00298                 self.__accumNames[name] = 0
00299             if isinstance(name,int):
00300                 name = _ustr(name) # will always return a str, but use _ustr for consistency
00301             self.__name = name
00302             if not toklist in (None,'',[]):
00303                 if isinstance(toklist,basestring):
00304                     toklist = [ toklist ]
00305                 if asList:
00306                     if isinstance(toklist,ParseResults):
00307                         self[name] = _ParseResultsWithOffset(toklist.copy(),0)
00308                     else:
00309                         self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
00310                     self[name].__name = name
00311                 else:
00312                     try:
00313                         self[name] = toklist[0]
00314                     except (KeyError,TypeError,IndexError):
00315                         self[name] = toklist
00316 
00317     def __getitem__( self, i ):
00318         if isinstance( i, (int,slice) ):
00319             return self.__toklist[i]
00320         else:
00321             if i not in self.__accumNames:
00322                 return self.__tokdict[i][-1][0]
00323             else:
00324                 return ParseResults([ v[0] for v in self.__tokdict[i] ])
00325 
00326     def __setitem__( self, k, v ):
00327         if isinstance(v,_ParseResultsWithOffset):
00328             self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
00329             sub = v[0]
00330         elif isinstance(k,int):
00331             self.__toklist[k] = v
00332             sub = v
00333         else:
00334             self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
00335             sub = v
00336         if isinstance(sub,ParseResults):
00337             sub.__parent = wkref(self)
00338 
00339     def __delitem__( self, i ):
00340         if isinstance(i,(int,slice)):
00341             mylen = len( self.__toklist )
00342             del self.__toklist[i]
00343 
00344             # convert int to slice
00345             if isinstance(i, int):
00346                 if i < 0:
00347                     i += mylen
00348                 i = slice(i, i+1)
00349             # get removed indices
00350             removed = list(range(*i.indices(mylen)))
00351             removed.reverse()
00352             # fixup indices in token dictionary
00353             for name in self.__tokdict:
00354                 occurrences = self.__tokdict[name]
00355                 for j in removed:
00356                     for k, (value, position) in enumerate(occurrences):
00357                         occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
00358         else:
00359             del self.__tokdict[i]
00360 
00361     def __contains__( self, k ):
00362         return k in self.__tokdict
00363 
00364     def __len__( self ): return len( self.__toklist )
00365     def __bool__(self): return len( self.__toklist ) > 0
00366     __nonzero__ = __bool__
00367     def __iter__( self ): return iter( self.__toklist )
00368     def __reversed__( self ): return iter( reversed(self.__toklist) )
00369     def keys( self ):
00370         """Returns all named result keys."""
00371         return self.__tokdict.keys()
00372 
00373     def pop( self, index=-1 ):
00374         """Removes and returns item at specified index (default=last).
00375            Will work with either numeric indices or dict-key indicies."""
00376         ret = self[index]
00377         del self[index]
00378         return ret
00379 
00380     def get(self, key, defaultValue=None):
00381         """Returns named result matching the given key, or if there is no
00382            such name, then returns the given defaultValue or None if no
00383            defaultValue is specified."""
00384         if key in self:
00385             return self[key]
00386         else:
00387             return defaultValue
00388 
00389     def insert( self, index, insStr ):
00390         self.__toklist.insert(index, insStr)
00391         # fixup indices in token dictionary
00392         for name in self.__tokdict:
00393             occurrences = self.__tokdict[name]
00394             for k, (value, position) in enumerate(occurrences):
00395                 occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
00396 
00397     def items( self ):
00398         """Returns all named result keys and values as a list of tuples."""
00399         return [(k,self[k]) for k in self.__tokdict]
00400 
00401     def values( self ):
00402         """Returns all named result values."""
00403         return [ v[-1][0] for v in self.__tokdict.values() ]
00404 
00405     def __getattr__( self, name ):
00406         if name not in self.__slots__:
00407             if name in self.__tokdict:
00408                 if name not in self.__accumNames:
00409                     return self.__tokdict[name][-1][0]
00410                 else:
00411                     return ParseResults([ v[0] for v in self.__tokdict[name] ])
00412             else:
00413                 return ""
00414         return None
00415 
00416     def __add__( self, other ):
00417         ret = self.copy()
00418         ret += other
00419         return ret
00420 
00421     def __iadd__( self, other ):
00422         if other.__tokdict:
00423             offset = len(self.__toklist)
00424             addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
00425             otheritems = other.__tokdict.items()
00426             otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
00427                                 for (k,vlist) in otheritems for v in vlist]
00428             for k,v in otherdictitems:
00429                 self[k] = v
00430                 if isinstance(v[0],ParseResults):
00431                     v[0].__parent = wkref(self)
00432             
00433         self.__toklist += other.__toklist
00434         self.__accumNames.update( other.__accumNames )
00435         del other
00436         return self
00437 
00438     def __repr__( self ):
00439         return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
00440 
00441     def __str__( self ):
00442         out = "["
00443         sep = ""
00444         for i in self.__toklist:
00445             if isinstance(i, ParseResults):
00446                 out += sep + _ustr(i)
00447             else:
00448                 out += sep + repr(i)
00449             sep = ", "
00450         out += "]"
00451         return out
00452 
00453     def _asStringList( self, sep='' ):
00454         out = []
00455         for item in self.__toklist:
00456             if out and sep:
00457                 out.append(sep)
00458             if isinstance( item, ParseResults ):
00459                 out += item._asStringList()
00460             else:
00461                 out.append( _ustr(item) )
00462         return out
00463 
00464     def asList( self ):
00465         """Returns the parse results as a nested list of matching tokens, all converted to strings."""
00466         out = []
00467         for res in self.__toklist:
00468             if isinstance(res,ParseResults):
00469                 out.append( res.asList() )
00470             else:
00471                 out.append( res )
00472         return out
00473 
00474     def asDict( self ):
00475         """Returns the named parse results as dictionary."""
00476         return dict( self.items() )
00477 
00478     def copy( self ):
00479         """Returns a new copy of a ParseResults object."""
00480         ret = ParseResults( self.__toklist )
00481         ret.__tokdict = self.__tokdict.copy()
00482         ret.__parent = self.__parent
00483         ret.__accumNames.update( self.__accumNames )
00484         ret.__name = self.__name
00485         return ret
00486 
00487     def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
00488         """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
00489         nl = "\n"
00490         out = []
00491         namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
00492                                                             for v in vlist ] )
00493         nextLevelIndent = indent + "  "
00494 
00495         # collapse out indents if formatting is not desired
00496         if not formatted:
00497             indent = ""
00498             nextLevelIndent = ""
00499             nl = ""
00500 
00501         selfTag = None
00502         if doctag is not None:
00503             selfTag = doctag
00504         else:
00505             if self.__name:
00506                 selfTag = self.__name
00507 
00508         if not selfTag:
00509             if namedItemsOnly:
00510                 return ""
00511             else:
00512                 selfTag = "ITEM"
00513 
00514         out += [ nl, indent, "<", selfTag, ">" ]
00515 
00516         worklist = self.__toklist
00517         for i,res in enumerate(worklist):
00518             if isinstance(res,ParseResults):
00519                 if i in namedItems:
00520                     out += [ res.asXML(namedItems[i],
00521                                         namedItemsOnly and doctag is None,
00522                                         nextLevelIndent,
00523                                         formatted)]
00524                 else:
00525                     out += [ res.asXML(None,
00526                                         namedItemsOnly and doctag is None,
00527                                         nextLevelIndent,
00528                                         formatted)]
00529             else:
00530                 # individual token, see if there is a name for it
00531                 resTag = None
00532                 if i in namedItems:
00533                     resTag = namedItems[i]
00534                 if not resTag:
00535                     if namedItemsOnly:
00536                         continue
00537                     else:
00538                         resTag = "ITEM"
00539                 xmlBodyText = _xml_escape(_ustr(res))
00540                 out += [ nl, nextLevelIndent, "<", resTag, ">",
00541                                                 xmlBodyText,
00542                                                 "</", resTag, ">" ]
00543 
00544         out += [ nl, indent, "</", selfTag, ">" ]
00545         return "".join(out)
00546 
00547     def __lookup(self,sub):
00548         for k,vlist in self.__tokdict.items():
00549             for v,loc in vlist:
00550                 if sub is v:
00551                     return k
00552         return None
00553 
00554     def getName(self):
00555         """Returns the results name for this token expression."""
00556         if self.__name:
00557             return self.__name
00558         elif self.__parent:
00559             par = self.__parent()
00560             if par:
00561                 return par.__lookup(self)
00562             else:
00563                 return None
00564         elif (len(self) == 1 and
00565                len(self.__tokdict) == 1 and
00566                self.__tokdict.values()[0][0][1] in (0,-1)):
00567             return self.__tokdict.keys()[0]
00568         else:
00569             return None
00570 
00571     def dump(self,indent='',depth=0):
00572         """Diagnostic method for listing out the contents of a ParseResults.
00573            Accepts an optional indent argument so that this string can be embedded
00574            in a nested display of other data."""
00575         out = []
00576         out.append( indent+_ustr(self.asList()) )
00577         keys = self.items()
00578         keys.sort()
00579         for k,v in keys:
00580             if out:
00581                 out.append('\n')
00582             out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
00583             if isinstance(v,ParseResults):
00584                 if v.keys():
00585                     #~ out.append('\n')
00586                     out.append( v.dump(indent,depth+1) )
00587                     #~ out.append('\n')
00588                 else:
00589                     out.append(_ustr(v))
00590             else:
00591                 out.append(_ustr(v))
00592         #~ out.append('\n')
00593         return "".join(out)
00594 
00595     # add support for pickle protocol
00596     def __getstate__(self):
00597         return ( self.__toklist,
00598                  ( self.__tokdict.copy(),
00599                    self.__parent is not None and self.__parent() or None,
00600                    self.__accumNames,
00601                    self.__name ) )
00602 
00603     def __setstate__(self,state):
00604         self.__toklist = state[0]
00605         self.__tokdict, \
00606         par, \
00607         inAccumNames, \
00608         self.__name = state[1]
00609         self.__accumNames = {}
00610         self.__accumNames.update(inAccumNames)
00611         if par is not None:
00612             self.__parent = wkref(par)
00613         else:
00614             self.__parent = None
00615 
00616     def __dir__(self):
00617         return dir(super(ParseResults,self)) + self.keys()
00618 
def col (loc,strg):
    """Returns the column of position loc within strg, where newlines
   separate lines and the first column is number 1.  A position that falls
   exactly on a newline character is reported as column 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # distance from the closest newline before loc (rfind gives -1 if none)
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
00630 
def lineno(loc,strg):
    """Returns the line number of position loc within strg, where newlines
   separate lines and the first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    newlines_before = strg.count("\n",0,loc)
    return newlines_before + 1
00642 
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() signals "not found" with -1; index 0 is a valid hit (a string
    # that starts with a newline), so the test must be >= 0
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
00652 
def _defaultStartDebugAction( instring, loc, expr ):
    """Print the expression about to be tried, its location and the
       matching (line,column)."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + position)
00655 
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the expression that matched together with its tokens as a list."""
    matched_tokens = str(toks.asList())
    print ("Matched " + _ustr(expr) + " -> " + matched_tokens)
00658 
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the exception that was raised."""
    description = _ustr(exc)
    print ("Exception raised:" + description)
00661 
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.
       Accepts any positional arguments and returns None."""
    return None
00665 
00666 class ParserElement(object):
00667     """Abstract base level parser element class."""
00668     DEFAULT_WHITE_CHARS = " \n\t\r"
00669 
    def setDefaultWhitespaceChars( chars ):
        """Overrides the default whitespace chars.

           Rebinds the class attribute ParserElement.DEFAULT_WHITE_CHARS to
           chars; that attribute is read by __init__ and copy().
        """
        ParserElement.DEFAULT_WHITE_CHARS = chars
    # wrapped explicitly so it is called on the class, without an instance
    setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
00675 
00676     def __init__( self, savelist=False ):
00677         self.parseAction = list()
00678         self.failAction = None
00679         #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
00680         self.strRepr = None
00681         self.resultsName = None
00682         self.saveAsList = savelist
00683         self.skipWhitespace = True
00684         self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
00685         self.copyDefaultWhiteChars = True
00686         self.mayReturnEmpty = False # used when checking for left-recursion
00687         self.keepTabs = False
00688         self.ignoreExprs = list()
00689         self.debug = False
00690         self.streamlined = False
00691         self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
00692         self.errmsg = ""
00693         self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
00694         self.debugActions = ( None, None, None ) #custom debug actions
00695         self.re = None
00696         self.callPreparse = True # used to avoid redundant calls to preParse
00697         self.callDuringTry = False
00698 
00699     def copy( self ):
00700         """Make a copy of this ParserElement.  Useful for defining different parse actions
00701            for the same parsing pattern, using copies of the original parse element."""
00702         cpy = copy.copy( self )
00703         cpy.parseAction = self.parseAction[:]
00704         cpy.ignoreExprs = self.ignoreExprs[:]
00705         if self.copyDefaultWhiteChars:
00706             cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
00707         return cpy
00708 
00709     def setName( self, name ):
00710         """Define name for this expression, for use in debugging."""
00711         self.name = name
00712         self.errmsg = "Expected " + self.name
00713         if hasattr(self,"exception"):
00714             self.exception.msg = self.errmsg
00715         return self
00716 
00717     def setResultsName( self, name, listAllMatches=False ):
00718         """Define name for referencing matching tokens as a nested attribute
00719            of the returned parse results.
00720            NOTE: this returns a *copy* of the original ParserElement object;
00721            this is so that the client can define a basic element, such as an
00722            integer, and reference it in multiple places with different names.
00723         """
00724         newself = self.copy()
00725         newself.resultsName = name
00726         newself.modalResults = not listAllMatches
00727         return newself
00728 
00729     def setBreak(self,breakFlag = True):
00730         """Method to invoke the Python pdb debugger when this element is
00731            about to be parsed. Set breakFlag to True to enable, False to
00732            disable.
00733         """
00734         if breakFlag:
00735             _parseMethod = self._parse
00736             def breaker(instring, loc, doActions=True, callPreParse=True):
00737                 import pdb
00738                 pdb.set_trace()
00739                 return _parseMethod( instring, loc, doActions, callPreParse )
00740             breaker._originalParseMethod = _parseMethod
00741             self._parse = breaker
00742         else:
00743             if hasattr(self._parse,"_originalParseMethod"):
00744                 self._parse = self._parse._originalParseMethod
00745         return self
00746 
00747     def _normalizeParseActionArgs( f ):
00748         """Internal method used to decorate parse actions that take fewer than 3 arguments,
00749            so that all parse actions can be called as f(s,l,t)."""
00750         STAR_ARGS = 4
00751 
00752         try:
00753             restore = None
00754             if isinstance(f,type):
00755                 restore = f
00756                 f = f.__init__
00757             if not _PY3K:
00758                 codeObj = f.func_code
00759             else:
00760                 codeObj = f.code
00761             if codeObj.co_flags & STAR_ARGS:
00762                 return f
00763             numargs = codeObj.co_argcount
00764             if not _PY3K:
00765                 if hasattr(f,"im_self"):
00766                     numargs -= 1
00767             else:
00768                 if hasattr(f,"__self__"):
00769                     numargs -= 1
00770             if restore:
00771                 f = restore
00772         except AttributeError:
00773             try:
00774                 if not _PY3K:
00775                     call_im_func_code = f.__call__.im_func.func_code
00776                 else:
00777                     call_im_func_code = f.__code__
00778 
00779                 # not a function, must be a callable object, get info from the
00780                 # im_func binding of its bound __call__ method
00781                 if call_im_func_code.co_flags & STAR_ARGS:
00782                     return f
00783                 numargs = call_im_func_code.co_argcount
00784                 if not _PY3K:
00785                     if hasattr(f.__call__,"im_self"):
00786                         numargs -= 1
00787                 else:
00788                     if hasattr(f.__call__,"__self__"):
00789                         numargs -= 0
00790             except AttributeError:
00791                 if not _PY3K:
00792                     call_func_code = f.__call__.func_code
00793                 else:
00794                     call_func_code = f.__call__.__code__
00795                 # not a bound method, get info directly from __call__ method
00796                 if call_func_code.co_flags & STAR_ARGS:
00797                     return f
00798                 numargs = call_func_code.co_argcount
00799                 if not _PY3K:
00800                     if hasattr(f.__call__,"im_self"):
00801                         numargs -= 1
00802                 else:
00803                     if hasattr(f.__call__,"__self__"):
00804                         numargs -= 1
00805 
00806 
00807         #~ print ("adding function %s with %d args" % (f.func_name,numargs))
00808         if numargs == 3:
00809             return f
00810         else:
00811             if numargs > 3:
00812                 def tmp(s,l,t):
00813                     return f(f.__call__.__self__, s,l,t)
00814             if numargs == 2:
00815                 def tmp(s,l,t):
00816                     return f(l,t)
00817             elif numargs == 1:
00818                 def tmp(s,l,t):
00819                     return f(t)
00820             else: #~ numargs == 0:
00821                 def tmp(s,l,t):
00822                     return f()
00823             try:
00824                 tmp.__name__ = f.__name__
00825             except (AttributeError,TypeError):
00826                 # no need for special handling if attribute doesnt exist
00827                 pass
00828             try:
00829                 tmp.__doc__ = f.__doc__
00830             except (AttributeError,TypeError):
00831                 # no need for special handling if attribute doesnt exist
00832                 pass
00833             try:
00834                 tmp.__dict__.update(f.__dict__)
00835             except (AttributeError,TypeError):
00836                 # no need for special handling if attribute doesnt exist
00837                 pass
00838             return tmp
00839     _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
00840 
00841     def setParseAction( self, *fns, **kwargs ):
00842         """Define action to perform when successfully matching parse element definition.
00843            Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
00844            fn(loc,toks), fn(toks), or just fn(), where:
00845             - s   = the original string being parsed (see note below)
00846             - loc = the location of the matching substring
00847             - toks = a list of the matched tokens, packaged as a ParseResults object
00848            If the functions in fns modify the tokens, they can return them as the return
00849            value from fn, and the modified list of tokens will replace the original.
00850            Otherwise, fn does not need to return any value.
00851 
00852            Note: the default parsing behavior is to expand tabs in the input string
00853            before starting the parsing process.  See L{I{parseString}<parseString>} for more information
00854            on parsing strings containing <TAB>s, and suggested methods to maintain a
00855            consistent view of the parsed string, the parse location, and line and column
00856            positions within the parsed string.
00857            """
00858         self.parseAction = list(map(self._normalizeParseActionArgs, list(fns)))
00859         self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
00860         return self
00861 
00862     def addParseAction( self, *fns, **kwargs ):
00863         """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
00864         self.parseAction += list(map(self._normalizeParseActionArgs, list(fns)))
00865         self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
00866         return self
00867 
00868     def setFailAction( self, fn ):
00869         """Define action to perform if parsing fails at this expression.
00870            Fail acton fn is a callable function that takes the arguments
00871            fn(s,loc,expr,err) where:
00872             - s = string being parsed
00873             - loc = location where expression match was attempted and failed
00874             - expr = the parse expression that failed
00875             - err = the exception thrown
00876            The function returns no value.  It may throw ParseFatalException
00877            if it is desired to stop parsing immediately."""
00878         self.failAction = fn
00879         return self
00880 
00881     def _skipIgnorables( self, instring, loc ):
00882         exprsFound = True
00883         while exprsFound:
00884             exprsFound = False
00885             for e in self.ignoreExprs:
00886                 try:
00887                     while 1:
00888                         loc,dummy = e._parse( instring, loc )
00889                         exprsFound = True
00890                 except ParseException:
00891                     pass
00892         return loc
00893 
00894     def preParse( self, instring, loc ):
00895         if self.ignoreExprs:
00896             loc = self._skipIgnorables( instring, loc )
00897 
00898         if self.skipWhitespace:
00899             wt = self.whiteChars
00900             instrlen = len(instring)
00901             while loc < instrlen and instring[loc] in wt:
00902                 loc += 1
00903 
00904         return loc
00905 
00906     def parseImpl( self, instring, loc, doActions=True ):
00907         return loc, []
00908 
00909     def postParse( self, instring, loc, tokenlist ):
00910         return tokenlist
00911 
    #~ @profile
    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
        """Match this element against instring at loc, without memoizing.

           Steps: optionally preParse, call parseImpl, pass the tokens through
           postParse, wrap them in a ParseResults, then run the parse actions
           (only if doActions or self.callDuringTry is set).

           Returns (loc, retTokens), where loc is the location returned by
           parseImpl.  An IndexError raised by parseImpl is converted to a
           ParseException located at len(instring).
        """
        debugging = ( self.debug ) #and doActions )

        # slow path: debug callbacks and/or a fail action need to be notified
        if debugging or self.failAction:
            #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
            # debugActions[0] is the "start" callback
            if (self.debugActions[0] ):
                self.debugActions[0]( instring, loc, self )
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            # note: this is the location *before* preParse; it is what the
            # callbacks and parse actions receive
            tokensStart = loc
            try:
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            except ParseBaseException, err:
                #~ print ("Exception raised:", err)
                # debugActions[2] is the "exception" callback
                if self.debugActions[2]:
                    self.debugActions[2]( instring, tokensStart, self, err )
                if self.failAction:
                    self.failAction( instring, tokensStart, self, err )
                raise
        else:
            # fast path: no callbacks to notify
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = loc
            # only pay for the IndexError guard when it may be needed
            if self.mayIndexError or loc >= len(instring):
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            else:
                loc,tokens = self.parseImpl( instring, preloc, doActions )

        tokens = self.postParse( instring, loc, tokens )

        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
        if self.parseAction and (doActions or self.callDuringTry):
            if debugging:
                try:
                    for fn in self.parseAction:
                        tokens = fn( instring, tokensStart, retTokens )
                        # a parse action that returns a value replaces the tokens
                        if tokens is not None:
                            retTokens = ParseResults( tokens,
                                                      self.resultsName,
                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                      modal=self.modalResults )
                except ParseBaseException, err:
                    #~ print "Exception raised in user parse action:", err
                    if (self.debugActions[2] ):
                        self.debugActions[2]( instring, tokensStart, self, err )
                    raise
            else:
                # same loop as above, without the exception callback
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )

        if debugging:
            #~ print ("Matched",self,"->",retTokens.asList())
            # debugActions[1] is the "success" callback
            if (self.debugActions[1] ):
                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

        return loc, retTokens
00984 
00985     def tryParse( self, instring, loc ):
00986         try:
00987             return self._parse( instring, loc, doActions=False )[0]
00988         except ParseFatalException:
00989             raise ParseException( instring, loc, self.errmsg, self)
00990 
00991     # this method gets repeatedly called during backtracking with the same arguments -
00992     # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
00993     def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
00994         lookup = (self,instring,loc,callPreParse,doActions)
00995         if lookup in ParserElement._exprArgCache:
00996             value = ParserElement._exprArgCache[ lookup ]
00997             if isinstance(value,Exception):
00998                 raise value
00999             return value
01000         else:
01001             try:
01002                 value = self._parseNoCache( instring, loc, doActions, callPreParse )
01003                 ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
01004                 return value
01005             except ParseBaseException, pe:
01006                 ParserElement._exprArgCache[ lookup ] = pe
01007                 raise
01008 
    # default parse entry point: the uncached implementation
    _parse = _parseNoCache

    # argument cache for optimizing repeated calls when backtracking through recursive expressions
    # (class-level, so it is shared by all parser elements)
    _exprArgCache = {}
    def resetCache():
        """Empty the shared cache of memoized parse results."""
        ParserElement._exprArgCache.clear()
    resetCache = staticmethod(resetCache)
01016 
01017     _packratEnabled = False
01018     def enablePackrat():
01019         """Enables "packrat" parsing, which adds memoizing to the parsing logic.
01020            Repeated parse attempts at the same string location (which happens
01021            often in many complex grammars) can immediately return a cached value,
01022            instead of re-executing parsing/validating code.  Memoizing is done of
01023            both valid results and parsing exceptions.
01024 
01025            This speedup may break existing programs that use parse actions that
01026            have side-effects.  For this reason, packrat parsing is disabled when
01027            you first import pyparsing.  To activate the packrat feature, your
01028            program must call the class method ParserElement.enablePackrat().  If
01029            your program uses psyco to "compile as you go", you must call
01030            enablePackrat before calling psyco.full().  If you do not do this,
01031            Python will crash.  For best results, call enablePackrat() immediately
01032            after importing pyparsing.
01033         """
01034         if not ParserElement._packratEnabled:
01035             ParserElement._packratEnabled = True
01036             ParserElement._parse = ParserElement._parseCache
01037     enablePackrat = staticmethod(enablePackrat)
01038 
01039     def parseString( self, instring, parseAll=False ):
01040         """Execute the parse expression with the given string.
01041            This is the main interface to the client code, once the complete
01042            expression has been built.
01043 
01044            If you want the grammar to require that the entire input string be
01045            successfully parsed, then set parseAll to True (equivalent to ending
01046            the grammar with StringEnd()).
01047 
01048            Note: parseString implicitly calls expandtabs() on the input string,
01049            in order to report proper column numbers in parse actions.
01050            If the input string contains tabs and
01051            the grammar uses parse actions that use the loc argument to index into the
01052            string being parsed, you can ensure you have a consistent view of the input
01053            string by:
01054             - calling parseWithTabs on your grammar before calling parseString
01055               (see L{I{parseWithTabs}<parseWithTabs>})
01056             - define your parse action using the full (s,loc,toks) signature, and
01057               reference the input string using the parse action's s argument
01058             - explictly expand the tabs in your input string before calling
01059               parseString
01060         """
01061         ParserElement.resetCache()
01062         if not self.streamlined:
01063             self.streamline()
01064             #~ self.saveAsList = True
01065         for e in self.ignoreExprs:
01066             e.streamline()
01067         if not self.keepTabs:
01068             instring = instring.expandtabs()
01069         try:
01070             loc, tokens = self._parse( instring, 0 )
01071             if parseAll:
01072                 loc = self.preParse( instring, loc )
01073                 StringEnd()._parse( instring, loc )
01074         except ParseBaseException, exc:
01075             # catch and re-raise exception from here, clears out pyparsing internal stack trace
01076             raise exc
01077         else:
01078             return tokens
01079 
01080     def scanString( self, instring, maxMatches=_MAX_INT ):
01081         """Scan the input string for expression matches.  Each match will return the
01082            matching tokens, start location, and end location.  May be called with optional
01083            maxMatches argument, to clip scanning after 'n' matches are found.
01084 
01085            Note that the start and end locations are reported relative to the string
01086            being parsed.  See L{I{parseString}<parseString>} for more information on parsing
01087            strings with embedded tabs."""
01088         if not self.streamlined:
01089             self.streamline()
01090         for e in self.ignoreExprs:
01091             e.streamline()
01092 
01093         if not self.keepTabs:
01094             instring = _ustr(instring).expandtabs()
01095         instrlen = len(instring)
01096         loc = 0
01097         preparseFn = self.preParse
01098         parseFn = self._parse
01099         ParserElement.resetCache()
01100         matches = 0
01101         try:
01102             while loc <= instrlen and matches < maxMatches:
01103                 try:
01104                     preloc = preparseFn( instring, loc )
01105                     nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
01106                 except ParseException:
01107                     loc = preloc+1
01108                 else:
01109                     matches += 1
01110                     yield tokens, preloc, nextLoc
01111                     loc = nextLoc
01112         except ParseBaseException, pe:
01113             raise pe
01114 
01115     def transformString( self, instring ):
01116         """Extension to scanString, to modify matching text with modified tokens that may
01117            be returned from a parse action.  To use transformString, define a grammar and
01118            attach a parse action to it that modifies the returned token list.
01119            Invoking transformString() on a target string will then scan for matches,
01120            and replace the matched text patterns according to the logic in the parse
01121            action.  transformString() returns the resulting transformed string."""
01122         out = []
01123         lastE = 0
01124         # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
01125         # keep string locs straight between transformString and scanString
01126         self.keepTabs = True
01127         try:
01128             for t,s,e in self.scanString( instring ):
01129                 out.append( instring[lastE:s] )
01130                 if t:
01131                     if isinstance(t,ParseResults):
01132                         out += t.asList()
01133                     elif isinstance(t,list):
01134                         out += t
01135                     else:
01136                         out.append(t)
01137                 lastE = e
01138             out.append(instring[lastE:])
01139             return "".join(map(_ustr,out))
01140         except ParseBaseException, pe:
01141             raise pe
01142 
01143     def searchString( self, instring, maxMatches=_MAX_INT ):
01144         """Another extension to scanString, simplifying the access to the tokens found
01145            to match the given parse expression.  May be called with optional
01146            maxMatches argument, to clip searching after 'n' matches are found.
01147         """
01148         try:
01149             return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
01150         except ParseBaseException, pe:
01151             raise pe
01152 
01153     def __add__(self, other ):
01154         """Implementation of + operator - returns And"""
01155         if isinstance( other, basestring ):
01156             other = Literal( other )
01157         if not isinstance( other, ParserElement ):
01158             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
01159                     SyntaxWarning, stacklevel=2)
01160             return None
01161         return And( [ self, other ] )
01162 
01163     def __radd__(self, other ):
01164         """Implementation of + operator when left operand is not a ParserElement"""
01165         if isinstance( other, basestring ):
01166             other = Literal( other )
01167         if not isinstance( other, ParserElement ):
01168             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
01169                     SyntaxWarning, stacklevel=2)
01170             return None
01171         return other + self
01172 
01173     def __sub__(self, other):
01174         """Implementation of - operator, returns And with error stop"""
01175         if isinstance( other, basestring ):
01176             other = Literal( other )
01177         if not isinstance( other, ParserElement ):
01178             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
01179                     SyntaxWarning, stacklevel=2)
01180             return None
01181         return And( [ self, And._ErrorStop(), other ] )
01182 
01183     def __rsub__(self, other ):
01184         """Implementation of - operator when left operand is not a ParserElement"""
01185         if isinstance( other, basestring ):
01186             other = Literal( other )
01187         if not isinstance( other, ParserElement ):
01188             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
01189                     SyntaxWarning, stacklevel=2)
01190             return None
01191         return other - self
01192 
01193     def __mul__(self,other):
01194         if isinstance(other,int):
01195             minElements, optElements = other,0
01196         elif isinstance(other,tuple):
01197             other = (other + (None, None))[:2]
01198             if other[0] is None:
01199                 other = (0, other[1])
01200             if isinstance(other[0],int) and other[1] is None:
01201                 if other[0] == 0:
01202                     return ZeroOrMore(self)
01203                 if other[0] == 1:
01204                     return OneOrMore(self)
01205                 else:
01206                     return self*other[0] + ZeroOrMore(self)
01207             elif isinstance(other[0],int) and isinstance(other[1],int):
01208                 minElements, optElements = other
01209                 optElements -= minElements
01210             else:
01211                 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
01212         else:
01213             raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
01214 
01215         if minElements < 0:
01216             raise ValueError("cannot multiply ParserElement by negative value")
01217         if optElements < 0:
01218             raise ValueError("second tuple value must be greater or equal to first tuple value")
01219         if minElements == optElements == 0:
01220             raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
01221 
01222         if (optElements):
01223             def makeOptionalList(n):
01224                 if n>1:
01225                     return Optional(self + makeOptionalList(n-1))
01226                 else:
01227                     return Optional(self)
01228             if minElements:
01229                 if minElements == 1:
01230                     ret = self + makeOptionalList(optElements)
01231                 else:
01232                     ret = And([self]*minElements) + makeOptionalList(optElements)
01233             else:
01234                 ret = makeOptionalList(optElements)
01235         else:
01236             if minElements == 1:
01237                 ret = self
01238             else:
01239                 ret = And([self]*minElements)
01240         return ret
01241 
01242     def __rmul__(self, other):
01243         return self.__mul__(other)
01244 
01245     def __or__(self, other ):
01246         """Implementation of | operator - returns MatchFirst"""
01247         if isinstance( other, basestring ):
01248             other = Literal( other )
01249         if not isinstance( other, ParserElement ):
01250             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
01251                     SyntaxWarning, stacklevel=2)
01252             return None
01253         return MatchFirst( [ self, other ] )
01254 
01255     def __ror__(self, other ):
01256         """Implementation of | operator when left operand is not a ParserElement"""
01257         if isinstance( other, basestring ):
01258             other = Literal( other )
01259         if not isinstance( other, ParserElement ):
01260             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
01261                     SyntaxWarning, stacklevel=2)
01262             return None
01263         return other | self
01264 
01265     def __xor__(self, other ):
01266         """Implementation of ^ operator - returns Or"""
01267         if isinstance( other, basestring ):
01268             other = Literal( other )
01269         if not isinstance( other, ParserElement ):
01270             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
01271                     SyntaxWarning, stacklevel=2)
01272             return None
01273         return Or( [ self, other ] )
01274 
01275     def __rxor__(self, other ):
01276         """Implementation of ^ operator when left operand is not a ParserElement"""
01277         if isinstance( other, basestring ):
01278             other = Literal( other )
01279         if not isinstance( other, ParserElement ):
01280             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
01281                     SyntaxWarning, stacklevel=2)
01282             return None
01283         return other ^ self
01284 
01285     def __and__(self, other ):
01286         """Implementation of & operator - returns Each"""
01287         if isinstance( other, basestring ):
01288             other = Literal( other )
01289         if not isinstance( other, ParserElement ):
01290             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
01291                     SyntaxWarning, stacklevel=2)
01292             return None
01293         return Each( [ self, other ] )
01294 
01295     def __rand__(self, other ):
01296         """Implementation of & operator when left operand is not a ParserElement"""
01297         if isinstance( other, basestring ):
01298             other = Literal( other )
01299         if not isinstance( other, ParserElement ):
01300             warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
01301                     SyntaxWarning, stacklevel=2)
01302             return None
01303         return other & self
01304 
01305     def __invert__( self ):
01306         """Implementation of ~ operator - returns NotAny"""
01307         return NotAny( self )
01308 
01309     def __call__(self, name):
01310         """Shortcut for setResultsName, with listAllMatches=default::
01311              userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
01312            could be written as::
01313              userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
01314            """
01315         return self.setResultsName(name)
01316 
01317     def suppress( self ):
01318         """Suppresses the output of this ParserElement; useful to keep punctuation from
01319            cluttering up returned output.
01320         """
01321         return Suppress( self )
01322 
01323     def leaveWhitespace( self ):
01324         """Disables the skipping of whitespace before matching the characters in the
01325            ParserElement's defined pattern.  This is normally only used internally by
01326            the pyparsing module, but may be needed in some whitespace-sensitive grammars.
01327         """
01328         self.skipWhitespace = False
01329         return self
01330 
01331     def setWhitespaceChars( self, chars ):
01332         """Overrides the default whitespace chars
01333         """
01334         self.skipWhitespace = True
01335         self.whiteChars = chars
01336         self.copyDefaultWhiteChars = False
01337         return self
01338 
01339     def parseWithTabs( self ):
01340         """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
01341            Must be called before parseString when the input grammar contains elements that
01342            match <TAB> characters."""
01343         self.keepTabs = True
01344         return self
01345 
01346     def ignore( self, other ):
01347         """Define expression to be ignored (e.g., comments) while doing pattern
01348            matching; may be called repeatedly, to define multiple comment or other
01349            ignorable patterns.
01350         """
01351         if isinstance( other, Suppress ):
01352             if other not in self.ignoreExprs:
01353                 self.ignoreExprs.append( other )
01354         else:
01355             self.ignoreExprs.append( Suppress( other ) )
01356         return self
01357 
01358     def setDebugActions( self, startAction, successAction, exceptionAction ):
01359         """Enable display of debugging messages while doing pattern matching."""
01360         self.debugActions = (startAction or _defaultStartDebugAction,
01361                              successAction or _defaultSuccessDebugAction,
01362                              exceptionAction or _defaultExceptionDebugAction)
01363         self.debug = True
01364         return self
01365 
01366     def setDebug( self, flag=True ):
01367         """Enable display of debugging messages while doing pattern matching.
01368            Set flag to True to enable, False to disable."""
01369         if flag:
01370             self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
01371         else:
01372             self.debug = False
01373         return self
01374 
    def __str__( self ):
        """Return this element's name attribute as its string form."""
        return self.name
01377 
    def __repr__( self ):
        """Return the same text as the string form of this element, via _ustr."""
        return _ustr(self)
01380 
01381     def streamline( self ):
01382         self.streamlined = True
01383         self.strRepr = None
01384         return self
01385 
    def checkRecursion( self, parseElementList ):
        """Recursion check hook; this base implementation does nothing."""
        pass
01388 
01389     def validate( self, validateTrace=[] ):
01390         """Check defined expressions for valid structure, check for infinite recursive definitions."""
01391         self.checkRecursion( [] )
01392 
01393     def parseFile( self, file_or_filename, parseAll=False ):
01394         """Execute the parse expression on the given file or filename.
01395            If a filename is specified (instead of a file object),
01396            the entire file is opened, read, and closed before parsing.
01397         """
01398         try:
01399             file_contents = file_or_filename.read()
01400         except AttributeError:
01401             f = open(file_or_filename, "rb")
01402             file_contents = f.read()
01403             f.close()
01404         try:
01405             return self.parseString(file_contents, parseAll)
01406         except ParseBaseException, exc:
01407             # catch and re-raise exception from here, clears out pyparsing internal stack trace
01408             raise exc
01409 
    def getException(self):
        """Build a new ParseException for this element, carrying its errmsg,
           with an empty parse string and location 0."""
        return ParseException("",0,self.errmsg,self)
01412 
01413     def __getattr__(self,aname):
01414         if aname == "myException":
01415             self.myException = ret = self.getException();
01416             return ret;
01417         else:
01418             raise AttributeError("no such attribute " + aname)
01419 
01420     def __eq__(self,other):
01421         if isinstance(other, ParserElement):
01422             return self is other or self.__dict__ == other.__dict__
01423         elif isinstance(other, basestring):
01424             try:
01425                 self.parseString(_ustr(other), parseAll=True)
01426                 return True
01427             except ParseBaseException:
01428                 return False
01429         else:
01430             return super(ParserElement,self)==other
01431 
01432     def __ne__(self,other):
01433         return not (self == other)
01434 
    def __hash__(self):
        """Hash by object identity (the hash of id(self))."""
        return hash(id(self))
01437 
    def __req__(self,other):
        """Return the result of self == other."""
        return self == other
01440 
01441     def __rne__(self,other):
01442         return not (self == other)
01443 
01444 
class Token(ParserElement):
    """Abstract ParserElement subclass that serves as the base for atomic
       matching patterns."""
    def __init__( self ):
        super(Token,self).__init__( savelist=False )

    def setName(self, name):
        """Name this token through the base-class setName, then rebuild
           errmsg as "Expected " followed by the element's name.  Returns
           whatever the base-class setName returned.
        """
        named = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        return named
01456 
01457 
class Empty(Token):
    """A token that consumes nothing and always matches."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
01465 
01466 
class NoMatch(Token):
    """A token that never matches: every parse attempt raises."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise this element's exception object, updated with
           the current parse string and location."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
01482 
01483 
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        """Store the string to match.

           An empty matchString cannot be matched as a literal: a
           SyntaxWarning is issued and the instance is converted to an Empty
           token, which matches without consuming input.
        """
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
            # the instance now behaves as Empty, so it must be flagged as
            # able to match without consuming input
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        """Match the literal at loc.

           Returns (loc + length of the literal, the literal string) on
           success; otherwise raises this element's exception object, updated
           with the current parse string and location.
        """
        if (instring[loc] == self.firstMatchChar and
            (self.matchLen==1 or instring.startswith(self.match,loc)) ):
            return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
# short module-level alias for Literal
_L = Literal
01516 
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False
        self.caseless = caseless
        if caseless:
            # caseless matching compares upper-cased text, so the keyword
            # and the identifier characters are upper-cased too
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc.

           The keyword text must be present at loc, and neither the character
           after it nor the character before loc (when they exist) may be an
           identifier character.  Returns (end location, keyword string) on
           success; otherwise raises this element's exception object.
        """
        kwLen = self.matchLen
        end = loc + kwLen
        identChars = self.identChars
        if self.caseless:
            matched = ( (instring[ loc:end ].upper() == self.caselessmatch) and
                        (loc >= len(instring)-kwLen or instring[end].upper() not in identChars) and
                        (loc == 0 or instring[loc-1].upper() not in identChars) )
        else:
            matched = ( instring[loc] == self.firstMatchChar and
                        (kwLen==1 or instring.startswith(self.match,loc)) and
                        (loc >= len(instring)-kwLen or instring[end] not in identChars) and
                        (loc == 0 or instring[loc-1] not in identChars) )
        if matched:
            return end, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def copy(self):
        """Copy this keyword through the base-class copy(); the copy's
           identChars is set to the current Keyword.DEFAULT_KEYWORD_CHARS."""
        duplicate = super(Keyword,self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
01577 
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the base Literal stores the upper-cased form for comparison
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # the literal as originally written is what gets returned on a match
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Compare the upper-cased input slice at loc with the stored
           upper-cased literal.  Returns (end location, the literal as
           originally given) on success; otherwise raises this element's
           exception object."""
        end = loc + self.matchLen
        if instring[ loc:end ].upper() != self.match:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return end, self.returnString
01599 
class CaselessKeyword(Keyword):
    """Caseless version of Keyword: matches the keyword string ignoring the
       case of letters, and returns the keyword as given to the constructor.
    """
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, ignoring case.

           Applies the same three conditions as the caseless branch of
           Keyword.parseImpl: the upper-cased text at loc equals the
           upper-cased keyword, the following character (if any) is not an
           identifier character, and the preceding character (if any) is not
           an identifier character.  Returns (end location, keyword string)
           on success; otherwise raises this element's exception object.
        """
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
             # also reject a keyword that directly continues an identifier,
             # as Keyword(..., caseless=True) does
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
01613 
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        """Set up the character sets and length limits.

           Raises ValueError if min < 1.  When no length limits are given and
           neither character set contains a space, an equivalent regular
           expression is compiled and used by parseImpl as a fast path.
        """
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both the minimum and the maximum
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.asKeyword = asKeyword

        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # a single initial character can be matched as an escaped
                # literal; with several initial characters a character class
                # is required, otherwise the pattern would demand the whole
                # initChars string as a literal prefix
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except:
                # best effort: if the pattern does not compile, fall back to
                # the character-by-character matcher in parseImpl
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at loc.

           Returns (end location, matched text) on success; otherwise raises
           this element's exception object, updated with the current parse
           string and location.
        """
        # fast path: use the precompiled regular expression when available
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        # too short
        if loc - start < self.minLen:
            throwException = True
        # an explicit maximum was given and the word continues past it
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        # as a keyword, the word may not be adjacent to other body characters
        if self.asKeyword:
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base-class string form if that succeeds; otherwise
           build (once) and return a "W:(...)" summary of the character
           sets, each abbreviated to its first four characters."""
        try:
            return super(Word,self).__str__()
        except:
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
01740 
01741 
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.

           An empty pattern produces a SyntaxWarning.  A pattern that fails to
           compile produces a SyntaxWarning and the compile error is re-raised.
        """
        super(Regex,self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            compiled = re.compile(self.pattern, self.flags)
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise
        self.re = compiled
        self.reString = self.pattern

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Apply the compiled expression at loc.

           On a match, returns (end of match, ParseResults holding the matched
           text); every named group of the match is also stored in the results
           under the group's name.  Otherwise raises this element's exception
           object.
        """
        found = self.re.match(instring,loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        endloc = found.end()
        named = found.groupdict()
        tokens = ParseResults(found.group())
        for key in named:
            tokens[key] = named[key]
        return endloc,tokens

    def __str__( self ):
        """Return the base-class string form if that succeeds; otherwise
           build (once) and return "Re:(<repr of pattern>)"."""
        try:
            return super(Regex,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
01797 
01798 
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError (after a SyntaxWarning) if quoteChar or
           endQuoteChar is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # The pattern is assembled in pieces: the opening quote, then a
        # repeated group of alternatives for the body, then the closing quote.
        # First alternative: any single character that is not the first
        # end-quote character, not the escape character and, unless
        # multiline, not a line break.
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        # For a multi-character end quote, also accept any proper prefix of
        # the end quote followed by a character that does not continue it.
        if len(self.endQuoteChar) > 1:
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
                )
        # the escaped-quote sequence counts as body text
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        # the escape character followed by any one character counts as body text
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at loc.

           Returns (end location, text).  With unquoteResults the text has its
           quote delimiters removed, escape characters dropped, and escaped
           quote sequences replaced by the end quote string; otherwise it is
           the matched text as-is.  Raises this element's exception object if
           there is no match.
        """
        # cheap first-character test before running the regular expression
        result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid escape in a normal literal
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the base-class string form if that succeeds; otherwise
           build (once) and return a description naming the start and end
           quote strings."""
        try:
            return super(QuotedString,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
01913 
01914 
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the excluded characters and the length limits.

           Whitespace skipping is turned off for this element.  Raises
           ValueError if min < 1.
        """
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        # an exact length, when given, overrides both limits
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from loc up to the first excluded character,
           the maximum length, or the end of the string.

           Returns (end location, matched text).  Raises this element's
           exception object if the first character is excluded or fewer than
           the minimum number of characters were consumed.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        loc += 1
        limit = min( start+self.maxLen, len(instring) )
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base-class string form if that succeeds; otherwise
           build (once) and return a "!W:(...)" summary showing at most the
           first four excluded characters."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            shown = self.notChars
            if len(shown) > 4:
                self.strRepr = "!W:(%s...)" % shown[:4]
            else:
                self.strRepr = "!W:(%s)" % shown

        return self.strRepr
01985 
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is " \\t\\r\\n".  Also takes optional min, max, and exact arguments,
       as defined for the Word class."""
    # display names used to build the element's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # keep skipping only the whitespace characters this element does not match
        skippable = "".join([c for c in self.whiteChars if c not in self.matchWhite])
        self.setWhitespaceChars( skippable )
        self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite]))
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        # an exact length, when given, overrides both limits
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = _MAX_INT

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of the configured whitespace characters at loc.

           Returns (end location, matched text).  Raises this element's
           exception object if the character at loc is not one of them or the
           run is shorter than the minimum length.
        """
        wanted = self.matchWhite
        if instring[ loc ] not in wanted:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        start = loc
        loc += 1
        limit = min( start + self.maxLen, len(instring) )
        while loc < limit and instring[loc] in wanted:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]
02042 
02043 
class _PositionToken(Token):
    """Base class for the position-testing tokens below; the element is
       named after its concrete class."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name=self.__class__.__name__
02050 
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Advance loc toward the target column.

           If loc is not already at the target column, ignorable expressions
           (when any are defined) are skipped, then whitespace characters are
           skipped until the target column or a non-space character is
           reached.  Returns the resulting location.
        """
        if col(loc,instring) == self.col:
            return loc
        strlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < strlen and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (location of the target column, the text between loc and
           that location).  Raises ParseException if loc is already past the
           target column."""
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        skipped = instring[ loc: target ]
        return target, skipped
02073 
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newline must not be skipped as whitespace by this element
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Run the base-class preParse; if the character at the location it
           reports is a newline, return loc + 1, otherwise return loc
           unchanged."""
        afterSkip = super(LineStart,self).preParse(instring,loc)
        if instring[afterSkip] == "\n":
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with (loc, []) when loc is 0, or equals the result of
           preParse from position 0, or directly follows a newline;
           otherwise raise this element's exception object."""
        atLineStart = ( loc==0 or
                        (loc == self.preParse( instring, 0 )) or
                        (instring[loc-1] == "\n") )
        if not atLineStart:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
02098 
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newline must not be skipped as whitespace by this element
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """At a newline, return (loc+1, "\\n"); exactly at the end of the
           string, return (loc+1, []); in every other case raise this
           element's exception object."""
        strlen = len(instring)
        if loc < strlen:
            if instring[loc] == "\n":
                return loc+1, "\n"
        elif loc == strlen:
            return loc+1, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
02124 
class StringStart(_PositionToken):
    """Position token that succeeds only at the beginning of the parse string."""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc, []) if loc is 0 or equals the result of pre-parsing from 0.

        The second case covers a string whose leading part is entirely
        skipped by preParse.  Otherwise the exception object stored on the
        instance is raised.
        """
        if loc != 0 and loc != self.preParse( instring, 0 ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
02142 
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed only when loc is at or beyond the end of instring.

        Returns (loc+1, []) when loc equals len(instring) and (loc, []) when
        loc is already past the end.  When loc is inside the string the
        exception object stored on the instance is raised.
        """
        instrlen = len(instring)
        if loc < instrlen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen: the three comparisons are exhaustive, so no further
        # fallback branch is needed
        return loc, []
02166 
class WordStart(_PositionToken):
    r"""Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of wordChars
       (default=printables). To emulate the \b behavior of regular expressions,
       use WordStart(alphanums). A parse location of 0 always matches.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return (loc, []) at a word start; otherwise raise the instance's exception.

        Away from location 0, the match fails when the previous character is
        a word character or the current character is not one.
        """
        if loc == 0:
            return loc, []
        wordChars = self.wordChars
        if instring[loc-1] in wordChars or instring[loc] not in wordChars:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
02188 
class WordEnd(_PositionToken):
    r"""Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of wordChars
       (default=printables). To emulate the \b behavior of regular expressions,
       use WordEnd(alphanums). A location at or past the end of the string
       being parsed always matches.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = _str2dict(wordChars)
        # this token never skips leading whitespace
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return (loc, []) at a word end; otherwise raise the instance's exception.

        Inside the string, the match fails when the current character is a
        word character or the previous character is not one.
        """
        instrlen = len(instring)
        if instrlen == 0 or loc >= instrlen:
            return loc, []
        wordChars = self.wordChars
        if instring[loc] in wordChars or instring[loc-1] not in wordChars:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
02213 
02214 
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement holding a list of sub-expressions in self.exprs."""
    def __init__( self, exprs, savelist = False ):
        """Normalize exprs into a list stored on self.exprs.

        A string becomes a single Literal, a list is stored as given (not
        copied), any other iterable is converted with list(), and a
        non-iterable object is wrapped in a one-element list.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, basestring ):
            members = [ Literal( exprs ) ]
        elif isinstance( exprs, list ):
            members = exprs
        else:
            try:
                members = list( exprs )
            except TypeError:
                members = [ exprs ]
        self.exprs = members
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the i-th contained expression."""
        return self.exprs[i]

    def append( self, other ):
        """Add other to the contained expressions, reset the cached string form, return self."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Turn off whitespace skipping on this expression and on copies of all
           contained expressions (the copies replace the originals in self.exprs)."""
        self.skipWhitespace = False
        copies = []
        for member in self.exprs:
            copies.append( member.copy() )
        self.exprs = copies
        for member in copies:
            member.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as an ignorable expression here and on every contained expression.

        A Suppress that is already present in self.ignoreExprs is not
        registered a second time.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        for member in self.exprs:
            # pass down the entry that the base class just stored
            member.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base-class string form, or a cached 'ClassName:(exprs)' fallback if that fails."""
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline all contained expressions and flatten one level of same-class nesting.

        With exactly two contained expressions, a first or last member of the
        same class is replaced by its own members, provided it has no parse
        action, no results name and no debug flag.
        """
        super(ParseExpression,self).streamline()

        for member in self.exprs:
            member.streamline()

        if len(self.exprs) != 2:
            return self

        def absorbable( sub ):
            # nested expression of our own class carrying nothing that flattening would lose
            return ( isinstance( sub, self.__class__ ) and
                     not sub.parseAction and
                     sub.resultsName is None and
                     not sub.debug )

        head = self.exprs[0]
        if absorbable( head ):
            self.exprs = head.exprs[:] + [ self.exprs[1] ]
            self.strRepr = None
            self.mayReturnEmpty |= head.mayReturnEmpty
            self.mayIndexError  |= head.mayIndexError

        # evaluated after the head merge, so this sees the updated list
        tail = self.exprs[-1]
        if absorbable( tail ):
            self.exprs = self.exprs[:-1] + tail.exprs[:]
            self.strRepr = None
            self.mayReturnEmpty |= tail.mayReturnEmpty
            self.mayIndexError  |= tail.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate unchanged to the base-class setResultsName."""
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then run checkRecursion from an empty list."""
        trace = validateTrace[:]+[self]
        for member in self.exprs:
            member.validate(trace)
        self.checkRecursion( [] )
02310 
02311 class And(ParseExpression):
02312     """Requires all given ParseExpressions to be found in the given order.
02313        Expressions may be separated by whitespace.
02314        May be constructed using the '+' operator.
02315     """
02316 
02317     class _ErrorStop(Empty):
02318         def __init__(self, *args, **kwargs):
02319             super(Empty,self).__init__(*args, **kwargs)
02320             self.leaveWhitespace()
02321 
02322     def __init__( self, exprs, savelist = True ):
02323         super(And,self).__init__(exprs, savelist)
02324         self.mayReturnEmpty = True
02325         for e in self.exprs:
02326             if not e.mayReturnEmpty:
02327                 self.mayReturnEmpty = False
02328                 break
02329         self.setWhitespaceChars( exprs[0].whiteChars )
02330         self.skipWhitespace = exprs[0].skipWhitespace
02331         self.callPreparse = True
02332 
02333     def parseImpl( self, instring, loc, doActions=True ):
02334         # pass False as last arg to _parse for first element, since we already
02335         # pre-parsed the string as part of our And pre-parsing
02336         loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
02337         errorStop = False
02338         for e in self.exprs[1:]:
02339             if isinstance(e, And._ErrorStop):
02340                 errorStop = True
02341                 continue
02342             if errorStop:
02343                 try:
02344                     loc, exprtokens = e._parse( instring, loc, doActions )
02345                 except ParseSyntaxException:
02346                     raise
02347                 except ParseBaseException, pe:
02348                     raise ParseSyntaxException(pe)
02349                 except IndexError, ie:
02350                     raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
02351             else:
02352                 loc, exprtokens = e._parse( instring, loc, doActions )
02353             if exprtokens or exprtokens.keys():
02354                 resultlist += exprtokens
02355         return loc, resultlist
02356 
02357     def __iadd__(self, other ):
02358         if isinstance( other, basestring ):
02359             other = Literal( other )
02360         return self.append( other ) #And( [ self, other ] )
02361 
02362     def checkRecursion( self, parseElementList ):
02363         subRecCheckList = parseElementList[:] + [ self ]
02364         for e in self.exprs:
02365             e.checkRecursion( subRecCheckList )
02366             if not e.mayReturnEmpty:
02367                 break
02368 
02369     def __str__( self ):
02370         if hasattr(self,"name"):
02371             return self.name
02372 
02373         if self.strRepr is None:
02374             self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
02375 
02376         return self.strRepr
02377 
02378 
02379 class Or(ParseExpression):
02380     """Requires that at least one ParseExpression is found.
02381        If two expressions match, the expression that matches the longest string will be used.
02382        May be constructed using the '^' operator.
02383     """
02384     def __init__( self, exprs, savelist = False ):
02385         super(Or,self).__init__(exprs, savelist)
02386         self.mayReturnEmpty = False
02387         for e in self.exprs:
02388             if e.mayReturnEmpty:
02389                 self.mayReturnEmpty = True
02390                 break
02391 
02392     def parseImpl( self, instring, loc, doActions=True ):
02393         maxExcLoc = -1
02394         maxMatchLoc = -1
02395         maxException = None
02396         for e in self.exprs:
02397             try:
02398                 loc2 = e.tryParse( instring, loc )
02399             except ParseException, err:
02400                 if err.loc > maxExcLoc:
02401                     maxException = err
02402                     maxExcLoc = err.loc
02403             except IndexError:
02404                 if len(instring) > maxExcLoc:
02405                     maxException = ParseException(instring,len(instring),e.errmsg,self)
02406                     maxExcLoc = len(instring)
02407             else:
02408                 if loc2 > maxMatchLoc:
02409                     maxMatchLoc = loc2
02410                     maxMatchExp = e
02411 
02412         if maxMatchLoc < 0:
02413             if maxException is not None:
02414                 raise maxException
02415             else:
02416                 raise ParseException(instring, loc, "no defined alternatives to match", self)
02417 
02418         return maxMatchExp._parse( instring, loc, doActions )
02419 
02420     def __ixor__(self, other ):
02421         if isinstance( other, basestring ):
02422             other = Literal( other )
02423         return self.append( other ) #Or( [ self, other ] )
02424 
02425     def __str__( self ):
02426         if hasattr(self,"name"):
02427             return self.name
02428 
02429         if self.strRepr is None:
02430             self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
02431 
02432         return self.strRepr
02433 
02434     def checkRecursion( self, parseElementList ):
02435         subRecCheckList = parseElementList[:] + [ self ]
02436         for e in self.exprs:
02437             e.checkRecursion( subRecCheckList )
02438 
02439 
02440 class MatchFirst(ParseExpression):
02441     """Requires that at least one ParseExpression is found.
02442        If two expressions match, the first one listed is the one that will match.
02443        May be constructed using the '|' operator.
02444     """
02445     def __init__( self, exprs, savelist = False ):
02446         super(MatchFirst,self).__init__(exprs, savelist)
02447         if exprs:
02448             self.mayReturnEmpty = False
02449             for e in self.exprs:
02450                 if e.mayReturnEmpty:
02451                     self.mayReturnEmpty = True
02452                     break
02453         else:
02454             self.mayReturnEmpty = True
02455 
02456     def parseImpl( self, instring, loc, doActions=True ):
02457         maxExcLoc = -1
02458         maxException = None
02459         for e in self.exprs:
02460             try:
02461                 ret = e._parse( instring, loc, doActions )
02462                 return ret
02463             except ParseException, err:
02464                 if err.loc > maxExcLoc:
02465                     maxException = err
02466                     maxExcLoc = err.loc
02467             except IndexError:
02468                 if len(instring) > maxExcLoc:
02469                     maxException = ParseException(instring,len(instring),e.errmsg,self)
02470                     maxExcLoc = len(instring)
02471 
02472         # only got here if no expression matched, raise exception for match that made it the furthest
02473         else:
02474             if maxException is not None:
02475                 raise maxException
02476             else:
02477                 raise ParseException(instring, loc, "no defined alternatives to match", self)
02478 
02479     def __ior__(self, other ):
02480         if isinstance( other, basestring ):
02481             other = Literal( other )
02482         return self.append( other ) #MatchFirst( [ self, other ] )
02483 
02484     def __str__( self ):
02485         if hasattr(self,"name"):
02486             return self.name
02487 
02488         if self.strRepr is None:
02489             self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
02490 
02491         return self.strRepr
02492 
02493     def checkRecursion( self, parseElementList ):
02494         subRecCheckList = parseElementList[:] + [ self ]
02495         for e in self.exprs:
02496             e.checkRecursion( subRecCheckList )
02497 
02498 
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        """mayReturnEmpty is True only if every contained expression may return empty."""
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for member in self.exprs:
            if not member.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # the expression groups are built lazily on the first parseImpl call
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Find an order in which the expressions match, then parse in that order.

        A first pass uses tryParse repeatedly to discover the match order; it
        stops after a round in which every candidate fails.  A
        ParseException is raised if required expressions remain unmatched.
        The second pass parses for real and merges the results, combining the
        values of results names that occur more than once.
        """
        if self.initExprGroups:
            def innerExprs( wrapperClass ):
                return [ e.expr for e in self.exprs if isinstance(e,wrapperClass) ]
            self.optionals = innerExprs( Optional )
            self.multioptionals = innerExprs( ZeroOrMore )
            self.multirequired = innerExprs( OneOrMore )
            wrappers = (Optional,ZeroOrMore,OneOrMore)
            self.required = [ e for e in self.exprs if not isinstance(e,wrappers) ]
            self.required.extend( self.multirequired )
            self.initExprGroups = False

        scanLoc = loc
        pendingReqd = self.required[:]
        pendingOpt  = self.optionals[:]
        matchOrder = []

        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failures = 0
            for cand in candidates:
                try:
                    scanLoc = cand.tryParse( instring, scanLoc )
                except ParseException:
                    failures += 1
                else:
                    matchOrder.append(cand)
                    if cand in pendingReqd:
                        pendingReqd.remove(cand)
                    elif cand in pendingOpt:
                        pendingOpt.remove(cand)
            if failures == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join( [ _ustr(e) for e in pendingReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder.extend( [ e for e in self.exprs
                             if isinstance(e,Optional) and e.expr in pendingOpt ] )

        parsed = []
        for member in matchOrder:
            loc,results = member._parse(instring,loc,doActions)
            parsed.append(results)

        finalResults = ParseResults([])
        for res in parsed:
            # names already present are combined first and written back after
            # the in-place addition below
            combined = {}
            for key in res.keys():
                if key in finalResults.keys():
                    merged = ParseResults(finalResults[key])
                    merged += ParseResults(res[key])
                    combined[key] = merged
            finalResults += ParseResults(res)
            for key,value in combined.items():
                finalResults[key] = value
        return loc, finalResults

    def __str__( self ):
        """Return self.name if set, else a cached '{a & b & c}' rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            rendered = [ _ustr(member) for member in self.exprs ]
            self.strRepr = "{" + " & ".join( rendered ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run checkRecursion on every contained expression with self added to the list."""
        extended = parseElementList[:] + [ self ]
        for member in self.exprs:
            member.checkRecursion( extended )
02583 
02584 
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement that wraps a single expression (self.expr),
       for combining and post-processing parsed tokens."""
    def __init__( self, expr, savelist=False ):
        """Wrap expr (a string is converted to Literal; None is allowed).

        When expr is not None, its whitespace settings, mayIndexError,
        mayReturnEmpty, saveAsList, callPreparse and ignore expressions are
        copied onto this element.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; raise ParseException if there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Turn off whitespace skipping here and on a copy of the wrapped expression.

        The copy replaces self.expr.  With no wrapped expression only this
        element's own flag is changed.
        """
        self.skipWhitespace = False
        # check for None before copying: calling copy() on None would raise
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as an ignorable expression here and on the wrapped expression.

        A Suppress that is already present in self.ignoreExprs is not
        registered a second time.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression, if any."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if self is already in parseElementList,
           otherwise continue the check on the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then run checkRecursion from an empty list."""
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base-class string form, or a cached 'ClassName:(expr)' fallback if that fails."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # fall through to the generated representation below
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
02655 
02656 
class FollowedBy(ParseElementEnhance):
    """Positive lookahead on the given parse expression.  The parsing position
    is *not* advanced; FollowedBy only checks that the expression matches at
    the current position, and always returns a null token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc, []) if the wrapped expression matches at loc.

        Any exception raised by tryParse propagates to the caller; the
        location it returns is discarded.
        """
        self.expr.tryParse( instring, loc )
        noTokens = []
        return loc, noTokens
02669 
02670 
class NotAny(ParseElementEnhance):
    """Negative lookahead on the given parse expression.  The parsing position
    is *not* advanced; NotAny only checks that the expression does *not* match
    at the current position.  NotAny does *not* skip over leading whitespace
    and always returns a null token list.  May be constructed using the '~'
    operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc, []) when the wrapped expression fails at loc.

        A ParseException or IndexError from tryParse counts as the wanted
        failure.  If the expression does match, the exception object stored
        on the instance is raised.
        """
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            return loc, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__( self ):
        """Return self.name if set, else a cached '~{expr}' rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
02706 
02707 
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition: matches the given expression zero or more times."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression repeatedly until it fails.

        The repetition ends on the first ParseException or IndexError, which
        is swallowed; the location and tokens gathered so far are returned
        (an empty list if not even the first attempt succeeded).
        """
        tokens = []
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
            skipIgnorables = ( len(self.ignoreExprs) > 0 )
            while True:
                startLoc = loc
                if skipIgnorables:
                    startLoc = self._skipIgnorables( instring, loc )
                loc, moreTokens = self.expr._parse( instring, startLoc, doActions )
                # keep results that have either list items or named entries
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        """Return self.name if set, else a cached '[expr]...' rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Set the results name via the base class and force saveAsList on the returned element."""
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
02745 
02746 
02747 class OneOrMore(ParseElementEnhance):
02748     """Repetition of one or more of the given expression."""
02749     def parseImpl( self, instring, loc, doActions=True ):
02750         # must be at least one
02751         loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
02752         try:
02753             hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
02754             while 1:
02755                 if hasIgnoreExprs:
02756                     preloc = self._skipIgnorables( instring, loc )
02757                 else:
02758                     preloc = loc
02759                 loc, tmptokens = self.expr._parse( instring, preloc, doActions )
02760                 if tmptokens or tmptokens.keys():
02761                     tokens += tmptokens
02762         except (ParseException,IndexError):
02763             pass
02764 
02765         return loc, tokens
02766 
02767     def __str__( self ):
02768         if hasattr(self,"name"):
02769             return self.name
02770 
02771         if self.strRepr is None:
02772             self.strRepr = "{" + _ustr(self.expr) + "}..."
02773 
02774         return self.strRepr
02775 
02776     def setResultsName( self, name, listAllMatches=False ):
02777         ret = super(OneOrMore,self).setResultsName(name,listAllMatches)
02778         ret.saveAsList = True
02779         return ret
02780 
class _NullToken(object):
    """Placeholder object that is always false and renders as an empty string."""
    def __nonzero__(self):
        return False
    # same truth test under the Python 3 method name
    __bool__ = __nonzero__
    def __str__(self):
        return ""

# sentinel used as the default value of Optional's 'default' argument
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default value to return when the expression is not found can also
       be specified.
    """
    def __init__( self, exprs, default=_optionalNotMatched ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression, falling back to the default on failure.

        On ParseException or IndexError the location is left unchanged.  The
        tokens are then empty when no default was given; otherwise they hold
        the default value, stored under the wrapped expression's results
        name as well when it has one.
        """
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([ self.defaultValue ])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [ self.defaultValue ]
        return loc, tokens

    def __str__( self ):
        """Return self.name if set, else a cached '[expr]' rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr
02821 
02822 
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also parsed (the skipped text
       and matched expression are returned as a 2-element list).  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.  If failOn is given and matches at a scanned
       position before the target expression is found, the skip fails.
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a failOn given as a string is converted to a Literal
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Scan forward one character at a time until the target expression matches.

        Returns (loc, [skippedText]), or, when includeMatch is set and the
        target produced tokens, (loc, [results]) where results holds the
        skipped text followed by the target's tokens.  Raises ParseException
        if failOn matches at a scanned position, and the exception object
        stored on the instance if the end of the string is passed without a
        match.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        # flag makes the handler below re-raise instead of advancing
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    failParse = False
                if self.ignoreExpr is not None:
                    # step over any run of ignorable expressions at this position
                    while 1:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                        except ParseBaseException:
                            break
                # probe without parse actions; raises if the target is not here
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ skipText ]
            except (ParseException,IndexError):
                if failParse:
                    raise
                else:
                    loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
02888 
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Define the wrapped expression (strings are converted to Literal) and
           copy its parsing attributes onto this Forward.  Returns None, so the
           result of '<<' cannot be chained.
        """
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return None

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this Forward only; returns self."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the wrapped expression once; returns self."""
        if not self.streamlined:
            # mark first, so a recursive grammar does not loop back into here
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=None ):
        """Validate the wrapped expression, passing along the trace of
           expressions already visited, then check this element for recursion.
        """
        if validateTrace is None:
            validateTrace = []
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # Temporarily switch this instance to _ForwardNoRecurse (whose __str__
        # is a fixed "..."), so that a grammar referring back to this Forward
        # does not recurse without end while the contained expression is rendered.
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        """Copy a defined Forward normally; for an undefined one, return a new
           Forward that refers to this one, so a later definition is still seen.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret << self
            return ret
02960 
class _ForwardNoRecurse(Forward):
    """Forward variant whose string form is the fixed placeholder "...".

       Forward.__str__ temporarily assigns this class to an instance while
       it renders the contained expression.
    """
    def __str__( self ):
        return "..."
02964 
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # NOTE: savelist is accepted but not forwarded to the base class (the
        # argument is commented out below); saveAsList always starts as False.
        super(TokenConverter,self).__init__( expr )#, savelist )
        self.saveAsList = False
02970 
class Upcase(TokenConverter):
    """Deprecated converter that upper-cases every matched token.

       Creating an instance emits a DeprecationWarning that points to the
       upcaseTokens parse action as the replacement.
    """
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        deprecationNote = "Upcase class is deprecated, use upcaseTokens parse action instead"
        warnings.warn(deprecationNote, DeprecationWarning, stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        # return a plain list holding the upper-cased form of each token
        return [ string.upper(tok) for tok in tokenlist ]
02980 
02981 
class Combine(TokenConverter):
    """Converter that joins all matched tokens into a single string.
       The matched pieces must be contiguous in the input unless the
       converter is constructed with 'adjacent=False'.

       Parameters:
        - expr - the expression whose tokens are to be joined
        - joinString - (default="") passed to the results' string-list helper
        - adjacent - (default=True) require the pieces to be adjacent
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        self.joinString = joinString
        self.adjacent = adjacent
        # whitespace-stripping is suppressed in the contained expressions when
        # adjacency is required, but always re-enabled on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True

    def ignore( self, other ):
        # an adjacent Combine goes straight to ParserElement.ignore; otherwise
        # the inherited implementation is used
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        # start from a copy of the results, emptied of its tokens
        retToks = tokenlist.copy()
        del retToks[:]
        combined = "".join(tokenlist._asStringList(self.joinString))
        retToks += ParseResults([ combined ], modal=self.modalResults)

        if self.resultsName and len(retToks.keys())>0:
            return [ retToks ]
        return retToks
03012 
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        # TokenConverter.__init__ resets saveAsList to False; turn it back on here
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # wrap the whole token list so it is returned as one nested element
        return [ tokenlist ]
03021 
class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list, and also as a dictionary.
       The first token of each element becomes that element's key, so the
       element can also be looked up by name.  Useful for tabular report
       scraping when the first column can be used as an item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        for offset,entry in enumerate(tokenlist):
            entryLen = len(entry)
            if entryLen == 0:
                continue
            key = entry[0]
            if isinstance(key,int):
                # integer keys are stored under their stripped string form
                key = _ustr(entry[0]).strip()
            if entryLen == 1:
                # key only: the value is an empty string
                value = ""
            elif entryLen == 2 and not isinstance(entry[1],ParseResults):
                # simple key/value pair
                value = entry[1]
            else:
                # everything after the key; a lone value without named
                # results is unwrapped
                value = entry.copy() #ParseResults(i)
                del value[0]
                if len(value) == 1 and not (isinstance(value,ParseResults) and value.keys()):
                    value = value[0]
            tokenlist[key] = _ParseResultsWithOffset(value,offset)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
03054 
03055 
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        # discard whatever the wrapped expression matched
        return []

    def suppress( self ):
        # already a Suppress: return self rather than wrapping again
        return self
03063 
03064 
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once.

       The first call is passed through to the wrapped action.  Every later
       call raises ParseException until reset() is called.
    """
    def __init__(self, methodCall):
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        if self.called:
            raise ParseException(s,l,"")
        # the flag is set only after the wrapped action returned normally
        results = self.callable(s,l,t)
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped parse action to be called again."""
        self.called = False
03078 
def traceParseAction(f):
    """Decorator for debugging parse actions.

       The returned wrapper writes an "entering" line to sys.stderr before
       calling the parse action, and a "leaving" line afterwards showing
       either the return value or the exception (which is re-raised).
    """
    f = ParserElement._normalizeParseActionArgs(f)
    def z(*paArgs):
        thisFunc = f.func_name
        # the last three arguments are always (string, location, tokens)
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # a leading extra argument is the bound instance: qualify the name
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
03100 
03101 #
03102 # global helpers
03103 #
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default the list elements and delimiters may be separated by whitespace
       and comments; passing 'combine=True' overrides this.
       With combine set to True the matching tokens are returned as a single token
       string that includes the delimiters; otherwise they are returned as a list
       of tokens with the delimiters suppressed.
    """
    exprLabel = _ustr(expr)
    dlName = exprLabel+" ["+_ustr(delim)+" "+exprLabel+"]..."
    if not combine:
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
03117 
def countedArray( expr ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens are returned as a list of the expr tokens - the
       leading count token is suppressed.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # once the count is known, define the array body to match exactly
        # that many copies of expr (or an empty group for a count of zero)
        n = int(t[0])
        if n:
            arrayExpr << Group(And([expr]*n))
        else:
            arrayExpr << Group(empty)
        return []
    countExpr = Word(nums).setName("arrayLen")
    countExpr.setParseAction(countFieldParseAction, callDuringTry=True)
    return ( countExpr + arrayExpr )
03131 
def _flatten(L):
    """Return a flat list of the non-list items nested anywhere inside L.

       Only objects whose type is exactly list are descended into; any other
       object (including a tuple or a list subclass) is kept as a single
       item.  A non-list argument yields a one-element list.

       The traversal is iterative, so neither the length nor the nesting
       depth of the input is bounded by the interpreter's recursion limit.
    """
    if type(L) is not list:
        return [L]
    flat = []
    # stack of iterators, one for each list that is currently being walked
    pending = [iter(L)]
    while pending:
        for item in pending[-1]:
            if type(item) is list:
                # descend; the outer iterator keeps its position
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            # innermost list exhausted: resume its parent
            pending.pop()
    return flat
03136 
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches a
       previous literal, it will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever expr just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: match them in sequence
            tflat = _flatten(t.asList())
            rep << And( [ Literal(tt) for tt in tflat ] )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
03162 
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches by
       expressions, it will *not* match the leading "1:1" in "1:10";
       the expressions are evaluated first, and then compared, so
       "1" is compared with "10".
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()
    def copyTokenToRepeater(s,l,t):
        # remember what expr matched; the repeater must yield the same tokens
        expected = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            if _flatten(t.asList()) != expected:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
03188 
def _escapeRegexRangeChars(s):
    """Escape s for use inside a regular expression character class.

       A backslash is put in front of each backslash, '^', '-' and ']'
       (backslash first, so that the added escapes are not escaped again),
       and literal newline and tab characters are spelled as \\n and \\t.
    """
    escaped = s
    for special in r"\^-]":
        escaped = escaped.replace(special,_bslash+special)
    for ctrl,visible in ( ("\n",r"\n"), ("\t",r"\t") ):
        escaped = escaped.replace(ctrl,visible)
    return _ustr(escaped)
03196 
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       An argument that is neither a string nor a list/tuple produces a
       SyntaxWarning and is treated as an empty set of literals.  With no
       literals at all, a MatchFirst without alternatives is returned.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # keep going with no literals instead of failing on an unbound name
        symbols = []

    # Drop duplicates, and move any literal that is masked by an earlier,
    # shorter prefix in front of that prefix, so the longer one is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # position i is settled only when nothing after it collides
            i += 1

    # a Regex is attempted only when there is something to match
    if symbols and not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all literals are single characters: use a character class
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
03255 
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by giving the patterns
       for the key and the value.  Takes care of nesting the Dict, ZeroOrMore, and
       Group tokens in the proper order.  The key pattern can include delimiting
       markers or punctuation, as long as they are suppressed, thereby leaving the
       significant key text.  The value pattern can include named results, so that
       the Dict results can include named token fields.
    """
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )
03265 
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text.  Simpler to use than the parse action keepOriginalText, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional asString argument is passed as False, then the return value is a
       ParseResults containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to originalTextFor contains expressions with defined
       results names, you must set asString to False if you want to preserve those
       results name values."""
    # an empty match whose only token is the location where it was tried
    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
    startMarker = locMarker("_original_start")
    endMarker = locMarker("_original_end")
    matchExpr = startMarker + expr + endMarker

    def textOnly(s,l,t):
        return s[t._original_start:t._original_end]

    def textKeepingNames(s,l,t):
        # replace the tokens by the raw text, then drop the two helper names
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    if asString:
        extractText = textOnly
    else:
        extractText = textKeepingNames
    matchExpr.setParseAction(extractText)
    return matchExpr
03292     
# convenience constants for positional expressions
# (pre-built, named instances of Empty, LineStart, LineEnd, StringStart and StringEnd)
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")
03299 
# Grammar for the "[...]" character-set strings accepted by srange().

# a backslash followed by one of the listed punctuation characters;
# the parse action keeps only the escaped character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# NOTE: despite the name, both the backslash and ']' are removed here
_printables_less_backslash = "".join([ c for c in printables if c not in  r"\]" ])
# backslash + "0x" + hex digits, converted to the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# backslash + "0" + octal digits, converted to the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
# one character of the set; the escaped forms are tried before a plain character
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# "a-z" style range, grouped as a (first, last) pair
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# the whole bracket expression; results names: "negate" and "body"
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# expand a grouped (first, last) range into the string of all characters it
# covers; any other part is returned unchanged.  If the expansion is empty
# (first > last), the and/or form falls through and returns the group itself.
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
03309 
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       A leading '^' is accepted by the grammar, but only the body of the
       set is expanded here, so it does not change the result.
       If s cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # deliberate best effort: any parse or expansion error yields "",
        # while KeyboardInterrupt and SystemExit are no longer swallowed
        return ""
03330 
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.  The returned parse action raises ParseException
       when the match location is not in column n.
    """
    def verifyCol(strg,locn,toks):
        actualCol = col(locn,strg)
        if actualCol != n:
            raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
03339 
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.
       Especially useful when used with transformString().

       The returned parse action ignores its arguments and returns a new
       one-element list holding replStr on every call.
    """
    def _replFunc(*ignoredArgs):
        replacement = [replStr]
        return replacement
    return _replFunc
03347 
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
       Returns the first token without its first and last character.
    """
    quoted = t[0]
    return quoted[1:-1]
03354 
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case.
       Each token is first converted to a string with _ustr."""
    return [ _ustr(tok).upper() for tok in t ]
03358 
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case.
       Each token is first converted to a string with _ustr."""
    return [ _ustr(tok).lower() for tok in t ]
03362 
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions.

       The tokens in t are replaced in place by the slice of s from startLoc
       to the end location reported by getTokensEndLoc(); t is returned.
    """
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t
03373 
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       Returns the value of the local variable "loc" of the nearest calling
       frame whose function is named "_parseNoCache".  Raises
       ParseFatalException if there is no such frame.
    """
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for
        # the correct calling routine, skipping this function and its caller
        for frameRecord in fstack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the references to the inspected frames
        del fstack
03389 
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       tagStr may be a string (turned into a Keyword, caseless unless xml is
       true) or an expression with a name attribute.  With xml true, every
       attribute needs a double-quoted value; otherwise attribute names are
       lower-cased and the value is optional and may be quoted or unquoted.
       Returns the tuple (openTag, closeTag).
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attribute = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attribute = Group( tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue ) )

    # optional trailing "/": the "empty" result is True for a self-closing tag
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr + \
            Dict(ZeroOrMore(attribute)) + \
            selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the tag name in title case, with
    # any ':' separators removed
    tagSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+tagSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+tagSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
03417 
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name.
       Returns the (openTag, closeTag) pair built by _makeTags with xml turned off."""
    return _makeTags( tagStr, xml=False )
03421 
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name.
       Returns the (openTag, closeTag) pair built by _makeTags with xml turned on."""
    return _makeTags( tagStr, xml=True )
03425 
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       To verify that the attribute exists, but without specifying a value, pass
       withAttribute.ANY_VALUE as the value.
       """
    # positional (name, value) pairs take precedence over keyword arguments
    attrs = [(k,v) for k,v in (args or attrDict.items())]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))
    return pa
# unique marker meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
03456 
# Associativity markers for operatorPrecedence: LEFT and RIGHT are unique
# objects that are compared against the rightLeftAssoc entry of each
# operator definition.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
03460 
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the
          nested expression grammar
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises ValueError for a malformed operator definition.
    """
    ret = Forward()
    # innermost level: a base operand, or the whole expression in parentheses
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # pad with None so the parse action may be left out of the tuple
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()

        # Operands after the first come from the previous (tighter binding)
        # level for left association, and from this level for right association.
        if rightLeftAssoc == opAssoc.LEFT:
            leftAssoc = True
            operand = lastExpr
        elif rightLeftAssoc == opAssoc.RIGHT:
            leftAssoc = False
            operand = thisExpr
        else:
            raise ValueError("operator must indicate right or left associativity")

        if arity == 1:
            if leftAssoc:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            else:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
        elif arity == 2:
            if opExpr is not None:
                matchExpr = FollowedBy(lastExpr + opExpr + operand) + Group( lastExpr + OneOrMore( opExpr + operand ) )
            else:
                # no operator token: operands simply follow one another
                matchExpr = FollowedBy(lastExpr + operand) + Group( lastExpr + OneOrMore( operand ) )
        elif arity == 3:
            matchExpr = FollowedBy(lastExpr + opExpr1 + operand + opExpr2 + operand) + \
                        Group( lastExpr + opExpr1 + operand + opExpr2 + operand )
        else:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its own operator form, or falls back to the previous level
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
03531 
# Pre-built expressions for quoted string literals.  Inside the quotes each
# pattern accepts: any character other than the quote itself, newline,
# carriage return or backslash; a doubled quote character; a \x escape
# followed by hex digits; or a backslash followed by any single character.
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"'
).setName("string enclosed in double quotes")

sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'"
).setName("string enclosed in single quotes")

# Alternation of the two patterns above, written as two adjacent raw string
# literals (double-quoted form first) that are joined at compile time.
quotedString = Regex(
    r'(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")'
    r"|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')"
).setName("quotedString using single or double quotes")

# A quoted string immediately preceded by the letter u, combined into one token;
# works on a copy so quotedString itself is left untouched.
unicodeString = Combine(_L('u') + quotedString.copy())
03536 
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method that builds an expression for nested lists wrapped in
       opening and closing delimiters ("(" and ")" unless specified otherwise).

       Parameters:
        - opener - opening delimiter of a nested list (default="("); may also be a pyparsing expression
        - closer - closing delimiter of a nested list (default=")"); may also be a pyparsing expression
        - content - expression for the items inside the nested lists (default=None)
        - ignoreExpr - expression whose matches may contain delimiter text that
            must not be treated as nesting (default=quotedString)

       When content is omitted, opener and closer must both be strings, and
       the generated content expression captures everything between the
       delimiters as a list of separate whitespace-delimited values.

       ignoreExpr is typically quotedString or a comment expression; combine
       several expressions with an Or or MatchFirst.  Pass None if nothing
       is to be ignored.

       Returns a Forward that resolves to a Group of: suppressed opener, zero
       or more of (ignoreExpr, nested list, content), suppressed closer.

       Raises ValueError if opener equals closer, or if content is omitted
       and opener or closer is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from plain strings
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t: t[0].strip()
        singleCharDelims = (len(opener) == 1 and len(closer) == 1)

        if singleCharDelims and ignoreExpr is not None:
            # single-character delimiters: exclude them directly in CharsNotIn,
            # consuming one character at a time so ignoreExpr is re-checked
            content = Combine(
                OneOrMore(~ignoreExpr + CharsNotIn(opener+closer+whiteChars, exact=1))
            ).setParseAction(stripToken)
        elif singleCharDelims:
            content = empty + CharsNotIn(opener+closer+whiteChars).setParseAction(stripToken)
        elif ignoreExpr is not None:
            # multi-character delimiters: use negative lookahead on each one
            content = Combine(
                OneOrMore(~ignoreExpr + ~Literal(opener) + ~Literal(closer) +
                          CharsNotIn(whiteChars, exact=1))
            ).setParseAction(stripToken)
        else:
            content = Combine(
                OneOrMore(~Literal(opener) + ~Literal(closer) +
                          CharsNotIn(whiteChars, exact=1))
            ).setParseAction(stripToken)

    # the list is recursive: one of the alternatives for an item is the list itself
    ret = Forward()
    if ignoreExpr is not None:
        listItem = ignoreExpr | ret | content
    else:
        listItem = ret | content
    ret << Group( Suppress(opener) + ZeroOrMore(listItem) + Suppress(closer) )
    return ret
03588 
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method that builds an expression for a block of statements
       delimited by indentation, in the manner of Python block statements.

       Parameters:
        - blockStatementExpr - expression for one statement; it is repeated
            within the block (note: a backslash-newline ignore expression is
            added to it in place)
        - indentStack - list supplied by the caller and used as the stack of
            active indentation columns; it is appended to and popped during
            parsing (multiple statementWithIndentedBlock expressions within a
            single grammar should share a common indentStack)
        - indent - boolean; if True the block must be indented beyond the
            current level, if False the block is matched at the current
            level, e.g. for the left-most statements (default=True)

       A valid block must contain at least one blockStatement.

       Returns a Group expression for the whole block, each statement being
       wrapped in its own Group.
    """
    def checkPeerIndent(s, loc, toks):
        # nothing to verify once the end of the input has been reached
        if loc < len(s):
            column = col(loc, s)
            expected = indentStack[-1]
            if column > expected:
                raise ParseFatalException(s,loc,"illegal nesting")
            if column < expected:
                raise ParseException(s,loc,"not a peer entry")

    def checkSubIndent(s, loc, toks):
        column = col(loc, s)
        if column <= indentStack[-1]:
            raise ParseException(s,loc,"not a subentry")
        # deeper than the enclosing level: this column becomes the current level
        indentStack.append( column )

    def checkUnindent(s, loc, toks):
        if loc >= len(s):
            return
        column = col(loc, s)
        # must be left of the block being closed and no deeper than its parent
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(s,loc,"not an unindent")
        indentStack.pop()

    # one or more suppressed line ends, skipping only tabs and spaces before each
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    # zero-width markers whose parse actions perform the column checks
    # NOTE(review): the leading Empty() presumably consumes whitespace so the
    # check sees the statement's column - confirm
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)

    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        blockBody = (Optional(NL) +
                     FollowedBy(blockStatementExpr) +
                     INDENT + statements + UNDENT)
    else:
        blockBody = Optional(NL) + statements
    smExpr = Group( blockBody )

    # let statements continue across lines ending in a backslash
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
03640 
# 8-bit character sets: code points 0xc0-0xff except 0xd7 and 0xf7 for
# alphas8bit; 0xa1-0xbf plus 0xd7 and 0xf7 for punc8bit
# NOTE(review): presumably the Latin-1 letters and punctuation - confirm
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing tag expressions for any tag name made of letters, digits, "_" and ":"
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))

# "&name;" for a handful of common entities; the name is stored as results name "entity"
commonHTMLEntity = Combine(
    _L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") + ";"
).streamline()
_htmlEntityMap = {
    "gt"   : ">",
    "lt"   : "<",
    "amp"  : "&",
    "nbsp" : " ",
    "quot" : '"',
}
# parse action: gives the replacement character for a known entity, else None
replaceHTMLEntity = lambda t : _htmlEntityMap.get(t.entity)

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
# everything up to the end of the current line, leading whitespace included
restOfLine = Regex(r".*").leaveWhitespace()
# "//" comment; a backslash-newline pair is consumed as part of the comment
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either a /* ... */ comment or a // comment
cppStyleComment = Regex(
    r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))"
).setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

# every printable character except the comma
_noncomma = "".join( [ c for c in printables if c != "," ] )
# one list item: words of non-comma characters, with interior runs of blanks
# kept only when they are not followed by a comma or the end of the line
_commasepitem = Combine(
    OneOrMore( Word(_noncomma) +
               Optional( Word(" \t") + ~Literal(",") + ~LineEnd() ) )
).streamline().setName("commaItem")
# comma-delimited list of quoted strings or plain items; an empty item yields ""
commaSeparatedList = delimitedList(
    Optional( quotedString | _commasepitem, default="" )
).setName("commaSeparatedList")
03664 
03665 
if __name__ == "__main__":
    # Self-test: build a tiny "SELECT columns FROM tables" grammar and run it
    # against a mix of valid and deliberately malformed statements.
    import sys

    def test( teststring ):
        """Parse teststring with the simpleSQL grammar and print the result.

           On success the token list, the named results "columns" and
           "tables", and an XML rendering are printed.  On a parse failure
           the offending line is printed with a caret under the error
           column, followed by the exception message.
        """
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->"   + str(tokenlist))
            print ("tokens = "         + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = "  + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException:
            # fetch the exception this way so that the module compiles on both
            # Python 2 and Python 3 ("except X,err" is a syntax error on 3,
            # "except X as err" is a syntax error before 2.6)
            err = sys.exc_info()[1]
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        # blank separator line; a bare print() would emit "()" on Python 2
        print ("")

    selectToken    = CaselessLiteral( "select" )
    fromToken      = CaselessLiteral( "from" )

    ident          = Word( alphas, alphanums + "_$" )
    # dotted names are combined into a single token and upper-cased
    columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
    tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList  = Group( delimitedList( tableName ) )#.setName("tables")
    simpleSQL      = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    # well-formed statements
    test( "SELECT * from XYZZY, ABC" )
    test( "select * from SYS.XYZZY" )
    test( "Select A from Sys.dual" )
    test( "Select AA,BB,CC from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    # malformed statements exercising the error report
    test( "Xelect A, B, C from Sys.dual" )
    test( "Select A, B, C frox Sys.dual" )
    test( "Select" )
    test( "Select ^^^ frox Sys.dual" )
    # trailing whitespace after the table list
    test( "Select A, B, C from Sys.dual, Table2   " )
 All Classes Namespaces Files Functions Variables Properties