BlockIt
|
00001 # module pyparsing.py 00002 # 00003 # Copyright (c) 2003-2009 Paul T. McGuire 00004 # 00005 # Permission is hereby granted, free of charge, to any person obtaining 00006 # a copy of this software and associated documentation files (the 00007 # "Software"), to deal in the Software without restriction, including 00008 # without limitation the rights to use, copy, modify, merge, publish, 00009 # distribute, sublicense, and/or sell copies of the Software, and to 00010 # permit persons to whom the Software is furnished to do so, subject to 00011 # the following conditions: 00012 # 00013 # The above copyright notice and this permission notice shall be 00014 # included in all copies or substantial portions of the Software. 00015 # 00016 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 00017 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 00018 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 00019 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 00020 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 00021 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 00022 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 00023 # 00024 #from __future__ import generators 00025 00026 __doc__ = \ 00027 """ 00028 pyparsing module - Classes and methods to define and execute parsing grammars 00029 00030 The pyparsing module is an alternative approach to creating and executing simple grammars, 00031 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you 00032 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 00033 provides a library of classes that you use to construct the grammar directly in Python. 00034 00035 Here is a program to parse "Hello, World!" 
(or any greeting of the form "<salutation>, <addressee>!"):: 00036 00037 from pyparsing import Word, alphas 00038 00039 # define grammar of a greeting 00040 greet = Word( alphas ) + "," + Word( alphas ) + "!" 00041 00042 hello = "Hello, World!" 00043 print hello, "->", greet.parseString( hello ) 00044 00045 The program outputs the following:: 00046 00047 Hello, World! -> ['Hello', ',', 'World', '!'] 00048 00049 The Python representation of the grammar is quite readable, owing to the self-explanatory 00050 class names, and the use of '+', '|' and '^' operators. 00051 00052 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an 00053 object with named attributes. 00054 00055 The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 00056 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) 00057 - quoted strings 00058 - embedded comments 00059 """ 00060 00061 __version__ = "1.5.2" 00062 __versionTime__ = "17 February 2009 19:45" 00063 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 00064 00065 import string 00066 from weakref import ref as wkref 00067 import copy 00068 import sys 00069 import warnings 00070 import re 00071 import sre_constants 00072 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 00073 00074 __all__ = [ 00075 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 00076 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 00077 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 00078 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 00079 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 00080 'Regex', 'SkipTo', 'StringEnd', 
'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 00081 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 00082 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 00083 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 00084 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', 00085 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 00086 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 00087 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 00088 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 00089 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 00090 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 00091 'indentedBlock', 'originalTextFor', 00092 ] 00093 00094 00095 """ 00096 Detect if we are running version 3.X and make appropriate changes 00097 Robert A. Clark 00098 """ 00099 if sys.version_info[0] > 2: 00100 _PY3K = True 00101 _MAX_INT = sys.maxsize 00102 basestring = str 00103 else: 00104 _PY3K = False 00105 _MAX_INT = sys.maxint 00106 00107 if not _PY3K: 00108 def _ustr(obj): 00109 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 00110 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 00111 then < returns the unicode object | encodes it with the default encoding | ... >. 00112 """ 00113 if isinstance(obj,unicode): 00114 return obj 00115 00116 try: 00117 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 00118 # it won't break any existing code. 
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions

       Stored attributes:
        - loc - index into the parsed string at which the failure was reported
        - msg - description of the failure
        - pstr - the string being parsed ("" if only a message was supplied)
        - parserElement - the elem argument given to the constructor
       Computed attributes (see __getattr__): lineno, col, column, line.
    """
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        if msg is None:
            # single-argument form: the lone positional argument is the
            # message, and there is no source text to compute positions from
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
              (also available as column)
            - line - returns the line containing the exception text
           Any other name raises AttributeError.
        """
        # only called when normal attribute lookup fails, so these values
        # are recomputed from loc/pstr on every access rather than stored
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        elif aname in ("col", "column"):
            return col( self.loc, self.pstr )
        elif aname == "line":
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
           Returns the marked line with leading/trailing whitespace stripped;
           an empty markerString returns the stripped line unmarked.
        """
        line_str = self.line
        line_column = self.column - 1   # column is 1-based, slicing is 0-based
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()

    def __dir__(self):
        # Names listed here must actually resolve on the instance: the method
        # is spelled markInputline (lower-case 'l'), and 'column' is accepted
        # by __getattr__ as an alias of 'col'.
        return "loc msg pstr parserElement lineno col column line " \
               "markInputline __str__ __repr__".split()
class RecursiveGrammarException(Exception):
    """Exception thrown by validate() if the grammar could be improperly recursive.

       The list of parse elements handed to the constructor is kept on the
       instance as parseElementTrace and is shown by str().
    """
    def __init__( self, parseElementList ):
        # the base Exception constructor is intentionally not invoked here,
        # exactly as before; only the trace is recorded
        self.parseElementTrace = parseElementList

    def __str__( self ):
        trace = self.parseElementTrace
        return "RecursiveGrammarException: %s" % trace
use _ustr for consistency 00301 self.__name = name 00302 if not toklist in (None,'',[]): 00303 if isinstance(toklist,basestring): 00304 toklist = [ toklist ] 00305 if asList: 00306 if isinstance(toklist,ParseResults): 00307 self[name] = _ParseResultsWithOffset(toklist.copy(),0) 00308 else: 00309 self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) 00310 self[name].__name = name 00311 else: 00312 try: 00313 self[name] = toklist[0] 00314 except (KeyError,TypeError,IndexError): 00315 self[name] = toklist 00316 00317 def __getitem__( self, i ): 00318 if isinstance( i, (int,slice) ): 00319 return self.__toklist[i] 00320 else: 00321 if i not in self.__accumNames: 00322 return self.__tokdict[i][-1][0] 00323 else: 00324 return ParseResults([ v[0] for v in self.__tokdict[i] ]) 00325 00326 def __setitem__( self, k, v ): 00327 if isinstance(v,_ParseResultsWithOffset): 00328 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] 00329 sub = v[0] 00330 elif isinstance(k,int): 00331 self.__toklist[k] = v 00332 sub = v 00333 else: 00334 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 00335 sub = v 00336 if isinstance(sub,ParseResults): 00337 sub.__parent = wkref(self) 00338 00339 def __delitem__( self, i ): 00340 if isinstance(i,(int,slice)): 00341 mylen = len( self.__toklist ) 00342 del self.__toklist[i] 00343 00344 # convert int to slice 00345 if isinstance(i, int): 00346 if i < 0: 00347 i += mylen 00348 i = slice(i, i+1) 00349 # get removed indices 00350 removed = list(range(*i.indices(mylen))) 00351 removed.reverse() 00352 # fixup indices in token dictionary 00353 for name in self.__tokdict: 00354 occurrences = self.__tokdict[name] 00355 for j in removed: 00356 for k, (value, position) in enumerate(occurrences): 00357 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 00358 else: 00359 del self.__tokdict[i] 00360 00361 def __contains__( self, k ): 00362 return k in self.__tokdict 00363 00364 def __len__( 
def get(self, key, defaultValue=None):
    """Look up the named result stored under key.

       Returns self[key] when key is a defined results name; otherwise
       returns defaultValue (None unless the caller supplies one)."""
    # membership is tested first (via __contains__) so that a missing
    # name never reaches __getitem__
    if key not in self:
        return defaultValue
    return self[key]
other ): 00422 if other.__tokdict: 00423 offset = len(self.__toklist) 00424 addoffset = ( lambda a: (a<0 and offset) or (a+offset) ) 00425 otheritems = other.__tokdict.items() 00426 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) 00427 for (k,vlist) in otheritems for v in vlist] 00428 for k,v in otherdictitems: 00429 self[k] = v 00430 if isinstance(v[0],ParseResults): 00431 v[0].__parent = wkref(self) 00432 00433 self.__toklist += other.__toklist 00434 self.__accumNames.update( other.__accumNames ) 00435 del other 00436 return self 00437 00438 def __repr__( self ): 00439 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) 00440 00441 def __str__( self ): 00442 out = "[" 00443 sep = "" 00444 for i in self.__toklist: 00445 if isinstance(i, ParseResults): 00446 out += sep + _ustr(i) 00447 else: 00448 out += sep + repr(i) 00449 sep = ", " 00450 out += "]" 00451 return out 00452 00453 def _asStringList( self, sep='' ): 00454 out = [] 00455 for item in self.__toklist: 00456 if out and sep: 00457 out.append(sep) 00458 if isinstance( item, ParseResults ): 00459 out += item._asStringList() 00460 else: 00461 out.append( _ustr(item) ) 00462 return out 00463 00464 def asList( self ): 00465 """Returns the parse results as a nested list of matching tokens, all converted to strings.""" 00466 out = [] 00467 for res in self.__toklist: 00468 if isinstance(res,ParseResults): 00469 out.append( res.asList() ) 00470 else: 00471 out.append( res ) 00472 return out 00473 00474 def asDict( self ): 00475 """Returns the named parse results as dictionary.""" 00476 return dict( self.items() ) 00477 00478 def copy( self ): 00479 """Returns a new copy of a ParseResults object.""" 00480 ret = ParseResults( self.__toklist ) 00481 ret.__tokdict = self.__tokdict.copy() 00482 ret.__parent = self.__parent 00483 ret.__accumNames.update( self.__accumNames ) 00484 ret.__name = self.__name 00485 return ret 00486 00487 def asXML( self, doctag=None, 
def getName(self):
    """Returns the results name for this token expression.

       Resolution order:
        - this object's own name, if it has one
        - otherwise, if a parent is recorded, the name under which the
          (still alive, non-empty) parent stores this object, else None
        - otherwise, if this object holds exactly one token and exactly one
          named entry whose first recorded offset is 0 or -1, that entry's name
        - otherwise None
    """
    if self.__name:
        return self.__name
    elif self.__parent:
        # __parent is a weak reference; calling it yields the parent or None
        par = self.__parent()
        if par:
            return par.__lookup(self)
        else:
            return None
    elif len(self) == 1 and len(self.__tokdict) == 1:
        # dict views cannot be indexed on Python 3, so materialize the single
        # (name, occurrences) pair as a list first; this works on 2.x as well
        name, occurrences = list(self.__tokdict.items())[0]
        if occurrences[0][1] in (0,-1):
            return name
        return None
    else:
        return None
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

       The returned text excludes the terminating newline.  When loc itself
       indexes a newline character, the line ended by that newline is returned.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # str.find reports "not found" as -1, so index 0 is a legitimate hit
    # (a string that starts with a newline has an empty first line)
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
def setName( self, name ):
    """Give this expression a name, for use in debugging.

       Stores the name, derives the error message "Expected <name>" from it,
       and - when this element already carries an 'exception' attribute -
       copies that message onto it.  Returns self so calls can be chained."""
    self.name = name
    expected = "Expected " + self.name
    self.errmsg = expected
    if not hasattr(self, "exception"):
        return self
    # keep the pre-existing exception object's message in step with errmsg
    self.exception.msg = self.errmsg
    return self
def _normalizeParseActionArgs( f ):
    """Internal method used to decorate parse actions that take fewer than 3 arguments,
       so that all parse actions can be called as f(s,l,t).

       f may be a function, a bound method, a class (its __init__ signature
       is inspected) or an object with a Python-level __call__ method.
       Returns f itself when it accepts *args or declares 3 or more
       positional parameters (not counting an implicit self); otherwise
       returns a wrapper tmp(s,l,t) that forwards only the trailing 2, 1
       or 0 arguments.  The wrapper copies f's __name__, __doc__ and
       __dict__ where possible.

       Raises AttributeError if no code object can be found for f.
    """
    STAR_ARGS = 4   # bit tested in co_flags to detect a *args parameter

    def _codeOf(func):
        # code object of a function or method, under either attribute spelling
        try:
            return func.__code__
        except AttributeError:
            return func.func_code

    def _takesImplicitSelf(func):
        # method objects expose the instance slot as im_self and/or __self__;
        # plain functions expose neither
        return hasattr(func,"im_self") or hasattr(func,"__self__")

    if isinstance(f,type):
        # a class is called through its constructor; __init__ always has a
        # leading self parameter that the caller does not supply.  f itself
        # is left untouched so that the class (not __init__) is what gets
        # returned or wrapped below.
        codeObj = _codeOf(f.__init__)
        implicitSelf = True
    else:
        try:
            codeObj = _codeOf(f)
            implicitSelf = _takesImplicitSelf(f)
        except AttributeError:
            # not a function or method, must be a callable object: get the
            # info from its __call__ method (a bound method forwards the
            # code-object lookup to its underlying function)
            callMethod = f.__call__
            codeObj = _codeOf(callMethod)
            implicitSelf = _takesImplicitSelf(callMethod)

    if codeObj.co_flags & STAR_ARGS:
        return f

    numargs = codeObj.co_argcount
    if implicitSelf:
        numargs -= 1

    #~ print ("adding function %s with %d args" % (f.func_name,numargs))
    if numargs >= 3:
        # already callable as f(s,l,t); any parameters beyond the third
        # are left to f's own defaults
        return f

    if numargs == 2:
        def tmp(s,l,t):
            return f(l,t)
    elif numargs == 1:
        def tmp(s,l,t):
            return f(t)
    else: #~ numargs == 0:
        def tmp(s,l,t):
            return f()

    # make the wrapper look like the wrapped callable; not every callable
    # has all of these attributes, so each copy is attempted independently
    for attrname in ("__name__", "__doc__"):
        try:
            setattr(tmp, attrname, getattr(f, attrname))
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
    try:
        tmp.__dict__.update(f.__dict__)
    except (AttributeError,TypeError):
        # no need for special handling if attribute doesnt exist
        pass
    return tmp
_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
def addParseAction( self, *fns, **kwargs ):
    """Append parse actions to this expression's existing list of parse actions.
       See L{I{setParseAction}<setParseAction>}.

       Each callable in fns is passed through _normalizeParseActionArgs
       before being appended.  The callDuringTry flag is only ever switched
       on here: once truthy it is left alone, otherwise it takes the value
       of the optional callDuringTry keyword argument (False when absent).
       Returns self."""
    normalize = self._normalizeParseActionArgs
    self.parseAction += [ normalize(fn) for fn in fns ]
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
def preParse( self, instring, loc ):
    """Return the location at which matching should actually begin.

       Starting from loc, first skips any ignorable expressions (only when
       ignoreExprs is non-empty, by way of _skipIgnorables), then steps over
       every character found in whiteChars (only when skipWhitespace is set).
    """
    pos = loc
    if self.ignoreExprs:
        pos = self._skipIgnorables( instring, pos )

    if not self.skipWhitespace:
        return pos

    whitespace = self.whiteChars
    end = len(instring)
    while pos < end:
        if instring[pos] not in whitespace:
            break
        pos += 1
    return pos
def tryParse( self, instring, loc ):
    """Attempt a match at loc and return only the location just past it.

       The match is run through _parse with doActions=False.  A
       ParseFatalException raised by the attempt is converted into an
       ordinary ParseException reported at loc with this expression's errmsg.
    """
    try:
        result = self._parse( instring, loc, doActions=False )
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return result[0]
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing front end for _parseNoCache.

       Results are stored in ParserElement._exprArgCache under the key
       (self,instring,loc,callPreParse,doActions).  Both outcomes are cached:
        - a successful parse is stored as (loc, copy of the tokens)
        - a ParseBaseException is stored as the exception object and is
          raised again on the next lookup with the same key

       Takes the same arguments, returns the same (loc, tokens) pair and
       raises the same exceptions as _parseNoCache.
    """
    cache = ParserElement._exprArgCache
    lookup = (self,instring,loc,callPreParse,doActions)
    if lookup in cache:
        value = cache[ lookup ]
        if isinstance(value,Exception):
            raise value
        # hand out a copy of the tokens, so that whatever the caller does to
        # them cannot alter the entry kept in the cache
        return (value[0],value[1].copy())

    try:
        value = self._parseNoCache( instring, loc, doActions, callPreParse )
    except ParseBaseException:
        # sys.exc_info() is used instead of "except X, e" / "except X as e"
        # so that this code is valid on both Python 2 and Python 3
        cache[ lookup ] = sys.exc_info()[1]
        raise
    cache[ lookup ] = (value[0],value[1].copy())
    return value
def parseString( self, instring, parseAll=False ):
    """Execute the parse expression with the given string.
       This is the main interface to the client code, once the complete
       expression has been built.

       If you want the grammar to require that the entire input string be
       successfully parsed, then set parseAll to True (equivalent to ending
       the grammar with StringEnd()).

       Note: parseString implicitly calls expandtabs() on the input string,
       in order to report proper column numbers in parse actions.
       If the input string contains tabs and
       the grammar uses parse actions that use the loc argument to index into the
       string being parsed, you can ensure you have a consistent view of the input
       string by:
        - calling parseWithTabs on your grammar before calling parseString
          (see L{I{parseWithTabs}<parseWithTabs>})
        - define your parse action using the full (s,loc,toks) signature, and
          reference the input string using the parse action's s argument
        - explicitly expand the tabs in your input string before calling
          parseString

       Returns the tokens matched starting at location 0 of the string.
       Any ParseBaseException raised while parsing is re-raised from here.
    """
    # results cached for a previous input string must not be reused
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
            # skip trailing whitespace/ignorables, then insist on end of input
            loc = self.preParse( instring, loc )
            StringEnd()._parse( instring, loc )
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        # (sys.exc_info() keeps this valid on both Python 2 and Python 3)
        exc = sys.exc_info()[1]
        raise exc
    else:
        return tokens
def transformString( self, instring ):
    """Extension to scanString, to modify matching text with modified tokens that may
       be returned from a parse action.  To use transformString, define a grammar and
       attach a parse action to it that modifies the returned token list.
       Invoking transformString() on a target string will then scan for matches,
       and replace the matched text patterns according to the logic in the parse
       action.  transformString() returns the resulting transformed string.

       Text between matches is copied through unchanged; a match that yields
       no tokens is dropped from the output.  Note that this method sets
       keepTabs to True on the expression and does not reset it afterwards.
    """
    out = []
    lastE = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for t,s,e in self.scanString( instring ):
            # unmatched text since the end of the previous match
            out.append( instring[lastE:s] )
            if t:
                if isinstance(t,ParseResults):
                    out += t.asList()
                elif isinstance(t,list):
                    out += t
                else:
                    out.append(t)
            lastE = e
        out.append(instring[lastE:])
        return "".join(map(_ustr,out))
    except ParseBaseException:
        # re-raise from this frame; sys.exc_info() keeps the syntax valid
        # on both Python 2 and Python 3
        pe = sys.exc_info()[1]
        raise pe
def __mul__(self,other):
    """Implementation of * operator, to repeat an expression a number of times.
       The right-hand operand may be:
        - an integer n - match this expression exactly n times
        - a tuple (n,m) - match at least n and at most m times
        - a tuple (n,None) or (n,) - match n or more times; (0,None) returns
          ZeroOrMore and (1,None) returns OneOrMore
        - a tuple (None,m) - same as (0,m)
        - a tuple (None,None) or () - same as (0,None)

       Raises TypeError for any other operand, and ValueError for a negative
       count, a maximum smaller than the minimum, or zero total repetitions.
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None, so that (n,) and () are accepted too
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements counts the repetitions beyond the minimum
            optElements -= minElements
        else:
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects" %
                            (type(other[0]),type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: Optional(self + Optional(self + ...)), n deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __and__(self, other ):
    """Implementation of & operator - returns Each.
       A string operand is first wrapped in a Literal.  Any operand that is
       still not a ParserElement produces a SyntaxWarning and a None result.
    """
    operand = other
    if isinstance( operand, basestring ):
        operand = Literal( operand )
    if isinstance( operand, ParserElement ):
        return Each( [ self, operand ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
            SyntaxWarning, stacklevel=2)
    return None
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Install the three debugging callbacks for this expression and turn
       debugging on.  An argument that evaluates false is replaced by the
       corresponding module default action.  Returns self.
    """
    onStart = startAction
    if not onStart:
        onStart = _defaultStartDebugAction
    onSuccess = successAction
    if not onSuccess:
        onSuccess = _defaultSuccessDebugAction
    onException = exceptionAction
    if not onException:
        onException = _defaultExceptionDebugAction

    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       Parameters:
        - file_or_filename - an object with a read() method, or a name
          that can be passed to open()
        - parseAll - passed through unchanged to parseString

       Returns whatever parseString returns for the contents read.
       Any ParseBaseException raised while parsing is re-raised from here.
    """
    try:
        read = file_or_filename.read
    except AttributeError:
        # no read() method, so treat the argument as a filename; try/finally
        # guarantees the file is closed even if reading it fails
        # NOTE(review): "rb" yields bytes on Python 3 - confirm that
        # parseString is meant to receive bytes there
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            f.close()
    else:
        # called outside the try block, so that an AttributeError raised
        # inside read() itself is not mistaken for "this is a filename"
        file_contents = read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        # (sys.exc_info() keeps this valid on both Python 2 and Python 3)
        exc = sys.exc_info()[1]
        raise exc
class Literal(Token):
    """Token that matches one specific string, character for character."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            firstChar = matchString[0]
        except IndexError:
            # nothing to match: warn, and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        else:
            self.firstMatchChar = firstChar
        quotedName = '"%s"' % _ustr(self.match)
        self.name = quotedName
        self.errmsg = "Expected " + quotedName
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*, so the cheap
    # first-character comparison comes first, and startswith is skipped
    # altogether when the match string is a single character
    def parseImpl( self, instring, loc, doActions=True ):
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match,loc):
                return loc+self.matchLen, self.match
        # no match: fill in and raise the exception object held by this expression
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
01526 """ 01527 DEFAULT_KEYWORD_CHARS = alphanums+"_$" 01528 01529 def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ): 01530 super(Keyword,self).__init__() 01531 self.match = matchString 01532 self.matchLen = len(matchString) 01533 try: 01534 self.firstMatchChar = matchString[0] 01535 except IndexError: 01536 warnings.warn("null string passed to Keyword; use Empty() instead", 01537 SyntaxWarning, stacklevel=2) 01538 self.name = '"%s"' % self.match 01539 self.errmsg = "Expected " + self.name 01540 self.mayReturnEmpty = False 01541 #self.myException.msg = self.errmsg 01542 self.mayIndexError = False 01543 self.caseless = caseless 01544 if caseless: 01545 self.caselessmatch = matchString.upper() 01546 identChars = identChars.upper() 01547 self.identChars = _str2dict(identChars) 01548 01549 def parseImpl( self, instring, loc, doActions=True ): 01550 if self.caseless: 01551 if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and 01552 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and 01553 (loc == 0 or instring[loc-1].upper() not in self.identChars) ): 01554 return loc+self.matchLen, self.match 01555 else: 01556 if (instring[loc] == self.firstMatchChar and 01557 (self.matchLen==1 or instring.startswith(self.match,loc)) and 01558 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and 01559 (loc == 0 or instring[loc-1] not in self.identChars) ): 01560 return loc+self.matchLen, self.match 01561 #~ raise ParseException( instring, loc, self.errmsg ) 01562 exc = self.myException 01563 exc.loc = loc 01564 exc.pstr = instring 01565 raise exc 01566 01567 def copy(self): 01568 c = super(Keyword,self).copy() 01569 c.identChars = Keyword.DEFAULT_KEYWORD_CHARS 01570 return c 01571 01572 def setDefaultKeywordChars( chars ): 01573 """Overrides the default Keyword chars 01574 """ 01575 Keyword.DEFAULT_KEYWORD_CHARS = chars 01576 
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
       If asKeyword is True, the word must not be adjacent to further
       body characters on either side.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # parseImpl tests self.re, so make sure the attribute exists even
        # when no regular expression gets built below
        self.re = None

        # with default length limits and no blank among the characters, the
        # whole word can be matched with a single regular expression
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # a single initial character can be matched literally; only
                # a multi-character set needs a [...] character class
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # best effort only: fall back to the character-by-character
                # matching in parseImpl if the pattern cannot be compiled
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        # fast path: use the precompiled regular expression when there is one
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # never scan beyond the maximum word length or the end of the input
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit maximum, a word that continues past it is no match
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        # prefer the inherited string form; if that raises (presumably
        # because no name has been assigned yet - confirm against
        # ParserElement), build a description from the character sets
        try:
            return super(Word,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first 4 characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
01745 """ 01746 def __init__( self, pattern, flags=0): 01747 """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.""" 01748 super(Regex,self).__init__() 01749 01750 if len(pattern) == 0: 01751 warnings.warn("null string passed to Regex; use Empty() instead", 01752 SyntaxWarning, stacklevel=2) 01753 01754 self.pattern = pattern 01755 self.flags = flags 01756 01757 try: 01758 self.re = re.compile(self.pattern, self.flags) 01759 self.reString = self.pattern 01760 except sre_constants.error: 01761 warnings.warn("invalid pattern (%s) passed to Regex" % pattern, 01762 SyntaxWarning, stacklevel=2) 01763 raise 01764 01765 self.name = _ustr(self) 01766 self.errmsg = "Expected " + self.name 01767 #self.myException.msg = self.errmsg 01768 self.mayIndexError = False 01769 self.mayReturnEmpty = True 01770 01771 def parseImpl( self, instring, loc, doActions=True ): 01772 result = self.re.match(instring,loc) 01773 if not result: 01774 exc = self.myException 01775 exc.loc = loc 01776 exc.pstr = instring 01777 raise exc 01778 01779 loc = result.end() 01780 d = result.groupdict() 01781 ret = ParseResults(result.group()) 01782 if d: 01783 for k in d: 01784 ret[k] = d[k] 01785 return loc,ret 01786 01787 def __str__( self ): 01788 try: 01789 return super(Regex,self).__str__() 01790 except: 01791 pass 01792 01793 if self.strRepr is None: 01794 self.strRepr = "Re:(%s)" % repr(self.pattern) 01795 01796 return self.strRepr 01797 01798 01799 class QuotedString(Token): 01800 """Token for matching strings that are delimited by quoting characters. 
01801 """ 01802 def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None): 01803 """ 01804 Defined with the following parameters: 01805 - quoteChar - string of one or more characters defining the quote delimiting string 01806 - escChar - character to escape quotes, typically backslash (default=None) 01807 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) 01808 - multiline - boolean indicating whether quotes can span multiple lines (default=False) 01809 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) 01810 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) 01811 """ 01812 super(QuotedString,self).__init__() 01813 01814 # remove white space from quote chars - wont work anyway 01815 quoteChar = quoteChar.strip() 01816 if len(quoteChar) == 0: 01817 warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 01818 raise SyntaxError() 01819 01820 if endQuoteChar is None: 01821 endQuoteChar = quoteChar 01822 else: 01823 endQuoteChar = endQuoteChar.strip() 01824 if len(endQuoteChar) == 0: 01825 warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 01826 raise SyntaxError() 01827 01828 self.quoteChar = quoteChar 01829 self.quoteCharLen = len(quoteChar) 01830 self.firstQuoteChar = quoteChar[0] 01831 self.endQuoteChar = endQuoteChar 01832 self.endQuoteCharLen = len(endQuoteChar) 01833 self.escChar = escChar 01834 self.escQuote = escQuote 01835 self.unquoteResults = unquoteResults 01836 01837 if multiline: 01838 self.flags = re.MULTILINE | re.DOTALL 01839 self.pattern = r'%s(?:[^%s%s]' % \ 01840 ( re.escape(self.quoteChar), 01841 _escapeRegexRangeChars(self.endQuoteChar[0]), 01842 (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 01843 else: 01844 
self.flags = 0 01845 self.pattern = r'%s(?:[^%s\n\r%s]' % \ 01846 ( re.escape(self.quoteChar), 01847 _escapeRegexRangeChars(self.endQuoteChar[0]), 01848 (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 01849 if len(self.endQuoteChar) > 1: 01850 self.pattern += ( 01851 '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]), 01852 _escapeRegexRangeChars(self.endQuoteChar[i])) 01853 for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')' 01854 ) 01855 if escQuote: 01856 self.pattern += (r'|(?:%s)' % re.escape(escQuote)) 01857 if escChar: 01858 self.pattern += (r'|(?:%s.)' % re.escape(escChar)) 01859 self.escCharReplacePattern = re.escape(self.escChar)+"(.)" 01860 self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) 01861 01862 try: 01863 self.re = re.compile(self.pattern, self.flags) 01864 self.reString = self.pattern 01865 except sre_constants.error: 01866 warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, 01867 SyntaxWarning, stacklevel=2) 01868 raise 01869 01870 self.name = _ustr(self) 01871 self.errmsg = "Expected " + self.name 01872 #self.myException.msg = self.errmsg 01873 self.mayIndexError = False 01874 self.mayReturnEmpty = True 01875 01876 def parseImpl( self, instring, loc, doActions=True ): 01877 result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None 01878 if not result: 01879 exc = self.myException 01880 exc.loc = loc 01881 exc.pstr = instring 01882 raise exc 01883 01884 loc = result.end() 01885 ret = result.group() 01886 01887 if self.unquoteResults: 01888 01889 # strip off quotes 01890 ret = ret[self.quoteCharLen:-self.endQuoteCharLen] 01891 01892 if isinstance(ret,basestring): 01893 # replace escaped characters 01894 if self.escChar: 01895 ret = re.sub(self.escCharReplacePattern,"\g<1>",ret) 01896 01897 # replace escaped quotes 01898 if self.escQuote: 01899 ret = ret.replace(self.escQuote, self.endQuoteChar) 01900 01901 return loc, ret 01902 01903 def __str__( self ): 
01904 try: 01905 return super(QuotedString,self).__str__() 01906 except: 01907 pass 01908 01909 if self.strRepr is None: 01910 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) 01911 01912 return self.strRepr 01913 01914 01915 class CharsNotIn(Token): 01916 """Token for matching words composed of characters *not* in a given set. 01917 Defined with string containing all disallowed characters, and an optional 01918 minimum, maximum, and/or exact length. The default value for min is 1 (a 01919 minimum value < 1 is not valid); the default values for max and exact 01920 are 0, meaning no maximum or exact length restriction. 01921 """ 01922 def __init__( self, notChars, min=1, max=0, exact=0 ): 01923 super(CharsNotIn,self).__init__() 01924 self.skipWhitespace = False 01925 self.notChars = notChars 01926 01927 if min < 1: 01928 raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") 01929 01930 self.minLen = min 01931 01932 if max > 0: 01933 self.maxLen = max 01934 else: 01935 self.maxLen = _MAX_INT 01936 01937 if exact > 0: 01938 self.maxLen = exact 01939 self.minLen = exact 01940 01941 self.name = _ustr(self) 01942 self.errmsg = "Expected " + self.name 01943 self.mayReturnEmpty = ( self.minLen == 0 ) 01944 #self.myException.msg = self.errmsg 01945 self.mayIndexError = False 01946 01947 def parseImpl( self, instring, loc, doActions=True ): 01948 if instring[loc] in self.notChars: 01949 #~ raise ParseException( instring, loc, self.errmsg ) 01950 exc = self.myException 01951 exc.loc = loc 01952 exc.pstr = instring 01953 raise exc 01954 01955 start = loc 01956 loc += 1 01957 notchars = self.notChars 01958 maxlen = min( start+self.maxLen, len(instring) ) 01959 while loc < maxlen and \ 01960 (instring[loc] not in notchars): 01961 loc += 1 01962 01963 if loc - start < self.minLen: 01964 #~ raise ParseException( instring, loc, self.errmsg ) 01965 exc = 
self.myException 01966 exc.loc = loc 01967 exc.pstr = instring 01968 raise exc 01969 01970 return loc, instring[start:loc] 01971 01972 def __str__( self ): 01973 try: 01974 return super(CharsNotIn, self).__str__() 01975 except: 01976 pass 01977 01978 if self.strRepr is None: 01979 if len(self.notChars) > 4: 01980 self.strRepr = "!W:(%s...)" % self.notChars[:4] 01981 else: 01982 self.strRepr = "!W:(%s)" % self.notChars 01983 01984 return self.strRepr 01985 01986 class White(Token): 01987 """Special matching class for matching whitespace. Normally, whitespace is ignored 01988 by pyparsing grammars. This class is included when some whitespace structures 01989 are significant. Define with a string containing the whitespace characters to be 01990 matched; default is " \\t\\r\\n". Also takes optional min, max, and exact arguments, 01991 as defined for the Word class.""" 01992 whiteStrs = { 01993 " " : "<SPC>", 01994 "\t": "<TAB>", 01995 "\n": "<LF>", 01996 "\r": "<CR>", 01997 "\f": "<FF>", 01998 } 01999 def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): 02000 super(White,self).__init__() 02001 self.matchWhite = ws 02002 self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) ) 02003 #~ self.leaveWhitespace() 02004 self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) 02005 self.mayReturnEmpty = True 02006 self.errmsg = "Expected " + self.name 02007 #self.myException.msg = self.errmsg 02008 02009 self.minLen = min 02010 02011 if max > 0: 02012 self.maxLen = max 02013 else: 02014 self.maxLen = _MAX_INT 02015 02016 if exact > 0: 02017 self.maxLen = exact 02018 self.minLen = exact 02019 02020 def parseImpl( self, instring, loc, doActions=True ): 02021 if not(instring[ loc ] in self.matchWhite): 02022 #~ raise ParseException( instring, loc, self.errmsg ) 02023 exc = self.myException 02024 exc.loc = loc 02025 exc.pstr = instring 02026 raise exc 02027 start = loc 02028 loc += 1 02029 maxloc = start + self.maxLen 02030 
maxloc = min( maxloc, len(instring) ) 02031 while loc < maxloc and instring[loc] in self.matchWhite: 02032 loc += 1 02033 02034 if loc - start < self.minLen: 02035 #~ raise ParseException( instring, loc, self.errmsg ) 02036 exc = self.myException 02037 exc.loc = loc 02038 exc.pstr = instring 02039 raise exc 02040 02041 return loc, instring[start:loc] 02042 02043 02044 class _PositionToken(Token): 02045 def __init__( self ): 02046 super(_PositionToken,self).__init__() 02047 self.name=self.__class__.__name__ 02048 self.mayReturnEmpty = True 02049 self.mayIndexError = False 02050 02051 class GoToColumn(_PositionToken): 02052 """Token to advance to a specific column of input text; useful for tabular report scraping.""" 02053 def __init__( self, colno ): 02054 super(GoToColumn,self).__init__() 02055 self.col = colno 02056 02057 def preParse( self, instring, loc ): 02058 if col(loc,instring) != self.col: 02059 instrlen = len(instring) 02060 if self.ignoreExprs: 02061 loc = self._skipIgnorables( instring, loc ) 02062 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : 02063 loc += 1 02064 return loc 02065 02066 def parseImpl( self, instring, loc, doActions=True ): 02067 thiscol = col( loc, instring ) 02068 if thiscol > self.col: 02069 raise ParseException( instring, loc, "Text not in expected column", self ) 02070 newloc = loc + self.col - thiscol 02071 ret = instring[ loc: newloc ] 02072 return newloc, ret 02073 02074 class LineStart(_PositionToken): 02075 """Matches if current position is at the beginning of a line within the parse string""" 02076 def __init__( self ): 02077 super(LineStart,self).__init__() 02078 self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) 02079 self.errmsg = "Expected start of line" 02080 #self.myException.msg = self.errmsg 02081 02082 def preParse( self, instring, loc ): 02083 preloc = super(LineStart,self).preParse(instring,loc) 02084 if instring[preloc] == "\n": 02085 loc += 1 
02086 return loc 02087 02088 def parseImpl( self, instring, loc, doActions=True ): 02089 if not( loc==0 or 02090 (loc == self.preParse( instring, 0 )) or 02091 (instring[loc-1] == "\n") ): #col(loc, instring) != 1: 02092 #~ raise ParseException( instring, loc, "Expected start of line" ) 02093 exc = self.myException 02094 exc.loc = loc 02095 exc.pstr = instring 02096 raise exc 02097 return loc, [] 02098 02099 class LineEnd(_PositionToken): 02100 """Matches if current position is at the end of a line within the parse string""" 02101 def __init__( self ): 02102 super(LineEnd,self).__init__() 02103 self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) 02104 self.errmsg = "Expected end of line" 02105 #self.myException.msg = self.errmsg 02106 02107 def parseImpl( self, instring, loc, doActions=True ): 02108 if loc<len(instring): 02109 if instring[loc] == "\n": 02110 return loc+1, "\n" 02111 else: 02112 #~ raise ParseException( instring, loc, "Expected end of line" ) 02113 exc = self.myException 02114 exc.loc = loc 02115 exc.pstr = instring 02116 raise exc 02117 elif loc == len(instring): 02118 return loc+1, [] 02119 else: 02120 exc = self.myException 02121 exc.loc = loc 02122 exc.pstr = instring 02123 raise exc 02124 02125 class StringStart(_PositionToken): 02126 """Matches if current position is at the beginning of the parse string""" 02127 def __init__( self ): 02128 super(StringStart,self).__init__() 02129 self.errmsg = "Expected start of text" 02130 #self.myException.msg = self.errmsg 02131 02132 def parseImpl( self, instring, loc, doActions=True ): 02133 if loc != 0: 02134 # see if entire string up to here is just whitespace and ignoreables 02135 if loc != self.preParse( instring, 0 ): 02136 #~ raise ParseException( instring, loc, "Expected start of text" ) 02137 exc = self.myException 02138 exc.loc = loc 02139 exc.pstr = instring 02140 raise exc 02141 return loc, [] 02142 02143 class StringEnd(_PositionToken): 02144 """Matches if current 
position is at the end of the parse string""" 02145 def __init__( self ): 02146 super(StringEnd,self).__init__() 02147 self.errmsg = "Expected end of text" 02148 #self.myException.msg = self.errmsg 02149 02150 def parseImpl( self, instring, loc, doActions=True ): 02151 if loc < len(instring): 02152 #~ raise ParseException( instring, loc, "Expected end of text" ) 02153 exc = self.myException 02154 exc.loc = loc 02155 exc.pstr = instring 02156 raise exc 02157 elif loc == len(instring): 02158 return loc+1, [] 02159 elif loc > len(instring): 02160 return loc, [] 02161 else: 02162 exc = self.myException 02163 exc.loc = loc 02164 exc.pstr = instring 02165 raise exc 02166 02167 class WordStart(_PositionToken): 02168 """Matches if the current position is at the beginning of a Word, and 02169 is not preceded by any character in a given set of wordChars 02170 (default=printables). To emulate the \b behavior of regular expressions, 02171 use WordStart(alphanums). WordStart will also match at the beginning of 02172 the string being parsed, or at the beginning of a line. 02173 """ 02174 def __init__(self, wordChars = printables): 02175 super(WordStart,self).__init__() 02176 self.wordChars = _str2dict(wordChars) 02177 self.errmsg = "Not at the start of a word" 02178 02179 def parseImpl(self, instring, loc, doActions=True ): 02180 if loc != 0: 02181 if (instring[loc-1] in self.wordChars or 02182 instring[loc] not in self.wordChars): 02183 exc = self.myException 02184 exc.loc = loc 02185 exc.pstr = instring 02186 raise exc 02187 return loc, [] 02188 02189 class WordEnd(_PositionToken): 02190 """Matches if the current position is at the end of a Word, and 02191 is not followed by any character in a given set of wordChars 02192 (default=printables). To emulate the \b behavior of regular expressions, 02193 use WordEnd(alphanums). WordEnd will also match at the end of 02194 the string being parsed, or at the end of a line. 
02195 """ 02196 def __init__(self, wordChars = printables): 02197 super(WordEnd,self).__init__() 02198 self.wordChars = _str2dict(wordChars) 02199 self.skipWhitespace = False 02200 self.errmsg = "Not at the end of a word" 02201 02202 def parseImpl(self, instring, loc, doActions=True ): 02203 instrlen = len(instring) 02204 if instrlen>0 and loc<instrlen: 02205 if (instring[loc] in self.wordChars or 02206 instring[loc-1] not in self.wordChars): 02207 #~ raise ParseException( instring, loc, "Expected end of word" ) 02208 exc = self.myException 02209 exc.loc = loc 02210 exc.pstr = instring 02211 raise exc 02212 return loc, [] 02213 02214 02215 class ParseExpression(ParserElement): 02216 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" 02217 def __init__( self, exprs, savelist = False ): 02218 super(ParseExpression,self).__init__(savelist) 02219 if isinstance( exprs, list ): 02220 self.exprs = exprs 02221 elif isinstance( exprs, basestring ): 02222 self.exprs = [ Literal( exprs ) ] 02223 else: 02224 try: 02225 self.exprs = list( exprs ) 02226 except TypeError: 02227 self.exprs = [ exprs ] 02228 self.callPreparse = False 02229 02230 def __getitem__( self, i ): 02231 return self.exprs[i] 02232 02233 def append( self, other ): 02234 self.exprs.append( other ) 02235 self.strRepr = None 02236 return self 02237 02238 def leaveWhitespace( self ): 02239 """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on 02240 all contained expressions.""" 02241 self.skipWhitespace = False 02242 self.exprs = [ e.copy() for e in self.exprs ] 02243 for e in self.exprs: 02244 e.leaveWhitespace() 02245 return self 02246 02247 def ignore( self, other ): 02248 if isinstance( other, Suppress ): 02249 if other not in self.ignoreExprs: 02250 super( ParseExpression, self).ignore( other ) 02251 for e in self.exprs: 02252 e.ignore( self.ignoreExprs[-1] ) 02253 else: 02254 super( ParseExpression, self).ignore( other ) 02255 for e in 
self.exprs: 02256 e.ignore( self.ignoreExprs[-1] ) 02257 return self 02258 02259 def __str__( self ): 02260 try: 02261 return super(ParseExpression,self).__str__() 02262 except: 02263 pass 02264 02265 if self.strRepr is None: 02266 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) ) 02267 return self.strRepr 02268 02269 def streamline( self ): 02270 super(ParseExpression,self).streamline() 02271 02272 for e in self.exprs: 02273 e.streamline() 02274 02275 # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d ) 02276 # but only if there are no parse actions or resultsNames on the nested And's 02277 # (likewise for Or's and MatchFirst's) 02278 if ( len(self.exprs) == 2 ): 02279 other = self.exprs[0] 02280 if ( isinstance( other, self.__class__ ) and 02281 not(other.parseAction) and 02282 other.resultsName is None and 02283 not other.debug ): 02284 self.exprs = other.exprs[:] + [ self.exprs[1] ] 02285 self.strRepr = None 02286 self.mayReturnEmpty |= other.mayReturnEmpty 02287 self.mayIndexError |= other.mayIndexError 02288 02289 other = self.exprs[-1] 02290 if ( isinstance( other, self.__class__ ) and 02291 not(other.parseAction) and 02292 other.resultsName is None and 02293 not other.debug ): 02294 self.exprs = self.exprs[:-1] + other.exprs[:] 02295 self.strRepr = None 02296 self.mayReturnEmpty |= other.mayReturnEmpty 02297 self.mayIndexError |= other.mayIndexError 02298 02299 return self 02300 02301 def setResultsName( self, name, listAllMatches=False ): 02302 ret = super(ParseExpression,self).setResultsName(name,listAllMatches) 02303 return ret 02304 02305 def validate( self, validateTrace=[] ): 02306 tmp = validateTrace[:]+[self] 02307 for e in self.exprs: 02308 e.validate(tmp) 02309 self.checkRecursion( [] ) 02310 02311 class And(ParseExpression): 02312 """Requires all given ParseExpressions to be found in the given order. 02313 Expressions may be separated by whitespace. 
02314 May be constructed using the '+' operator. 02315 """ 02316 02317 class _ErrorStop(Empty): 02318 def __init__(self, *args, **kwargs): 02319 super(Empty,self).__init__(*args, **kwargs) 02320 self.leaveWhitespace() 02321 02322 def __init__( self, exprs, savelist = True ): 02323 super(And,self).__init__(exprs, savelist) 02324 self.mayReturnEmpty = True 02325 for e in self.exprs: 02326 if not e.mayReturnEmpty: 02327 self.mayReturnEmpty = False 02328 break 02329 self.setWhitespaceChars( exprs[0].whiteChars ) 02330 self.skipWhitespace = exprs[0].skipWhitespace 02331 self.callPreparse = True 02332 02333 def parseImpl( self, instring, loc, doActions=True ): 02334 # pass False as last arg to _parse for first element, since we already 02335 # pre-parsed the string as part of our And pre-parsing 02336 loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False ) 02337 errorStop = False 02338 for e in self.exprs[1:]: 02339 if isinstance(e, And._ErrorStop): 02340 errorStop = True 02341 continue 02342 if errorStop: 02343 try: 02344 loc, exprtokens = e._parse( instring, loc, doActions ) 02345 except ParseSyntaxException: 02346 raise 02347 except ParseBaseException, pe: 02348 raise ParseSyntaxException(pe) 02349 except IndexError, ie: 02350 raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) ) 02351 else: 02352 loc, exprtokens = e._parse( instring, loc, doActions ) 02353 if exprtokens or exprtokens.keys(): 02354 resultlist += exprtokens 02355 return loc, resultlist 02356 02357 def __iadd__(self, other ): 02358 if isinstance( other, basestring ): 02359 other = Literal( other ) 02360 return self.append( other ) #And( [ self, other ] ) 02361 02362 def checkRecursion( self, parseElementList ): 02363 subRecCheckList = parseElementList[:] + [ self ] 02364 for e in self.exprs: 02365 e.checkRecursion( subRecCheckList ) 02366 if not e.mayReturnEmpty: 02367 break 02368 02369 def __str__( self ): 02370 if hasattr(self,"name"): 
02371 return self.name 02372 02373 if self.strRepr is None: 02374 self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 02375 02376 return self.strRepr 02377 02378 02379 class Or(ParseExpression): 02380 """Requires that at least one ParseExpression is found. 02381 If two expressions match, the expression that matches the longest string will be used. 02382 May be constructed using the '^' operator. 02383 """ 02384 def __init__( self, exprs, savelist = False ): 02385 super(Or,self).__init__(exprs, savelist) 02386 self.mayReturnEmpty = False 02387 for e in self.exprs: 02388 if e.mayReturnEmpty: 02389 self.mayReturnEmpty = True 02390 break 02391 02392 def parseImpl( self, instring, loc, doActions=True ): 02393 maxExcLoc = -1 02394 maxMatchLoc = -1 02395 maxException = None 02396 for e in self.exprs: 02397 try: 02398 loc2 = e.tryParse( instring, loc ) 02399 except ParseException, err: 02400 if err.loc > maxExcLoc: 02401 maxException = err 02402 maxExcLoc = err.loc 02403 except IndexError: 02404 if len(instring) > maxExcLoc: 02405 maxException = ParseException(instring,len(instring),e.errmsg,self) 02406 maxExcLoc = len(instring) 02407 else: 02408 if loc2 > maxMatchLoc: 02409 maxMatchLoc = loc2 02410 maxMatchExp = e 02411 02412 if maxMatchLoc < 0: 02413 if maxException is not None: 02414 raise maxException 02415 else: 02416 raise ParseException(instring, loc, "no defined alternatives to match", self) 02417 02418 return maxMatchExp._parse( instring, loc, doActions ) 02419 02420 def __ixor__(self, other ): 02421 if isinstance( other, basestring ): 02422 other = Literal( other ) 02423 return self.append( other ) #Or( [ self, other ] ) 02424 02425 def __str__( self ): 02426 if hasattr(self,"name"): 02427 return self.name 02428 02429 if self.strRepr is None: 02430 self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 02431 02432 return self.strRepr 02433 02434 def checkRecursion( self, parseElementList ): 02435 subRecCheckList = 
parseElementList[:] + [ self ] 02436 for e in self.exprs: 02437 e.checkRecursion( subRecCheckList ) 02438 02439 02440 class MatchFirst(ParseExpression): 02441 """Requires that at least one ParseExpression is found. 02442 If two expressions match, the first one listed is the one that will match. 02443 May be constructed using the '|' operator. 02444 """ 02445 def __init__( self, exprs, savelist = False ): 02446 super(MatchFirst,self).__init__(exprs, savelist) 02447 if exprs: 02448 self.mayReturnEmpty = False 02449 for e in self.exprs: 02450 if e.mayReturnEmpty: 02451 self.mayReturnEmpty = True 02452 break 02453 else: 02454 self.mayReturnEmpty = True 02455 02456 def parseImpl( self, instring, loc, doActions=True ): 02457 maxExcLoc = -1 02458 maxException = None 02459 for e in self.exprs: 02460 try: 02461 ret = e._parse( instring, loc, doActions ) 02462 return ret 02463 except ParseException, err: 02464 if err.loc > maxExcLoc: 02465 maxException = err 02466 maxExcLoc = err.loc 02467 except IndexError: 02468 if len(instring) > maxExcLoc: 02469 maxException = ParseException(instring,len(instring),e.errmsg,self) 02470 maxExcLoc = len(instring) 02471 02472 # only got here if no expression matched, raise exception for match that made it the furthest 02473 else: 02474 if maxException is not None: 02475 raise maxException 02476 else: 02477 raise ParseException(instring, loc, "no defined alternatives to match", self) 02478 02479 def __ior__(self, other ): 02480 if isinstance( other, basestring ): 02481 other = Literal( other ) 02482 return self.append( other ) #MatchFirst( [ self, other ] ) 02483 02484 def __str__( self ): 02485 if hasattr(self,"name"): 02486 return self.name 02487 02488 if self.strRepr is None: 02489 self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 02490 02491 return self.strRepr 02492 02493 def checkRecursion( self, parseElementList ): 02494 subRecCheckList = parseElementList[:] + [ self ] 02495 for e in self.exprs: 02496 
e.checkRecursion( subRecCheckList ) 02497 02498 02499 class Each(ParseExpression): 02500 """Requires all given ParseExpressions to be found, but in any order. 02501 Expressions may be separated by whitespace. 02502 May be constructed using the '&' operator. 02503 """ 02504 def __init__( self, exprs, savelist = True ): 02505 super(Each,self).__init__(exprs, savelist) 02506 self.mayReturnEmpty = True 02507 for e in self.exprs: 02508 if not e.mayReturnEmpty: 02509 self.mayReturnEmpty = False 02510 break 02511 self.skipWhitespace = True 02512 self.initExprGroups = True 02513 02514 def parseImpl( self, instring, loc, doActions=True ): 02515 if self.initExprGroups: 02516 self.optionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ] 02517 self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] 02518 self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] 02519 self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] 02520 self.required += self.multirequired 02521 self.initExprGroups = False 02522 tmpLoc = loc 02523 tmpReqd = self.required[:] 02524 tmpOpt = self.optionals[:] 02525 matchOrder = [] 02526 02527 keepMatching = True 02528 while keepMatching: 02529 tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired 02530 failed = [] 02531 for e in tmpExprs: 02532 try: 02533 tmpLoc = e.tryParse( instring, tmpLoc ) 02534 except ParseException: 02535 failed.append(e) 02536 else: 02537 matchOrder.append(e) 02538 if e in tmpReqd: 02539 tmpReqd.remove(e) 02540 elif e in tmpOpt: 02541 tmpOpt.remove(e) 02542 if len(failed) == len(tmpExprs): 02543 keepMatching = False 02544 02545 if tmpReqd: 02546 missing = ", ".join( [ _ustr(e) for e in tmpReqd ] ) 02547 raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) 02548 02549 # add any unmatched Optionals, in case they have default values defined 02550 matchOrder += list(e for e in 
self.exprs if isinstance(e,Optional) and e.expr in tmpOpt) 02551 02552 resultlist = [] 02553 for e in matchOrder: 02554 loc,results = e._parse(instring,loc,doActions) 02555 resultlist.append(results) 02556 02557 finalResults = ParseResults([]) 02558 for r in resultlist: 02559 dups = {} 02560 for k in r.keys(): 02561 if k in finalResults.keys(): 02562 tmp = ParseResults(finalResults[k]) 02563 tmp += ParseResults(r[k]) 02564 dups[k] = tmp 02565 finalResults += ParseResults(r) 02566 for k,v in dups.items(): 02567 finalResults[k] = v 02568 return loc, finalResults 02569 02570 def __str__( self ): 02571 if hasattr(self,"name"): 02572 return self.name 02573 02574 if self.strRepr is None: 02575 self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 02576 02577 return self.strRepr 02578 02579 def checkRecursion( self, parseElementList ): 02580 subRecCheckList = parseElementList[:] + [ self ] 02581 for e in self.exprs: 02582 e.checkRecursion( subRecCheckList ) 02583 02584 02585 class ParseElementEnhance(ParserElement): 02586 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" 02587 def __init__( self, expr, savelist=False ): 02588 super(ParseElementEnhance,self).__init__(savelist) 02589 if isinstance( expr, basestring ): 02590 expr = Literal(expr) 02591 self.expr = expr 02592 self.strRepr = None 02593 if expr is not None: 02594 self.mayIndexError = expr.mayIndexError 02595 self.mayReturnEmpty = expr.mayReturnEmpty 02596 self.setWhitespaceChars( expr.whiteChars ) 02597 self.skipWhitespace = expr.skipWhitespace 02598 self.saveAsList = expr.saveAsList 02599 self.callPreparse = expr.callPreparse 02600 self.ignoreExprs.extend(expr.ignoreExprs) 02601 02602 def parseImpl( self, instring, loc, doActions=True ): 02603 if self.expr is not None: 02604 return self.expr._parse( instring, loc, doActions, callPreParse=False ) 02605 else: 02606 raise ParseException("",loc,self.errmsg,self) 02607 02608 def leaveWhitespace( self ): 
02609 self.skipWhitespace = False 02610 self.expr = self.expr.copy() 02611 if self.expr is not None: 02612 self.expr.leaveWhitespace() 02613 return self 02614 02615 def ignore( self, other ): 02616 if isinstance( other, Suppress ): 02617 if other not in self.ignoreExprs: 02618 super( ParseElementEnhance, self).ignore( other ) 02619 if self.expr is not None: 02620 self.expr.ignore( self.ignoreExprs[-1] ) 02621 else: 02622 super( ParseElementEnhance, self).ignore( other ) 02623 if self.expr is not None: 02624 self.expr.ignore( self.ignoreExprs[-1] ) 02625 return self 02626 02627 def streamline( self ): 02628 super(ParseElementEnhance,self).streamline() 02629 if self.expr is not None: 02630 self.expr.streamline() 02631 return self 02632 02633 def checkRecursion( self, parseElementList ): 02634 if self in parseElementList: 02635 raise RecursiveGrammarException( parseElementList+[self] ) 02636 subRecCheckList = parseElementList[:] + [ self ] 02637 if self.expr is not None: 02638 self.expr.checkRecursion( subRecCheckList ) 02639 02640 def validate( self, validateTrace=[] ): 02641 tmp = validateTrace[:]+[self] 02642 if self.expr is not None: 02643 self.expr.validate(tmp) 02644 self.checkRecursion( [] ) 02645 02646 def __str__( self ): 02647 try: 02648 return super(ParseElementEnhance,self).__str__() 02649 except: 02650 pass 02651 02652 if self.strRepr is None and self.expr is not None: 02653 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) 02654 return self.strRepr 02655 02656 02657 class FollowedBy(ParseElementEnhance): 02658 """Lookahead matching of the given parse expression. FollowedBy 02659 does *not* advance the parsing position within the input string, it only 02660 verifies that the specified parse expression matches at the current 02661 position. 
class ZeroOrMore(ParseElementEnhance):
    """Match the wrapped expression as many times as it will match, including not at all."""

    def __init__(self, expr):
        super(ZeroOrMore, self).__init__(expr)
        # matching nothing at all still counts as a successful parse
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        matched = []
        try:
            # the first repetition is parsed without the pre-parse step
            loc, matched = self.expr._parse(instring, loc, doActions, callPreParse=False)
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = loc
                if skipIgnorables:
                    nextLoc = self._skipIgnorables(instring, loc)
                loc, found = self.expr._parse(instring, nextLoc, doActions)
                # keep results that carry either tokens or named values
                if found or found.keys():
                    matched += found
        except (ParseException, IndexError):
            # the first failed attempt simply ends the repetition
            pass
        return loc, matched

    def __str__(self):
        if hasattr(self, "name"):
            return self.name
        if self.strRepr is None:
            # build the default representation once and cache it
            self.strRepr = "[" + _ustr(self.expr) + "]..."
        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        # named results of a repetition are always stored as a list
        named.saveAsList = True
        return named
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.

       Parameters:
        - other - the target expression to skip forward to
        - include - (default=False) if true, the matched expression is also parsed
          (the skipped text and matched expression are returned as a 2-element list)
        - ignore - (default=None) expression for grammars (typically quoted strings
          and comments) that might contain false matches; text matching it is
          stepped over while scanning
        - failOn - (default=None) expression (or string, converted to a Literal)
          that aborts the skip with a ParseException if it matches at a scanned
          position before the target is found
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        # failParse marks an exception raised because failOn matched; that one
        # must propagate instead of advancing the scan by one character
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + _ustr(self.failOn))
                    failParse = False
                if self.ignoreExpr is not None:
                    # step over every consecutive match of the ignore expression
                    # (a leftover debug print to stdout used to live here)
                    while 1:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                        except ParseBaseException:
                            break
                # probe for the target without running parse actions
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    # parse the target for real so its tokens can be returned too
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ skipText ]
            except (ParseException,IndexError):
                if failParse:
                    raise
                else:
                    # target not found here; try the next character position
                    loc += 1
        # scanned past the end of the input without finding the target
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
notation. 02892 When the expression is known, it is assigned to the Forward variable using the '<<' operator. 02893 02894 Note: take care when assigning to Forward not to overlook precedence of operators. 02895 Specifically, '|' has a lower precedence than '<<', so that:: 02896 fwdExpr << a | b | c 02897 will actually be evaluated as:: 02898 (fwdExpr << a) | b | c 02899 thereby leaving b and c out as parseable alternatives. It is recommended that you 02900 explicitly group the values inserted into the Forward:: 02901 fwdExpr << (a | b | c) 02902 """ 02903 def __init__( self, other=None ): 02904 super(Forward,self).__init__( other, savelist=False ) 02905 02906 def __lshift__( self, other ): 02907 if isinstance( other, basestring ): 02908 other = Literal(other) 02909 self.expr = other 02910 self.mayReturnEmpty = other.mayReturnEmpty 02911 self.strRepr = None 02912 self.mayIndexError = self.expr.mayIndexError 02913 self.mayReturnEmpty = self.expr.mayReturnEmpty 02914 self.setWhitespaceChars( self.expr.whiteChars ) 02915 self.skipWhitespace = self.expr.skipWhitespace 02916 self.saveAsList = self.expr.saveAsList 02917 self.ignoreExprs.extend(self.expr.ignoreExprs) 02918 return None 02919 02920 def leaveWhitespace( self ): 02921 self.skipWhitespace = False 02922 return self 02923 02924 def streamline( self ): 02925 if not self.streamlined: 02926 self.streamlined = True 02927 if self.expr is not None: 02928 self.expr.streamline() 02929 return self 02930 02931 def validate( self, validateTrace=[] ): 02932 if self not in validateTrace: 02933 tmp = validateTrace[:]+[self] 02934 if self.expr is not None: 02935 self.expr.validate(tmp) 02936 self.checkRecursion([]) 02937 02938 def __str__( self ): 02939 if hasattr(self,"name"): 02940 return self.name 02941 02942 self._revertClass = self.__class__ 02943 self.__class__ = _ForwardNoRecurse 02944 try: 02945 if self.expr is not None: 02946 retString = _ustr(self.expr) 02947 else: 02948 retString = "None" 02949 finally: 02950 
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.

       Deprecated: constructing an Upcase emits a DeprecationWarning whose text
       directs callers to the upcaseTokens parse action instead.
    """
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        # stacklevel=2 attributes the warning to the code constructing the Upcase
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                       DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return a new list holding the upper-cased form of every token in tokenlist."""
        # call the token's own upper() method; the module-level string.upper
        # function is deprecated and only forwards to this method anyway
        return [ tok.upper() for tok in tokenlist ]
class Group(TokenConverter):
    """Converter that nests the matched tokens inside a single list, so that the
       tokens of repetitions such as ZeroOrMore and OneOrMore stay together."""

    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # hand the token collection back as the only element of a new list
        grouped = [tokenlist]
        return grouped
class OnlyOnce(object):
    """Parse action wrapper that lets the wrapped action run a single time;
       later calls raise ParseException until reset() is called."""

    def __init__(self, methodCall):
        self.called = False
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)

    def __call__(self, s, l, t):
        # refuse any call made after a successful one
        if self.called:
            raise ParseException(s, l, "")
        outcome = self.callable(s, l, t)
        # only mark as used once the wrapped action returned normally
        self.called = True
        return outcome

    def reset(self):
        """Allow the wrapped action to be invoked again."""
        self.called = False
def delimitedList(expr, delim=",", combine=False):
    """Build an expression matching one or more expr items separated by delim
       (',' unless specified otherwise).

       With combine=False (the default) the delimiters are suppressed and the
       items come back as a list of tokens.  With combine=True the whole list,
       delimiters included, is wrapped in a Combine and returned as a single
       token string.
    """
    exprText = _ustr(expr)
    listName = exprText + " [" + _ustr(delim) + " " + exprText + "]..."
    if not combine:
        # drop the delimiters from the results, keep the items
        separator = Suppress(delim)
        grammar = expr + ZeroOrMore(separator + expr)
    else:
        grammar = Combine(expr + ZeroOrMore(delim + expr))
    return grammar.setName(listName)
03124 """ 03125 arrayExpr = Forward() 03126 def countFieldParseAction(s,l,t): 03127 n = int(t[0]) 03128 arrayExpr << (n and Group(And([expr]*n)) or Group(empty)) 03129 return [] 03130 return ( Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True) + arrayExpr ) 03131 03132 def _flatten(L): 03133 if type(L) is not list: return [L] 03134 if L == []: return L 03135 return _flatten(L[0]) + _flatten(L[1:]) 03136 03137 def matchPreviousLiteral(expr): 03138 """Helper to define an expression that is indirectly defined from 03139 the tokens matched in a previous expression, that is, it looks 03140 for a 'repeat' of a previous expression. For example:: 03141 first = Word(nums) 03142 second = matchPreviousLiteral(first) 03143 matchExpr = first + ":" + second 03144 will match "1:1", but not "1:2". Because this matches a 03145 previous literal, will also match the leading "1:1" in "1:10". 03146 If this is not desired, use matchPreviousExpr. 03147 Do *not* use with packrat parsing enabled. 03148 """ 03149 rep = Forward() 03150 def copyTokenToRepeater(s,l,t): 03151 if t: 03152 if len(t) == 1: 03153 rep << t[0] 03154 else: 03155 # flatten t tokens 03156 tflat = _flatten(t.asList()) 03157 rep << And( [ Literal(tt) for tt in tflat ] ) 03158 else: 03159 rep << Empty() 03160 expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 03161 return rep 03162 03163 def matchPreviousExpr(expr): 03164 """Helper to define an expression that is indirectly defined from 03165 the tokens matched in a previous expression, that is, it looks 03166 for a 'repeat' of a previous expression. For example:: 03167 first = Word(nums) 03168 second = matchPreviousExpr(first) 03169 matchExpr = first + ":" + second 03170 will match "1:1", but not "1:2". Because this matches by 03171 expressions, will *not* match the leading "1:1" in "1:10"; 03172 the expressions are evaluated first, and then compared, so 03173 "1" is compared with "10". 
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       If strs is neither a string nor a list/tuple, a SyntaxWarning is issued and
       the argument is treated as an empty set of literals.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # keep going with no literals rather than failing below on an unbound name
        symbols = []

    # drop duplicates, and move any symbol that starts with an earlier symbol
    # in front of it so the longer alternative is always tried first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing left that conflicts with position i; move on
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: a character class suffices
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
def originalTextFor(expr, asString=True):
    """Wrap expr so that a match yields the original, untokenized input text it covered.

       Handy for turning the parsed fields of an HTML start tag back into the raw
       tag text, or for rejoining tokens that were separated by whitespace.

       With asString=True (the default) the parse action returns the matched slice
       of the input as a plain string.  With asString=False the matched tokens are
       replaced in place by that single slice while the ParseResults object is
       kept, so results names defined inside expr remain available; pass False
       whenever those names must be preserved.
    """
    def reportLocation(s, loc, t):
        return loc

    # an Empty that consumes nothing and reports the current position as its token
    marker = Empty().setParseAction(reportLocation)
    startMarker = marker("_original_start")
    endMarker = marker("_original_end")
    wrapped = startMarker + expr + endMarker

    if asString:
        def extractText(s, l, t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s, l, t):
            # clear the tokens, then insert the raw slice and remove the two
            # bookkeeping names so only the caller's own names remain
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]

    wrapped.setParseAction(extractText)
    return wrapped
def replaceWith(replStr):
    """Create a parse action that ignores its arguments and always yields replStr
       as the single resulting token.  Particularly handy together with
       transformString().
    """
    def _constantTokens(*args):
        # a fresh one-element list on every call
        replacement = [replStr]
        return replacement
    return _constantTokens
def getTokensEndLoc():
    """Return the end location of the tokens being handled by the current parse action.

       Walks the call stack, starting two frames above this function, until it
       reaches a frame for a function named "_parseNoCache" and returns that
       frame's local variable "loc".  Raises ParseFatalException when no such
       frame exists, i.e. when not called from within a parse action.
    """
    import inspect
    frames = inspect.stack()
    try:
        # skip this function and its direct caller; argument normalizers may sit in between
        for record in frames[2:]:
            frame, funcName = record[0], record[3]
            if funcName != "_parseNoCache":
                continue
            return frame.f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the stack snapshot explicitly before leaving
        del frames
def withAttribute(*args,**attrDict):
    """Create a validating parse action for start tags built with makeXMLTags or
       makeHTMLTags, so that only tags carrying the required attribute values
       match (avoiding false hits on common tags such as <TD> or <DIV>).

       The required attributes are given either as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       Names with a namespace prefix can only be written in the tuple form.
       When positional tuples are supplied, keyword arguments are not used.

       Pass withAttribute.ANY_VALUE as a value to require only that the
       attribute is present, whatever its value.

       The returned action raises ParseException when a required attribute is
       missing or has a different value.
    """
    if args:
        pairs = args[:]
    else:
        pairs = attrDict.items()
    # snapshot as a list of 2-tuples; also rejects malformed entries right here
    required = [(name, value) for name, value in pairs]

    def checkAttributes(s,l,tokens):
        for attrName, attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))
    return checkAttributes
# sentinel meaning "attribute must exist, value does not matter"
withAttribute.ANY_VALUE = object()
03466 03467 Parameters: 03468 - baseExpr - expression representing the most basic element for the nested 03469 - opList - list of tuples, one for each operator precedence level in the 03470 expression grammar; each tuple is of the form 03471 (opExpr, numTerms, rightLeftAssoc, parseAction), where: 03472 - opExpr is the pyparsing expression for the operator; 03473 may also be a string, which will be converted to a Literal; 03474 if numTerms is 3, opExpr is a tuple of two expressions, for the 03475 two operators separating the 3 terms 03476 - numTerms is the number of terms for this operator (must 03477 be 1, 2, or 3) 03478 - rightLeftAssoc is the indicator whether the operator is 03479 right or left associative, using the pyparsing-defined 03480 constants opAssoc.RIGHT and opAssoc.LEFT. 03481 - parseAction is the parse action to be associated with 03482 expressions matching this operator expression (the 03483 parse action tuple member may be omitted) 03484 """ 03485 ret = Forward() 03486 lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) 03487 for i,operDef in enumerate(opList): 03488 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] 03489 if arity == 3: 03490 if opExpr is None or len(opExpr) != 2: 03491 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") 03492 opExpr1, opExpr2 = opExpr 03493 thisExpr = Forward()#.setName("expr%d" % i) 03494 if rightLeftAssoc == opAssoc.LEFT: 03495 if arity == 1: 03496 matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) 03497 elif arity == 2: 03498 if opExpr is not None: 03499 matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) 03500 else: 03501 matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) 03502 elif arity == 3: 03503 matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ 03504 Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) 
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.

       Use the ignoreExpr argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an Or or MatchFirst.
       The default is quotedString, but if no expressions are to be ignored,
       then pass None for this argument.

       Returns a Forward whose definition is a Group, so each nesting level
       is returned as its own nested list with the delimiters suppressed.

       Raises ValueError if opener and closer compare equal, or if no content
       expression is given and opener and closer are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # a default content expression can only be derived from string delimiters
        if isinstance(opener,basestring) and isinstance(closer,basestring):
            if len(opener) == 1 and len(closer)==1:
                # single-character delimiters: exclude them (and whitespace)
                # directly through the CharsNotIn character set
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
                    content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
                                ).setParseAction(lambda t:t[0].strip()))
            else:
                # multi-character delimiters cannot be expressed as a character
                # set, so each non-whitespace character is accepted only after
                # negative lookaheads (~) for the delimiter literals
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    ~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
                    content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
        else:
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
    # the grammar refers to itself (ret appears inside its own definition),
    # hence the Forward placeholder that is filled in with '<<'
    ret = Forward()
    if ignoreExpr is not None:
        # ignoreExpr is tried first so delimiters inside it are not seen as nesting
        ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    else:
        ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
    return ret
03592 03593 Parameters: 03594 - blockStatementExpr - expression defining syntax of statement that 03595 is repeated within the indented block 03596 - indentStack - list created by caller to manage indentation stack 03597 (multiple statementWithIndentedBlock expressions within a single grammar 03598 should share a common indentStack) 03599 - indent - boolean indicating whether block must be indented beyond the 03600 the current level; set to False for block of left-most statements 03601 (default=True) 03602 03603 A valid block must contain at least one blockStatement. 03604 """ 03605 def checkPeerIndent(s,l,t): 03606 if l >= len(s): return 03607 curCol = col(l,s) 03608 if curCol != indentStack[-1]: 03609 if curCol > indentStack[-1]: 03610 raise ParseFatalException(s,l,"illegal nesting") 03611 raise ParseException(s,l,"not a peer entry") 03612 03613 def checkSubIndent(s,l,t): 03614 curCol = col(l,s) 03615 if curCol > indentStack[-1]: 03616 indentStack.append( curCol ) 03617 else: 03618 raise ParseException(s,l,"not a subentry") 03619 03620 def checkUnindent(s,l,t): 03621 if l >= len(s): return 03622 curCol = col(l,s) 03623 if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): 03624 raise ParseException(s,l,"not an unindent") 03625 indentStack.pop() 03626 03627 NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) 03628 INDENT = Empty() + Empty().setParseAction(checkSubIndent) 03629 PEER = Empty().setParseAction(checkPeerIndent) 03630 UNDENT = Empty().setParseAction(checkUnindent) 03631 if indent: 03632 smExpr = Group( Optional(NL) + 03633 FollowedBy(blockStatementExpr) + 03634 INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) 03635 else: 03636 smExpr = Group( Optional(NL) + 03637 (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) 03638 blockStatementExpr.ignore(_bslash + LineEnd()) 03639 return smExpr 03640 03641 alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") 03642 punc8bit 
= srange(r"[\0xa1-\0xbf\0xd7\0xf7]") 03643 03644 anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:")) 03645 commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline() 03646 _htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "')) 03647 replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None 03648 03649 # it's easy to get these comment structures wrong - they're very common, so may as well make them available 03650 cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment") 03651 03652 htmlComment = Regex(r"<!--[\s\S]*?-->") 03653 restOfLine = Regex(r".*").leaveWhitespace() 03654 dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment") 03655 cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment") 03656 03657 javaStyleComment = cppStyleComment 03658 pythonStyleComment = Regex(r"#.*").setName("Python style comment") 03659 _noncomma = "".join( [ c for c in printables if c != "," ] ) 03660 _commasepitem = Combine(OneOrMore(Word(_noncomma) + 03661 Optional( Word(" \t") + 03662 ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem") 03663 commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList") 03664 03665 03666 if __name__ == "__main__": 03667 03668 def test( teststring ): 03669 try: 03670 tokens = simpleSQL.parseString( teststring ) 03671 tokenlist = tokens.asList() 03672 print (teststring + "->" + str(tokenlist)) 03673 print ("tokens = " + str(tokens)) 03674 print ("tokens.columns = " + str(tokens.columns)) 03675 print ("tokens.tables = " + str(tokens.tables)) 03676 print (tokens.asXML("SQL",True)) 03677 except ParseBaseException,err: 03678 print (teststring + "->") 03679 print (err.line) 03680 print (" "*(err.column-1) + "^") 03681 print (err) 03682 print() 03683 03684 selectToken = CaselessLiteral( 
"select" ) 03685 fromToken = CaselessLiteral( "from" ) 03686 03687 ident = Word( alphas, alphanums + "_$" ) 03688 columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) 03689 columnNameList = Group( delimitedList( columnName ) )#.setName("columns") 03690 tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) 03691 tableNameList = Group( delimitedList( tableName ) )#.setName("tables") 03692 simpleSQL = ( selectToken + \ 03693 ( '*' | columnNameList ).setResultsName( "columns" ) + \ 03694 fromToken + \ 03695 tableNameList.setResultsName( "tables" ) ) 03696 03697 test( "SELECT * from XYZZY, ABC" ) 03698 test( "select * from SYS.XYZZY" ) 03699 test( "Select A from Sys.dual" ) 03700 test( "Select AA,BB,CC from Sys.dual" ) 03701 test( "Select A, B, C from Sys.dual" ) 03702 test( "Select A, B, C from Sys.dual" ) 03703 test( "Xelect A, B, C from Sys.dual" ) 03704 test( "Select A, B, C frox Sys.dual" ) 03705 test( "Select" ) 03706 test( "Select ^^^ frox Sys.dual" ) 03707 test( "Select A, B, C from Sys.dual, Table2 " )