# -----------------------------------------------------------------------------
# ply: lex.py
#
# Copyright (C) 2001-2017
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------

__version__ = '3.10'
__tabversion__ = '3.10'
import re
import sys
import types
import copy
import os
import inspect

# This tuple contains known string types
try:
    # Python 2.6
    StringTypes = (types.StringType, types.UnicodeType)
except AttributeError:
    # Python 3.0
    StringTypes = (str, bytes)

# This regular expression is used to match valid token names
_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')

# Exception thrown when invalid token encountered and no default error
# handler is defined.
class LexError(Exception):
    def __init__(self, message, s):
        self.args = (message,)
        self.text = s

# Token class. This class is used to represent the tokens produced.
class LexToken(object):
    def __str__(self):
        return 'LexToken(%s,%r,%d,%d)' % (self.type, self.value, self.lineno, self.lexpos)

    def __repr__(self):
        return str(self)

# This object is a stand-in for a logging object created by the
# logging module.
class PlyLogger(object):
    def __init__(self, f):
        self.f = f

    def critical(self, msg, *args, **kwargs):
        self.f.write((msg % args) + '\n')

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: ' + (msg % args) + '\n')

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: ' + (msg % args) + '\n')

    info = critical
    debug = critical

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self
# -----------------------------------------------------------------------------
# === Lexing Engine ===
#
# The following Lexer class implements the lexer runtime. There are only
# a few public methods and attributes:
#
#     input()  -  Store a new string in the lexer
#     token()  -  Get the next token
#     clone()  -  Clone the lexer
#
#     lineno   -  Current line number
#     lexpos   -  Current position in the input string
# -----------------------------------------------------------------------------
class Lexer:
    def __init__(self):
        self.lexre = None             # Master regular expression. This is a list of
                                      # tuples (re, findex) where re is a compiled
                                      # regular expression and findex is a list
                                      # mapping regex group numbers to rules
        self.lexretext = None         # Current regular expression strings
        self.lexstatere = {}          # Dictionary mapping lexer states to master regexs
        self.lexstateretext = {}      # Dictionary mapping lexer states to regex strings
        self.lexstaterenames = {}     # Dictionary mapping lexer states to symbol names
        self.lexstate = 'INITIAL'     # Current lexer state
        self.lexstatestack = []       # Stack of lexer states
        self.lexstateinfo = None      # State information
        self.lexstateignore = {}      # Dictionary of ignored characters for each state
        self.lexstateerrorf = {}      # Dictionary of error functions for each state
        self.lexstateeoff = {}        # Dictionary of eof functions for each state
        self.lexreflags = 0           # Optional re compile flags
        self.lexdata = None           # Actual input data (as a string)
        self.lexpos = 0               # Current position in input text
        self.lexlen = 0               # Length of the input text
        self.lexerrorf = None         # Error rule (if any)
        self.lexeoff = None           # EOF rule (if any)
        self.lextokens = None         # List of valid tokens
        self.lexignore = ''           # Ignored characters
        self.lexliterals = ''         # Literal characters that can be passed through
        self.lexmodule = None         # Module
        self.lineno = 1               # Current line number
        self.lexoptimize = False      # Optimized mode

    def clone(self, object=None):
        c = copy.copy(self)

        # If the object parameter has been supplied, it means we are attaching the
        # lexer to a new object. In this case, we have to rebind all methods in
        # the lexstatere and lexstateerrorf tables.
        if object:
            newtab = {}
            for key, ritem in self.lexstatere.items():
                newre = []
                for cre, findex in ritem:
                    newfindex = []
                    for f in findex:
                        if not f or not f[0]:
                            newfindex.append(f)
                            continue
                        newfindex.append((getattr(object, f[0].__name__), f[1]))
                    newre.append((cre, newfindex))
                newtab[key] = newre
            c.lexstatere = newtab
            c.lexstateerrorf = {}
            for key, ef in self.lexstateerrorf.items():
                c.lexstateerrorf[key] = getattr(object, ef.__name__)
            c.lexmodule = object
        return c

    # ------------------------------------------------------------
    # writetab() - Write lexer information to a table file
    # ------------------------------------------------------------
    def writetab(self, lextab, outputdir=''):
        if isinstance(lextab, types.ModuleType):
            raise IOError("Won't overwrite existing lextab module")
        basetabmodule = lextab.split('.')[-1]
        filename = os.path.join(outputdir, basetabmodule) + '.py'
        with open(filename, 'w') as tf:
            tf.write('# %s.py. This file automatically created by PLY (version %s). Don\'t edit!\n' % (basetabmodule, __version__))
            tf.write('_tabversion = %s\n' % repr(__tabversion__))
            tf.write('_lextokens = set(%s)\n' % repr(tuple(self.lextokens)))
            tf.write('_lexreflags = %s\n' % repr(self.lexreflags))
            tf.write('_lexliterals = %s\n' % repr(self.lexliterals))
            tf.write('_lexstateinfo = %s\n' % repr(self.lexstateinfo))

            # Rewrite the lexstatere table, replacing function objects with function names
            tabre = {}
            for statename, lre in self.lexstatere.items():
                titem = []
                for (pat, func), retext, renames in zip(lre, self.lexstateretext[statename], self.lexstaterenames[statename]):
                    titem.append((retext, _funcs_to_names(func, renames)))
                tabre[statename] = titem

            tf.write('_lexstatere = %s\n' % repr(tabre))
            tf.write('_lexstateignore = %s\n' % repr(self.lexstateignore))

            taberr = {}
            for statename, ef in self.lexstateerrorf.items():
                taberr[statename] = ef.__name__ if ef else None
            tf.write('_lexstateerrorf = %s\n' % repr(taberr))

            tabeof = {}
            for statename, ef in self.lexstateeoff.items():
                tabeof[statename] = ef.__name__ if ef else None
            tf.write('_lexstateeoff = %s\n' % repr(tabeof))

    # ------------------------------------------------------------
    # readtab() - Read lexer information from a tab file
    # ------------------------------------------------------------
    def readtab(self, tabfile, fdict):
        if isinstance(tabfile, types.ModuleType):
            lextab = tabfile
        else:
            exec('import %s' % tabfile)
            lextab = sys.modules[tabfile]

        if getattr(lextab, '_tabversion', '0.0') != __tabversion__:
            raise ImportError('Inconsistent PLY version')

        self.lextokens = lextab._lextokens
        self.lexreflags = lextab._lexreflags
        self.lexliterals = lextab._lexliterals
        self.lextokens_all = self.lextokens | set(self.lexliterals)
        self.lexstateinfo = lextab._lexstateinfo
        self.lexstateignore = lextab._lexstateignore
        self.lexstatere = {}
        self.lexstateretext = {}
        for statename, lre in lextab._lexstatere.items():
            titem = []
            txtitem = []
            for pat, func_name in lre:
                titem.append((re.compile(pat, lextab._lexreflags), _names_to_funcs(func_name, fdict)))
            self.lexstatere[statename] = titem
            self.lexstateretext[statename] = txtitem
        self.lexstateerrorf = {}
        for statename, ef in lextab._lexstateerrorf.items():
            self.lexstateerrorf[statename] = fdict[ef]
        self.lexstateeoff = {}
        for statename, ef in lextab._lexstateeoff.items():
            self.lexstateeoff[statename] = fdict[ef]
        self.begin('INITIAL')

    # ------------------------------------------------------------
    # input() - Push a new string into the lexer
    # ------------------------------------------------------------
    def input(self, s):
        # Pull off the first character to see if s looks like a string
        c = s[:1]
        if not isinstance(c, StringTypes):
            raise ValueError('Expected a string')
        self.lexdata = s
        self.lexpos = 0
        self.lexlen = len(s)

    # ------------------------------------------------------------
    # begin() - Changes the lexing state
    # ------------------------------------------------------------
    def begin(self, state):
        if state not in self.lexstatere:
            raise ValueError('Undefined state')
        self.lexre = self.lexstatere[state]
        self.lexretext = self.lexstateretext[state]
        self.lexignore = self.lexstateignore.get(state, '')
        self.lexerrorf = self.lexstateerrorf.get(state, None)
        self.lexeoff = self.lexstateeoff.get(state, None)
        self.lexstate = state

    # ------------------------------------------------------------
    # push_state() - Changes the lexing state and saves old on stack
    # ------------------------------------------------------------
    def push_state(self, state):
        self.lexstatestack.append(self.lexstate)
        self.begin(state)

    # ------------------------------------------------------------
    # pop_state() - Restores the previous state
    # ------------------------------------------------------------
    def pop_state(self):
        self.begin(self.lexstatestack.pop())

    # ------------------------------------------------------------
    # current_state() - Returns the current lexing state
    # ------------------------------------------------------------
    def current_state(self):
        return self.lexstate

    # ------------------------------------------------------------
    # skip() - Skip ahead n characters
    # ------------------------------------------------------------
    def skip(self, n):
        self.lexpos += n
    # ------------------------------------------------------------
    # token() - Return the next token from the Lexer
    #
    # Note: This function has been carefully implemented to be as fast
    # as possible. Don't make changes unless you really know what
    # you are doing
    # ------------------------------------------------------------
    def token(self):
        # Make local copies of frequently referenced attributes
        lexpos = self.lexpos
        lexlen = self.lexlen
        lexignore = self.lexignore
        lexdata = self.lexdata

        while lexpos < lexlen:
            # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
            if lexdata[lexpos] in lexignore:
                lexpos += 1
                continue

            # Look for a regular expression match
            for lexre, lexindexfunc in self.lexre:
                m = lexre.match(lexdata, lexpos)
                if not m:
                    continue

                # Create a token for return
                tok = LexToken()
                tok.value = m.group()
                tok.lineno = self.lineno
                tok.lexpos = lexpos

                i = m.lastindex
                func, tok.type = lexindexfunc[i]

                if not func:
                    # If no token type was set, it's an ignored token
                    if tok.type:
                        self.lexpos = m.end()
                        return tok
                    else:
                        lexpos = m.end()
                        break

                lexpos = m.end()

                # If token is processed by a function, call it
                tok.lexer = self      # Set additional attributes useful in token rules
                self.lexmatch = m
                self.lexpos = lexpos

                newtok = func(tok)

                # Every rule function must return a token. If it returns nothing,
                # we simply move on to the next token.
                if not newtok:
                    lexpos = self.lexpos          # This is here in case user has updated lexpos.
                    lexignore = self.lexignore    # This is here in case there was a state change
                    break

                # Verify type of the token. If not in the token map, raise an error
                if not self.lexoptimize:
                    if newtok.type not in self.lextokens_all:
                        raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
                            func.__code__.co_filename, func.__code__.co_firstlineno,
                            func.__name__, newtok.type), lexdata[lexpos:])

                return newtok
            else:
                # No match, see if in literals
                if lexdata[lexpos] in self.lexliterals:
                    tok = LexToken()
                    tok.value = lexdata[lexpos]
                    tok.lineno = self.lineno
                    tok.type = tok.value
                    tok.lexpos = lexpos
                    self.lexpos = lexpos + 1
                    return tok

                # No match. Call t_error() if defined.
                if self.lexerrorf:
                    tok = LexToken()
                    tok.value = self.lexdata[lexpos:]
                    tok.lineno = self.lineno
                    tok.type = 'error'
                    tok.lexer = self
                    tok.lexpos = lexpos
                    self.lexpos = lexpos
                    newtok = self.lexerrorf(tok)
                    if lexpos == self.lexpos:
                        # Error method didn't change text position at all. This is an error.
                        raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
                    lexpos = self.lexpos
                    if not newtok:
                        continue
                    return newtok

                self.lexpos = lexpos
                raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:])

        if self.lexeoff:
            tok = LexToken()
            tok.type = 'eof'
            tok.value = ''
            tok.lineno = self.lineno
            tok.lexpos = lexpos
            tok.lexer = self
            self.lexpos = lexpos
            newtok = self.lexeoff(tok)
            return newtok

        self.lexpos = lexpos + 1
        if self.lexdata is None:
            raise RuntimeError('No input string given with input()')
        return None

    # Iterator interface
    def __iter__(self):
        return self

    def next(self):
        t = self.token()
        if t is None:
            raise StopIteration
        return t

    __next__ = next
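
# -----------------------------------------------------------------------------
# Editor's note: the helper below is an illustrative usage sketch, not part of
# the original PLY source. It shows how the public runtime API of the Lexer
# class above (input() and token()) is typically driven once an instance has
# been built by lex() further below. The name _example_token_stream and its
# parameters are hypothetical.
# -----------------------------------------------------------------------------
def _example_token_stream(lexer_obj, text):
    # Feed a string into the lexer, then pull LexToken objects until token()
    # returns None, which signals the end of the input.
    lexer_obj.input(text)
    result = []
    while True:
        tok = lexer_obj.token()
        if tok is None:
            break
        result.append((tok.type, tok.value, tok.lineno, tok.lexpos))
    return result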
# -----------------------------------------------------------------------------
# === Lex Builder ===
#
# The functions and classes below are used to collect lexing information
# and build a Lexer object from it.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# _get_regex(func)
#
# Returns the regular expression assigned to a function either as a doc string
# or as a .regex attribute attached by the @TOKEN decorator.
# -----------------------------------------------------------------------------
def _get_regex(func):
    return getattr(func, 'regex', func.__doc__)

# -----------------------------------------------------------------------------
# get_caller_module_dict()
#
# This function returns a dictionary containing all of the symbols defined within
# a caller further down the call stack. This is used to get the environment
# associated with the lex() call if none was provided.
# -----------------------------------------------------------------------------
def get_caller_module_dict(levels):
    f = sys._getframe(levels)
    ldict = f.f_globals.copy()
    if f.f_globals != f.f_locals:
        ldict.update(f.f_locals)
    return ldict

# -----------------------------------------------------------------------------
# _funcs_to_names()
#
# Given a list of regular expression functions, this converts it to a list
# suitable for output to a table file
# -----------------------------------------------------------------------------
def _funcs_to_names(funclist, namelist):
    result = []
    for f, name in zip(funclist, namelist):
        if f and f[0]:
            result.append((name, f[1]))
        else:
            result.append(f)
    return result

# -----------------------------------------------------------------------------
# _names_to_funcs()
#
# Given a list of regular expression function names, this converts it back to
# functions.
# -----------------------------------------------------------------------------
def _names_to_funcs(namelist, fdict):
    result = []
    for n in namelist:
        if n and n[0]:
            result.append((fdict[n[0]], n[1]))
        else:
            result.append(n)
    return result

# -----------------------------------------------------------------------------
# _form_master_re()
#
# This function takes a list of all of the regex components and attempts to
# form the master regular expression. Given limitations in the Python re
# module, it may be necessary to break the master regex into separate expressions.
# -----------------------------------------------------------------------------
def _form_master_re(relist, reflags, ldict, toknames):
    if not relist:
        return []
    regex = '|'.join(relist)
    try:
        lexre = re.compile(regex, reflags)

        # Build the index to function map for the matching engine
        lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1)
        lexindexnames = lexindexfunc[:]

        for f, i in lexre.groupindex.items():
            handle = ldict.get(f, None)
            if type(handle) in (types.FunctionType, types.MethodType):
                lexindexfunc[i] = (handle, toknames[f])
                lexindexnames[i] = f
            elif handle is not None:
                lexindexnames[i] = f
                if f.find('ignore_') > 0:
                    lexindexfunc[i] = (None, None)
                else:
                    lexindexfunc[i] = (None, toknames[f])

        return [(lexre, lexindexfunc)], [regex], [lexindexnames]
    except Exception:
        m = int(len(relist)/2)
        if m == 0:
            m = 1
        llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames)
        rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames)
        return (llist+rlist), (lre+rre), (lnames+rnames)
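
# -----------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the original PLY source.
# It mirrors the structure that _form_master_re() builds: every rule becomes a
# named group in a single alternation, groupindex maps rule names to group
# numbers, and match.lastindex identifies which rule fired. The rule names
# NUMBER and PLUS are hypothetical.
# -----------------------------------------------------------------------------
def _example_master_re():
    master = re.compile('(?P<NUMBER>\\d+)|(?P<PLUS>\\+)')
    m = master.match('42 + 3')
    # master.groupindex is {'NUMBER': 1, 'PLUS': 2} and m.lastindex is 1,
    # which is how token() above maps a match back to its rule via lexindexfunc.
    return master.groupindex, m.lastindex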
# -----------------------------------------------------------------------------
# def _statetoken(s, names)
#
# Given a declaration name s of the form "t_" and a dictionary whose keys are
# state names, this function returns a tuple (states, tokenname) where states
# is a tuple of state names and tokenname is the name of the token. For example,
# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
# -----------------------------------------------------------------------------
def _statetoken(s, names):
    nonstate = 1
    parts = s.split('_')
    for i, part in enumerate(parts[1:], 1):
        if part not in names and part != 'ANY':
            break

    if i > 1:
        states = tuple(parts[1:i])
    else:
        states = ('INITIAL',)

    if 'ANY' in states:
        states = tuple(names)

    tokenname = '_'.join(parts[i:])
    return (states, tokenname)
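
# -----------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the original PLY source,
# spelling out the example given in the comment above. The state names 'foo'
# and 'bar' are hypothetical.
# -----------------------------------------------------------------------------
def _example_statetoken():
    names = {'INITIAL': 'inclusive', 'foo': 'exclusive', 'bar': 'exclusive'}
    # Returns (('foo', 'bar'), 'SPAM'): leading parts that name declared states
    # are split off, and the remainder becomes the token name.
    return _statetoken('t_foo_bar_SPAM', names)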
# -----------------------------------------------------------------------------
# LexerReflect()
#
# This class represents information needed to build a lexer as extracted from a
# user's input file.
# -----------------------------------------------------------------------------
class LexerReflect(object):
    def __init__(self, ldict, log=None, reflags=0):
        self.ldict = ldict
        self.error_func = None
        self.tokens = []
        self.reflags = reflags
        self.stateinfo = {'INITIAL': 'inclusive'}
        self.modules = set()
        self.error = False
        self.log = PlyLogger(sys.stderr) if log is None else log

    # Get all of the basic information
    def get_all(self):
        self.get_tokens()
        self.get_literals()
        self.get_states()
        self.get_rules()

    # Validate all of the information
    def validate_all(self):
        self.validate_tokens()
        self.validate_literals()
        self.validate_rules()
        return self.error

    # Get the tokens map
    def get_tokens(self):
        tokens = self.ldict.get('tokens', None)
        if not tokens:
            self.log.error('No token list is defined')
            self.error = True
            return

        if not isinstance(tokens, (list, tuple)):
            self.log.error('tokens must be a list or tuple')
            self.error = True
            return

        if not tokens:
            self.log.error('tokens is empty')
            self.error = True
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        terminals = {}
        for n in self.tokens:
            if not _is_identifier.match(n):
                self.log.error("Bad token name '%s'", n)
                self.error = True
            if n in terminals:
                self.log.warning("Token '%s' multiply defined", n)
            terminals[n] = 1

    # Get the literals specifier
    def get_literals(self):
        self.literals = self.ldict.get('literals', '')
        if not self.literals:
            self.literals = ''

    # Validate literals
    def validate_literals(self):
        try:
            for c in self.literals:
                if not isinstance(c, StringTypes) or len(c) > 1:
                    self.log.error('Invalid literal %s. Must be a single character', repr(c))
                    self.error = True
        except TypeError:
            self.log.error('Invalid literals specification. literals must be a sequence of characters')
            self.error = True

    def get_states(self):
        self.states = self.ldict.get('states', None)
        # Build statemap
        if self.states:
            if not isinstance(self.states, (tuple, list)):
                self.log.error('states must be defined as a tuple or list')
                self.error = True
            else:
                for s in self.states:
                    if not isinstance(s, tuple) or len(s) != 2:
                        self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')", repr(s))
                        self.error = True
                        continue
                    name, statetype = s
                    if not isinstance(name, StringTypes):
                        self.log.error('State name %s must be a string', repr(name))
                        self.error = True
                        continue
                    if not (statetype == 'inclusive' or statetype == 'exclusive'):
                        self.log.error("State type for state %s must be 'inclusive' or 'exclusive'", name)
                        self.error = True
                        continue
                    if name in self.stateinfo:
                        self.log.error("State '%s' already defined", name)
                        self.error = True
                        continue
                    self.stateinfo[name] = statetype

    # Get all of the symbols with a t_ prefix and sort them into various
    # categories (functions, strings, error functions, and ignore characters)
    def get_rules(self):
        tsymbols = [f for f in self.ldict if f[:2] == 't_']

        # Now build up a list of functions and a list of strings
        self.toknames = {}    # Mapping of symbols to token names
        self.funcsym = {}     # Symbols defined as functions
        self.strsym = {}      # Symbols defined as strings
        self.ignore = {}      # Ignore strings by state
        self.errorf = {}      # Error functions by state
        self.eoff = {}        # EOF functions by state

        for s in self.stateinfo:
            self.funcsym[s] = []
            self.strsym[s] = []

        if len(tsymbols) == 0:
            self.log.error('No rules of the form t_rulename are defined')
            self.error = True
            return

        for f in tsymbols:
            t = self.ldict[f]
            states, tokname = _statetoken(f, self.stateinfo)
            self.toknames[f] = tokname

            if hasattr(t, '__call__'):
                if tokname == 'error':
                    for s in states:
                        self.errorf[s] = t
                elif tokname == 'eof':
                    for s in states:
                        self.eoff[s] = t
                elif tokname == 'ignore':
                    line = t.__code__.co_firstlineno
                    file = t.__code__.co_filename
                    self.log.error("%s:%d: Rule '%s' must be defined as a string", file, line, t.__name__)
                    self.error = True
                else:
                    for s in states:
                        self.funcsym[s].append((f, t))
            elif isinstance(t, StringTypes):
                if tokname == 'ignore':
                    for s in states:
                        self.ignore[s] = t
                        if '\\' in t:
                            self.log.warning("%s contains a literal backslash '\\'", f)
                elif tokname == 'error':
                    self.log.error("Rule '%s' must be defined as a function", f)
                    self.error = True
                else:
                    for s in states:
                        self.strsym[s].append((f, t))
            else:
                self.log.error('%s not defined as a function or string', f)
                self.error = True

        # Sort the functions by line number
        for f in self.funcsym.values():
            f.sort(key=lambda x: x[1].__code__.co_firstlineno)

        # Sort the strings by regular expression length
        for s in self.strsym.values():
            s.sort(key=lambda x: len(x[1]), reverse=True)

    # Validate all of the t_rules collected
    def validate_rules(self):
        for state in self.stateinfo:
            # Validate all rules defined by functions
            for fname, f in self.funcsym[state]:
                line = f.__code__.co_firstlineno
                file = f.__code__.co_filename
                module = inspect.getmodule(f)
                self.modules.add(module)

                tokname = self.toknames[fname]
                if isinstance(f, types.MethodType):
                    reqargs = 2
                else:
                    reqargs = 1
                nargs = f.__code__.co_argcount
                if nargs > reqargs:
                    self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__)
                    self.error = True
                    continue

                if nargs < reqargs:
                    self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__)
                    self.error = True
                    continue

                if not _get_regex(f):
                    self.log.error("%s:%d: No regular expression defined for rule '%s'", file, line, f.__name__)
                    self.error = True
                    continue

                try:
                    c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags)
                    if c.match(''):
                        self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__)
                        self.error = True
                except re.error as e:
                    self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e)
                    if '#' in _get_regex(f):
                        self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", file, line, f.__name__)
                    self.error = True

            # Validate all rules defined by strings
            for name, r in self.strsym[state]:
                tokname = self.toknames[name]
                if tokname == 'error':
                    self.log.error("Rule '%s' must be defined as a function", name)
                    self.error = True
                    continue

                if tokname not in self.tokens and tokname.find('ignore_') < 0:
                    self.log.error("Rule '%s' defined for an unspecified token %s", name, tokname)
                    self.error = True
                    continue

                try:
                    c = re.compile('(?P<%s>%s)' % (name, r), self.reflags)
                    if (c.match('')):
                        self.log.error("Regular expression for rule '%s' matches empty string", name)
                        self.error = True
                except re.error as e:
                    self.log.error("Invalid regular expression for rule '%s'. %s", name, e)
                    if '#' in r:
                        self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'", name)
                    self.error = True

            if not self.funcsym[state] and not self.strsym[state]:
                self.log.error("No rules defined for state '%s'", state)
                self.error = True

            # Validate the error function
            efunc = self.errorf.get(state, None)
            if efunc:
                f = efunc
                line = f.__code__.co_firstlineno
                file = f.__code__.co_filename
                module = inspect.getmodule(f)
                self.modules.add(module)

                if isinstance(f, types.MethodType):
                    reqargs = 2
                else:
                    reqargs = 1
                nargs = f.__code__.co_argcount
                if nargs > reqargs:
                    self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__)
                    self.error = True

                if nargs < reqargs:
                    self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__)
                    self.error = True

        for module in self.modules:
            self.validate_module(module)
    # -----------------------------------------------------------------------------
    # validate_module()
    #
    # This checks to see if there are duplicated t_rulename() functions or strings
    # in the lexer input file. This is done using a simple regular expression
    # match on each line in the source code of the given module.
    # -----------------------------------------------------------------------------
    def validate_module(self, module):
        try:
            lines, linen = inspect.getsourcelines(module)
        except IOError:
            return

        fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
        sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')

        counthash = {}
        linen += 1
        for line in lines:
            m = fre.match(line)
            if not m:
                m = sre.match(line)
            if m:
                name = m.group(1)
                prev = counthash.get(name)
                if not prev:
                    counthash[name] = linen
                else:
                    filename = inspect.getsourcefile(module)
                    self.log.error('%s:%d: Rule %s redefined. Previously defined on line %d', filename, linen, name, prev)
                    self.error = True
            linen += 1
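
# -----------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the original PLY source. It
# shows the kind of 'states' declaration and state-prefixed rules that
# get_states() and _statetoken() above expect in a specification module. The
# 'comment' state and the rule fragments below are hypothetical.
#
#     states = (
#         ('comment', 'exclusive'),
#     )
#
#     def t_COMMENT_START(t):
#         r'/\*'
#         t.lexer.begin('comment')          # switch into the 'comment' state
#
#     def t_comment_END(t):
#         r'\*/'
#         t.lexer.begin('INITIAL')          # return to the default state
#
#     t_comment_ignore = ' \t'
#
#     def t_comment_error(t):
#         t.lexer.skip(1)
# -----------------------------------------------------------------------------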
# -----------------------------------------------------------------------------
# lex(module)
#
# Build all of the regular expression rules from definitions in the supplied module
# -----------------------------------------------------------------------------
def lex(module=None, object=None, debug=False, optimize=False, lextab='lextab',
        reflags=int(re.VERBOSE), nowarn=False, outputdir=None, debuglog=None, errorlog=None):

    if lextab is None:
        lextab = 'lextab'

    global lexer

    ldict = None
    stateinfo = {'INITIAL': 'inclusive'}
    lexobj = Lexer()
    lexobj.lexoptimize = optimize
    global token, input

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    if debug:
        if debuglog is None:
            debuglog = PlyLogger(sys.stderr)

    # If an object instance was supplied, take the token rules from it instead of a module
    if object:
        module = object

    # Get the module dictionary used for the lexer
    if module:
        _items = [(k, getattr(module, k)) for k in dir(module)]
        ldict = dict(_items)
        # If no __file__ attribute is available, try to obtain it from the __module__ instead
        if '__file__' not in ldict:
            ldict['__file__'] = sys.modules[ldict['__module__']].__file__
    else:
        ldict = get_caller_module_dict(2)

    # Determine if the module is part of a package.
    # If so, fix the lextab setting so that tables load correctly
    pkg = ldict.get('__package__')
    if pkg and isinstance(lextab, str):
        if '.' not in lextab:
            lextab = pkg + '.' + lextab

    # Collect lexer information from the dictionary
    linfo = LexerReflect(ldict, log=errorlog, reflags=reflags)
    linfo.get_all()
    if not optimize:
        if linfo.validate_all():
            raise SyntaxError("Can't build lexer")

    if optimize and lextab:
        try:
            lexobj.readtab(lextab, ldict)
            token = lexobj.token
            input = lexobj.input
            lexer = lexobj
            return lexobj
        except ImportError:
            pass

    # Dump some basic debugging information
    if debug:
        debuglog.info('lex: tokens = %r', linfo.tokens)
        debuglog.info('lex: literals = %r', linfo.literals)
        debuglog.info('lex: states = %r', linfo.stateinfo)

    # Build a dictionary of valid token names
    lexobj.lextokens = set()
    for n in linfo.tokens:
        lexobj.lextokens.add(n)

    # Get literals specification
    if isinstance(linfo.literals, (list, tuple)):
        lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals)
    else:
        lexobj.lexliterals = linfo.literals

    lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals)

    # Get the stateinfo dictionary
    stateinfo = linfo.stateinfo

    regexs = {}
    # Build the master regular expressions
    for state in stateinfo:
        regex_list = []

        # Add rules defined by functions first
        for fname, f in linfo.funcsym[state]:
            line = f.__code__.co_firstlineno
            file = f.__code__.co_filename
            regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f)))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state)

        # Now add all of the simple rules
        for name, r in linfo.strsym[state]:
            regex_list.append('(?P<%s>%s)' % (name, r))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state)

        regexs[state] = regex_list

    # Build the master regular expressions
    if debug:
        debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====')

    for state in regexs:
        lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames)
        lexobj.lexstatere[state] = lexre
        lexobj.lexstateretext[state] = re_text
        lexobj.lexstaterenames[state] = re_names
        if debug:
            for i, text in enumerate(re_text):
                debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text)

    # For inclusive states, we need to add the regular expressions from the INITIAL state
    for state, stype in stateinfo.items():
        if state != 'INITIAL' and stype == 'inclusive':
            lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
            lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
            lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])

    lexobj.lexstateinfo = stateinfo
    lexobj.lexre = lexobj.lexstatere['INITIAL']
    lexobj.lexretext = lexobj.lexstateretext['INITIAL']
    lexobj.lexreflags = reflags

    # Set up ignore variables
    lexobj.lexstateignore = linfo.ignore
    lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '')

    # Set up error functions
    lexobj.lexstateerrorf = linfo.errorf
    lexobj.lexerrorf = linfo.errorf.get('INITIAL', None)
    if not lexobj.lexerrorf:
        errorlog.warning('No t_error rule is defined')

    # Set up eof functions
    lexobj.lexstateeoff = linfo.eoff
    lexobj.lexeoff = linfo.eoff.get('INITIAL', None)

    # Check state information for ignore and error rules
    for s, stype in stateinfo.items():
        if stype == 'exclusive':
            if s not in linfo.errorf:
                errorlog.warning("No error rule is defined for exclusive state '%s'", s)
            if s not in linfo.ignore and lexobj.lexignore:
                errorlog.warning("No ignore rule is defined for exclusive state '%s'", s)
        elif stype == 'inclusive':
            if s not in linfo.errorf:
                linfo.errorf[s] = linfo.errorf.get('INITIAL', None)
            if s not in linfo.ignore:
                linfo.ignore[s] = linfo.ignore.get('INITIAL', '')

    # Create global versions of the token() and input() functions
    token = lexobj.token
    input = lexobj.input
    lexer = lexobj

    # If in optimize mode, we write the lextab
    if lextab and optimize:
        if outputdir is None:
            # If no output directory is set, the location of the output files
            # is determined according to the following rules:
            #     - If lextab specifies a package, files go into that package directory
            #     - Otherwise, files go in the same directory as the specifying module
            if isinstance(lextab, types.ModuleType):
                srcfile = lextab.__file__
            else:
                if '.' not in lextab:
                    srcfile = ldict['__file__']
                else:
                    parts = lextab.split('.')
                    pkgname = '.'.join(parts[:-1])
                    exec('import %s' % pkgname)
                    srcfile = getattr(sys.modules[pkgname], '__file__', '')
            outputdir = os.path.dirname(srcfile)
        try:
            lexobj.writetab(lextab, outputdir)
        except IOError as e:
            errorlog.warning("Couldn't write lextab module %r. %s" % (lextab, e))

    return lexobj
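
# -----------------------------------------------------------------------------
# Editor's note: illustrative usage sketch, not part of the original PLY
# source. A lexer specification module typically looks like the commented
# example below; the token names and rules are hypothetical. The t_* names are
# collected from the calling module's namespace by LexerReflect when lex() is
# invoked.
#
#     import ply.lex as lex
#
#     tokens = ('NUMBER', 'PLUS')
#
#     t_PLUS = r'\+'
#     t_ignore = ' \t'
#
#     def t_NUMBER(t):
#         r'\d+'
#         t.value = int(t.value)
#         return t
#
#     def t_error(t):
#         print("Illegal character %r" % t.value[0])
#         t.lexer.skip(1)
#
#     lexer = lex.lex()
#     lexer.input('3 + 4')
#     for tok in lexer:
#         print(tok)
# -----------------------------------------------------------------------------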
# -----------------------------------------------------------------------------
# runmain()
#
# This runs the lexer as a main program
# -----------------------------------------------------------------------------
def runmain(lexer=None, data=None):
    if not data:
        try:
            filename = sys.argv[1]
            f = open(filename)
            data = f.read()
            f.close()
        except IndexError:
            sys.stdout.write('Reading from standard input (type EOF to end):\n')
            data = sys.stdin.read()

    if lexer:
        _input = lexer.input
    else:
        _input = input
    _input(data)
    if lexer:
        _token = lexer.token
    else:
        _token = token

    while True:
        tok = _token()
        if not tok:
            break
        sys.stdout.write('(%s,%r,%d,%d)\n' % (tok.type, tok.value, tok.lineno, tok.lexpos))

# -----------------------------------------------------------------------------
# @TOKEN(regex)
#
# This decorator attaches a regular expression to a token rule function, for
# cases where the pattern cannot conveniently be supplied as the function's
# docstring.
# -----------------------------------------------------------------------------
def TOKEN(r):
    def set_regex(f):
        if hasattr(r, '__call__'):
            f.regex = _get_regex(r)
        else:
            f.regex = r
        return f
    return set_regex

# Alternative spelling of the TOKEN decorator
Token = TOKEN
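
# -----------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the original PLY source,
# showing the @TOKEN decorator in a specification module. The 'identifier'
# pattern, t_ID rule, and 'reserved' dictionary are hypothetical; the
# decorator simply stores the pattern on the function's .regex attribute,
# which _get_regex() prefers over the docstring.
#
#     identifier = r'[a-zA-Z_][a-zA-Z0-9_]*'
#
#     @TOKEN(identifier)
#     def t_ID(t):
#         t.type = reserved.get(t.value, 'ID')
#         return t
# -----------------------------------------------------------------------------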