#!/usr/bin/env python
"""
(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com>
Released under GNU LGPL license.
"""
import sys
from lexer import Lexer
from parse_core import Symbols, Parser
import node as node_module
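# The classes below model the pieces of a C declaration (specifiers,
# declarators, struct/union/enum bodies).  Each Parser subclass pulls
# tokens from a Lexer and records identifiers and tags in a Symbols
# table; TransUnit at the bottom drives a whole source string.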
class Node(node_module.Node):
def is_typedef(self):
for x in self:
if isinstance(x,Node):
if x.is_typedef():
return 1
return 0
#def explain(self):
#l = []
#for x in self:
#if isinstance(x,Node):
#l.append(x.explain())
#else:
#l.append(str(x))
#return string.join(l," ")
##(self.__class__.__name__,string.join(l) )
def psource(self):
if hasattr(self,'lines'):
print "# "+string.join(self.lines,"\n# ")+"\n"
###################################################################
#
###################################################################
#
class BasicType(Node):
" int char short etc. "
def __init__(self,name):
Node.__init__(self,name)
class Qualifier(Node):
"""
"""
def __init__(self,name):
Node.__init__(self,name)
self.name=name
class StorageClass(Node):
"""
"""
def __init__(self,name):
Node.__init__(self,name)
self.name=name
class Typedef(StorageClass):
"""
"""
def __init__(self,s='typedef'):
Node.__init__(self,s)
#def explain(self):
#return "type"
class Ellipses(Node):
"""
"""
def __init__(self,s='...'):
Node.__init__(self,s)
class GCCBuiltin(BasicType):
"""
"""
pass
class Identifier(Node):
"""
"""
def __init__(self,name="",*items):
Node.__init__(self,name,*items)
self.name=name
class Function(Node,Parser):
"""
"""
def __init__(self,*items):
Node.__init__(self,*items)
def parse(self,lexer,symbols):
symbols = Symbols(symbols)
args = ''
#lexer.get_token()
if lexer.tok != ')':
if not lexer.tok:
self.parse_error(lexer)
#lexer.unget_token() # unget start of decl
while lexer.tok != ')':
node = ParameterDeclaration()
node.parse(lexer,symbols)
self.append( node )
if lexer.tok != ')' and lexer.tok != ',':
self.parse_error(lexer)
if lexer.tok == ',':
lexer.get_token()
lexer.get_token()
class Pointer(Node):
"""
"""
def __init__(self,*items):
Node.__init__(self,*items)
class Array(Node,Parser):
"""
"""
def __init__(self,*items):
Node.__init__(self,*items)
def parse(self,lexer,symbols):
lexer.get_token() # a number or ']'
# XX
# HACK HACK: constant c expressions can appear in here:
# eg. [ 15 * sizeof (int) - 2 * sizeof (void *) ]
# XX
toks = []
while lexer.tok != ']':
#self.append( lexer.kind )
toks.append( lexer.tok )
lexer.get_token()
child = " ".join(toks)
if child == "":
child = None
self.append( child )
lexer.get_token() # read past the ']'
class Tag(Node):
"""
"""
pass
class Compound(Node,Parser):
"Struct or Union"
def __init__(self,*items,**kw):
Node.__init__(self,*items,**kw)
def parse(self,lexer,symbols):
symbols = Symbols(symbols)
tag = "" # anonymous
if lexer.tok != '{':
tag = lexer.tok
if not ( tag[0]=='_' or tag[0].isalpha() ):
self.parse_error(lexer ,"expected tag, got '%s'"%tag )
lexer.get_token()
if tag:
self.append(Tag(tag))
else:
self.append(Tag())
self.tag = tag
if lexer.tok == '{':
fieldlist = []
lexer.get_token()
if lexer.tok != '}':
if not lexer.tok: self.parse_error(lexer)
while lexer.tok != '}':
node = StructDeclaration()
node.parse(lexer,symbols)
fieldlist.append( node )
self += fieldlist
lexer.get_token()
if self.verbose:
print "%s.__init__() #<--"%(self)
class Struct(Compound):
"""
"""
pass
class Union(Compound):
"""
"""
pass
class Enum(Node,Parser):
"""
"""
def __init__(self,*items,**kw):
Node.__init__(self,*items,**kw)
def parse(self,lexer,symbols):
tag = "" # anonymous
if lexer.tok != '{':
tag = lexer.tok
if not ( tag[0]=='_' or tag[0].isalpha() ):
self.parse_error(lexer ,"expected tag, got '%s'"%tag )
lexer.get_token()
if tag:
self.append(Tag(tag))
else:
self.append(Tag())
self.tag = tag
if lexer.tok == '{':
lexer.get_token()
if lexer.tok != '}': # XX dopey control flow
if not lexer.tok: # XX dopey control flow
self.parse_error(lexer) # XX dopey control flow
while lexer.tok != '}': # XX dopey control flow
if lexer.kind is not None:
self.expected_error(lexer ,"identifier" )
ident = Identifier(lexer.tok)
if symbols[ident[0]] is not None:
self.parse_error(lexer,"%s already defined."%ident[0])
symbols[ident[0]]=ident
self.append( ident )
lexer.get_token()
if lexer.tok == '=':
lexer.get_token()
# ConstantExpr
# XX hack hack XX
while lexer.tok!=',' and lexer.tok!='}':
lexer.get_token()
# if type( lexer.kind ) is not int:
# #self.parse_error(lexer ,"expected integer" )
# # XX hack hack XX
# while lexer.tok!=',' and lexer.tok!='}':
# lexer.get_token()
# else:
# # put initializer into the Identifier
# ident.append( lexer.kind )
# lexer.get_token()
if lexer.tok != '}':
if lexer.tok != ',':
self.expected_error(lexer,"}",",")
lexer.get_token() # ','
lexer.get_token()
if self.verbose:
print "%s.__init__() #<--"%(self)
class Declarator(Node,Parser):
"""
"""
def __init__(self,*items):
Node.__init__(self,*items)
self.ident = None
def parse(self,lexer,symbols):
#Parser.parse_enter(self,lexer)
stack = []
# read up to identifier, pushing tokens onto stack
self.ident = self.parse_identifier(lexer,symbols,stack)
self.name = ''
if self.ident is not None:
self.append( self.ident )
self.name = self.ident.name
# now read outwards from identifier
self.parse_declarator(lexer,symbols,stack)
#Parser.parse_leave(self,lexer)
def parse_identifier(self,lexer,symbols,stack):
if self.verbose:
print "%s.parse_identifier()"%self
ident = None
if lexer.tok != ';':
while lexer.tok and lexer.kind is not None:
stack.append( (lexer.tok, lexer.kind) )
lexer.get_token()
if lexer.tok:
ident = Identifier( lexer.tok )
#stack.append( (ident.name, ident) )
lexer.get_token()
if self.verbose:
print "%s.parse_identifier()=%s"%(self,repr(ident))
return ident
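    # parse_declarator() unwinds the stack that parse_identifier() built:
    # suffixes to the right of the identifier ('[...]' and '(...)') are
    # consumed first, then the pushed prefixes ('*' and '(') are popped,
    # which reproduces C's inside-out declarator binding.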
def parse_declarator(self,lexer,symbols,stack,level=0):
if self.verbose:
print " "*level+"%s.parse_declarator(%s) # --->"%\
(self,stack)
if lexer.tok == '[':
while lexer.tok == '[':
node = Array()
node.parse(lexer,symbols)
self.append(node)
if lexer.tok == '(':
self.parse_error(lexer ,"array of functions" )
elif lexer.tok == '(':
lexer.get_token()
node = Function()
node.parse(lexer,symbols)
self.append( node )
if lexer.tok == '(':
self.parse_error(lexer ,"function returns a function" )
if lexer.tok == '[':
self.parse_error(lexer ,"function returns an array" )
while stack:
tok, kind = stack[-1] # peek
if tok == '(':
stack.pop()
self.consume(lexer,')')
self.parse_declarator(lexer,symbols,stack,level+1)
elif tok == '*':
stack.pop()
self.append( Pointer() )
else:
tok, kind = stack.pop()
self.append( kind )
if self.verbose:
print " "*level+"%s.parse_declarator(%s) # <---"%\
(self,stack)
class AbstractDeclarator(Declarator):
""" used in ParameterDeclaration; may lack an identifier """
def parse_identifier(self,lexer,symbols,stack):
if self.verbose:
print "%s.parse_identifier()"%self
ident = Identifier()
while 1:
if lexer.tok == ';':
self.parse_error(lexer)
if lexer.tok == ')':
break
if lexer.tok == ',':
break
if lexer.tok == '[':
break
if lexer.kind is None:
#print "%s.new identifier"%self
ident = Identifier( lexer.tok )
lexer.get_token()
#stack.append( (ident.name, ident) )
break
stack.append( (lexer.tok, lexer.kind) )
lexer.get_token()
if self.verbose:
print "%s.parse_identifier()=%s"%(self,repr(ident))
return ident
class FieldLength(Node):
"""
"""
pass
class StructDeclarator(Declarator):
"""
"""
def parse(self,lexer,symbols):
if lexer.tok != ':':
Declarator.parse(self,lexer,symbols)
if lexer.tok == ':':
lexer.get_token()
# ConstantExpr
length = int(lexer.tok)
#print "length = ",length
self.append( FieldLength(length) )
lexer.get_token()
class DeclarationSpecifiers(Node,Parser):
"""
"""
def __init__(self,*items):
Node.__init__(self,*items)
def __eq__(self,other):
" unordered (set/bag) equality "
if not isinstance(other,Node):
return 0
for i in range(len(self)):
if not self[i] in other:
return 0
for i in range(len(other)):
if not other[i] in self:
return 0
return 1
def parse(self,lexer,symbols):
self.parse_spec(lexer,symbols)
self.reverse()
def parse_spec(self,lexer,symbols):
typespec = None
while lexer.tok:
if isinstance( lexer.kind, TypeAlias ) or\
isinstance( lexer.kind, BasicType ):
if typespec is not None:
self.parse_error(lexer ,"type already specified as %s"\
%typespec )
typespec=lexer.kind
self.append( lexer.kind )
lexer.get_token()
elif isinstance( lexer.kind, Qualifier ):
self.append( lexer.kind )
lexer.get_token()
elif isinstance( lexer.kind, StorageClass ):
self.append( lexer.kind )
lexer.get_token()
elif lexer.tok=='struct':
lexer.get_token()
self.parse_struct(lexer,symbols)
break #?
elif lexer.tok=='union':
lexer.get_token()
self.parse_union(lexer,symbols)
break #?
elif lexer.tok=='enum':
lexer.get_token()
self.parse_enum(lexer,symbols)
break #?
elif lexer.kind is None:
# identifier
break
else:
break
def parse_struct(self,lexer,symbols):
if self.verbose:
print "%s.parse_struct()"%(self)
node = Struct()
node.parse(lexer,symbols)
_node = None
if node.tag:
_node = symbols.get_tag( node.tag )
if _node is not None:
if not isinstance( _node, Struct ):
self.parse_error(lexer,"tag defined as wrong kind")
if len(node)>1:
if len(_node)>1:
self.parse_error(lexer,"tag already defined as %s"%_node)
#symbols.set_tag( node.tag, node )
#else:
# refer to the previously defined struct
##node = _node
#node = _node.clone()
if 0:
# refer to the previously defined struct
if len(node)==1:
_node = symbols.deep_get_tag( node.tag )
if _node is not None:
node=_node
# But what about any future reference to the struct ?
if node.tag:
symbols.set_tag( node.tag, node )
self.append( node )
def parse_union(self,lexer,symbols):
if self.verbose:
print "%s.parse_union(%s)"%(self,node)
node = Union()
node.parse(lexer,symbols)
_node = None
if node.tag:
_node = symbols.get_tag( node.tag )
if _node is not None:
if not isinstance( _node, Union ):
self.parse_error(lexer,"tag %s defined as wrong kind"%repr(node.tag))
if len(node)>1:
if len(_node)>1:
self.parse_error(lexer,"tag already defined as %s"%_node)
#symbols.set_tag( node.tag, node )
#else:
#node = _node
#if len(node)==1:
#_node = symbols.deep_get_tag( node.tag )
#if _node is not None:
#node=_node
if node.tag:
symbols.set_tag( node.tag, node )
self.append( node )
def parse_enum(self,lexer,symbols):
if self.verbose:
print "%s.parse_enum(%s)"%(self,node)
node = Enum()
node.parse(lexer,symbols)
_node = None
if node.tag:
_node = symbols.get_tag( node.tag )
if _node is not None:
if not isinstance( _node, Enum ):
self.parse_error(lexer,"tag defined as wrong kind")
if len(node)>1:
if len(_node)>1:
self.parse_error(lexer,"tag already defined as %s"%_node)
#symbols.set_tag( node.tag, node )
#else:
#node = _node
#if len(node)==1:
#_node = symbols.deep_get_tag( node.tag )
#if _node is not None:
#node=_node
if node.tag:
symbols.set_tag( node.tag, node )
self.append( node )
def is_typedef(self):
return self.find(Typedef) is not None
def needs_declarator(self):
for node in self:
if isinstance( node, Struct ):
return False
if isinstance( node, Enum ):
return False
if isinstance( node, Union ):
return False
return True
class TypeSpecifiers(DeclarationSpecifiers):
" used in ParameterDeclaration "
def parse_spec(self,lexer,symbols):
typespec = None
while lexer.tok:
if isinstance( lexer.kind, TypeAlias ) or\
isinstance( lexer.kind, BasicType ):
if typespec is not None:
self.parse_error(lexer ,"type already specified as %s"\
%typespec )
typespec=lexer.kind
self.append( lexer.kind )
lexer.get_token()
elif isinstance( lexer.kind, Qualifier ):
self.append( lexer.kind )
lexer.get_token()
elif isinstance( lexer.kind, StorageClass ):
self.parse_error(lexer ,"'%s' cannot appear here"%lexer.tok )
elif lexer.tok=='struct':
lexer.get_token()
self.parse_struct(lexer,symbols)
break #?
elif lexer.tok=='union':
lexer.get_token()
self.parse_union(lexer,symbols)
break #?
elif lexer.tok=='enum':
lexer.get_token()
self.parse_enum(lexer,symbols)
break #?
elif lexer.kind is None:
# identifier
break
else:
break
class Initializer(Node,Parser):
"""
"""
def __init__(self,*items):
Node.__init__(self,*items)
def parse(self,lexer,symbols):
self.parse_error(lexer,"not implemented")
class TypeAlias(Node):
" typedefed things "
def __init__(self,name,decl=None):
Node.__init__(self,name)#,decl)
self.name=name
self.decl=decl
class Declaration(Node,Parser):
"""
"""
def __init__(self,*items):
Node.__init__(self,*items)
#self.acted=False
def parse(self,lexer,symbols):
if not lexer.tok:
return
Parser.parse_enter(self,lexer)
declspec = DeclarationSpecifiers()
declspec.parse(lexer,symbols)
if len(declspec)==0:
if lexer.tok == ';':
lexer.get_token()
# empty declaration...
return
self.parse_error(lexer,
"expected specifiers, got '%s'"%lexer.tok )
self.append(declspec)
while 1:
decl = Declarator()
decl.parse(lexer,symbols)
if len(decl)==0:
if declspec.needs_declarator():
self.parse_error(lexer,
"expected declarator, got '%s'"%lexer.tok )
self.append(decl)
ident = decl.ident
if ident is not None:
#if len(ident):
# install symbol
node = symbols[ident[0]]
if node is not None:
# we allow functions to be defined (as same) again
#print node.deepstr(),'\n', self.deepstr()
_node = node.clone()
_node.delete(Identifier)
_self = self.clone()
_self.delete(Identifier)
if _node != _self:
self.parse_error(lexer,
"\n%s\n already defined as \n%s\n"%\
(self.deepstr(),node.deepstr()))
else:
if self.is_typedef():
#lexer.mktypedef( ident[0], self )
tp = TypeAlias(ident[0],decl)
lexer.mktypedef( ident[0], tp )
else:
symbols[ident[0]] = self
if lexer.tok == '=':
# parse initializer
lexer.get_token()
init = Initializer()
init.parse(lexer,symbols)
ident.append( init ) # as in Enum
#else: struct, union or enum
if lexer.tok == ';':
# no more declarators
break
if lexer.tok == '{':
# ! ahhh, function body !!!
# sys.stderr.write(
# "WARNING: function body found at line %s\n"%lexer.lno )
bcount = 1
while bcount:
lexer.get_brace_token()
if lexer.tok == '}':
bcount -= 1
if lexer.tok == '{':
bcount += 1
lexer.get_token()
Parser.parse_leave(self,lexer)
return
self.consume(lexer,',')
self.consume(lexer,';')
Parser.parse_leave(self,lexer)
def is_typedef(self):
spec=self[0]
assert isinstance(spec,DeclarationSpecifiers), self.deepstr()
return spec.is_typedef()
class ParameterDeclaration(Declaration):
"""
"""
def parse(self,lexer,symbols):
typespec = TypeSpecifiers()
typespec.parse(lexer,symbols)
self.append(typespec)
decl = AbstractDeclarator()
decl.parse(lexer,symbols)
self.append(decl)
ident = decl.ident
if ident is not None and ident[0]:
node = symbols[ident[0]]
if node is not None:
self.parse_error(lexer,
"%s already defined as %s"%(ident,node))
else:
symbols[ident[0]] = self
class StructDeclaration(Declaration):
"""
"""
def parse(self,lexer,symbols):
if not lexer.tok:
return
declspec = DeclarationSpecifiers()
declspec.parse(lexer,symbols)
self.append(declspec)
if len(declspec)==0:
if lexer.tok == ';':
lexer.get_token()
# empty declaration...
return
self.parse_error(lexer,
"expected specifiers, got '%s'"%lexer.tok )
while 1:
decl = StructDeclarator()
decl.parse(lexer,symbols)
if len(decl)==0:
self.parse_error(lexer,
"expected declarator, got '%s'"%lexer.tok )
self.append(decl)
ident = decl.ident
if ident is not None:
node = symbols[ident[0]]
if node is not None:
self.parse_error(lexer ,
"%s already defined as %s"%(ident,node))
else:
if declspec.is_typedef():
self.parse_error(lexer,"typedef in struct or union")
else:
symbols[ident[0]] = self
if lexer.tok == ';':
break
self.consume(lexer,',')
self.consume(lexer,';')
class TransUnit(Node,Parser):
"""
"""
def __init__(self,*items,**kw):
Node.__init__(self,*items,**kw)
def parse(self,s,verbose=0):
self.symbols = Symbols()
self.lexer = Lexer(s,verbose=verbose) #,host=__module__)
node = None
while self.lexer.tok:
node=Declaration()
node.parse(self.lexer,self.symbols)
#sys.stderr.write( "# line %s\n"%self.lexer.lno )
if node:
self.append(node)
#node.psource()
#print node.deepstr(),'\n'
#node.act()
def strip(self,files):
" leave only the declarations from <files> "
i=0
while i<len(self):
if self[i].file in files:
i=i+1
else:
self.pop(i)
def strip_filter(self,cb):
" leave only the declarations such that cb(file) "
i=0
while i<len(self):
if cb(self[i].file):
i=i+1
else:
self.pop(i)
def assert_no_dups(self):
check={}
for node in self.nodes():
assert id(node) not in check
check[id(node)]=1
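# 'NoModule' is not a real module, so the import below always raises
# ImportError and the no-op _psyco stub is used: the psyco JIT stays disabled.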
try:
import NoModule
import psyco
from psyco.classes import *
except ImportError:
class _psyco:
def jit(self): pass
def bind(self, f): pass
def proxy(self, f): return f
psyco = _psyco()
psyco.bind( Lexer.get_token )
psyco.bind( Node )
def run0():
verbose = 0
if not sys.argv[1:]:
s = sys.stdin.read()
if sys.argv[1:]:
s = sys.argv[1]
#if sys.argv[2:]:
#verbose = int(sys.argv[2])
if 0:
import profile
profile.run('TransUnit(s)','prof.out')
import pstats
p=pstats.Stats('prof.out')
p.strip_dirs().sort_stats(-1).print_stats()
else:
node = TransUnit(verbose = 1 )
node.parse(s)
node.act(1,1,1)
def run1():
cstr = "char *(*)() ,"
node = AbstractDeclarator()
node.parse( Lexer(cstr,True), Symbols() )
print node.deepstr()
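def run2():
    # A minimal usage sketch (not one of the original test drivers):
    # parse a couple of C declarations with TransUnit and dump the tree.
    # It only relies on methods already used above (TransUnit.parse,
    # Node iteration and deepstr).
    cstr = "typedef unsigned int uint; uint counters[4];"
    unit = TransUnit(verbose=0)
    unit.parse(cstr)
    for decl in unit:
        print decl.deepstr()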
if __name__=="__main__":
pass