blob: ab5b884514ca1e6733ebd80701ff8a816b795ab2 [file] [log] [blame]
# parser for Unix yacc-based grammars
#
# Author: David Beazley (dave@dabeaz.com)
# Date : October 2, 2006
import ylex
tokens = ylex.tokens
from ply import *
tokenlist = []
preclist = []
emit_code = 1
def p_yacc(p):
'''yacc : defsection rulesection'''
def p_defsection(p):
'''defsection : definitions SECTION
| SECTION'''
p.lexer.lastsection = 1
print "tokens = ", repr(tokenlist)
print
print "precedence = ", repr(preclist)
print
print "# -------------- RULES ----------------"
print
def p_rulesection(p):
'''rulesection : rules SECTION'''
print "# -------------- RULES END ----------------"
print_code(p[2],0)
def p_definitions(p):
'''definitions : definitions definition
| definition'''
def p_definition_literal(p):
'''definition : LITERAL'''
print_code(p[1],0)
def p_definition_start(p):
'''definition : START ID'''
print "start = '%s'" % p[2]
def p_definition_token(p):
'''definition : toktype opttype idlist optsemi '''
for i in p[3]:
if i[0] not in "'\"":
tokenlist.append(i)
if p[1] == '%left':
preclist.append(('left',) + tuple(p[3]))
elif p[1] == '%right':
preclist.append(('right',) + tuple(p[3]))
elif p[1] == '%nonassoc':
preclist.append(('nonassoc',)+ tuple(p[3]))
def p_toktype(p):
'''toktype : TOKEN
| LEFT
| RIGHT
| NONASSOC'''
p[0] = p[1]
def p_opttype(p):
'''opttype : '<' ID '>'
| empty'''
def p_idlist(p):
'''idlist : idlist optcomma tokenid
| tokenid'''
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1]
p[1].append(p[3])
def p_tokenid(p):
'''tokenid : ID
| ID NUMBER
| QLITERAL
| QLITERAL NUMBER'''
p[0] = p[1]
def p_optsemi(p):
'''optsemi : ';'
| empty'''
def p_optcomma(p):
'''optcomma : ','
| empty'''
def p_definition_type(p):
'''definition : TYPE '<' ID '>' namelist optsemi'''
# type declarations are ignored
def p_namelist(p):
'''namelist : namelist optcomma ID
| ID'''
def p_definition_union(p):
'''definition : UNION CODE optsemi'''
# Union declarations are ignored
def p_rules(p):
'''rules : rules rule
| rule'''
if len(p) == 2:
rule = p[1]
else:
rule = p[2]
# Print out a Python equivalent of this rule
embedded = [ ] # Embedded actions (a mess)
embed_count = 0
rulename = rule[0]
rulecount = 1
for r in rule[1]:
# r contains one of the rule possibilities
print "def p_%s_%d(p):" % (rulename,rulecount)
prod = []
prodcode = ""
for i in range(len(r)):
item = r[i]
if item[0] == '{': # A code block
if i == len(r) - 1:
prodcode = item
break
else:
# an embedded action
embed_name = "_embed%d_%s" % (embed_count,rulename)
prod.append(embed_name)
embedded.append((embed_name,item))
embed_count += 1
else:
prod.append(item)
print " '''%s : %s'''" % (rulename, " ".join(prod))
# Emit code
print_code(prodcode,4)
print
rulecount += 1
for e,code in embedded:
print "def p_%s(p):" % e
print " '''%s : '''" % e
print_code(code,4)
print
def p_rule(p):
'''rule : ID ':' rulelist ';' '''
p[0] = (p[1],[p[3]])
def p_rule2(p):
'''rule : ID ':' rulelist morerules ';' '''
p[4].insert(0,p[3])
p[0] = (p[1],p[4])
def p_rule_empty(p):
'''rule : ID ':' ';' '''
p[0] = (p[1],[[]])
def p_rule_empty2(p):
'''rule : ID ':' morerules ';' '''
p[3].insert(0,[])
p[0] = (p[1],p[3])
def p_morerules(p):
'''morerules : morerules '|' rulelist
| '|' rulelist
| '|' '''
if len(p) == 2:
p[0] = [[]]
elif len(p) == 3:
p[0] = [p[2]]
else:
p[0] = p[1]
p[0].append(p[3])
# print "morerules", len(p), p[0]
def p_rulelist(p):
'''rulelist : rulelist ruleitem
| ruleitem'''
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1]
p[1].append(p[2])
def p_ruleitem(p):
'''ruleitem : ID
| QLITERAL
| CODE
| PREC'''
p[0] = p[1]
def p_empty(p):
'''empty : '''
def p_error(p):
pass
yacc.yacc(debug=0)
def print_code(code,indent):
if not emit_code: return
codelines = code.splitlines()
for c in codelines:
print "%s# %s" % (" "*indent,c)