| # parser for Unix yacc-based grammars |
| # |
| # Author: David Beazley (dave@dabeaz.com) |
| # Date : October 2, 2006 |
| |
| import ylex |
| tokens = ylex.tokens |
| |
| from ply import * |
| |
| tokenlist = [] |
| preclist = [] |
| |
| emit_code = 1 |
| |
| def p_yacc(p): |
| '''yacc : defsection rulesection''' |
| |
| def p_defsection(p): |
| '''defsection : definitions SECTION |
| | SECTION''' |
| p.lexer.lastsection = 1 |
| print "tokens = ", repr(tokenlist) |
| print |
| print "precedence = ", repr(preclist) |
| print |
| print "# -------------- RULES ----------------" |
| print |
| |
| def p_rulesection(p): |
| '''rulesection : rules SECTION''' |
| |
| print "# -------------- RULES END ----------------" |
| print_code(p[2],0) |
| |
| def p_definitions(p): |
| '''definitions : definitions definition |
| | definition''' |
| |
| def p_definition_literal(p): |
| '''definition : LITERAL''' |
| print_code(p[1],0) |
| |
| def p_definition_start(p): |
| '''definition : START ID''' |
| print "start = '%s'" % p[2] |
| |
| def p_definition_token(p): |
| '''definition : toktype opttype idlist optsemi ''' |
| for i in p[3]: |
| if i[0] not in "'\"": |
| tokenlist.append(i) |
| if p[1] == '%left': |
| preclist.append(('left',) + tuple(p[3])) |
| elif p[1] == '%right': |
| preclist.append(('right',) + tuple(p[3])) |
| elif p[1] == '%nonassoc': |
| preclist.append(('nonassoc',)+ tuple(p[3])) |
| |
| def p_toktype(p): |
| '''toktype : TOKEN |
| | LEFT |
| | RIGHT |
| | NONASSOC''' |
| p[0] = p[1] |
| |
| def p_opttype(p): |
| '''opttype : '<' ID '>' |
| | empty''' |
| |
| def p_idlist(p): |
| '''idlist : idlist optcomma tokenid |
| | tokenid''' |
| if len(p) == 2: |
| p[0] = [p[1]] |
| else: |
| p[0] = p[1] |
| p[1].append(p[3]) |
| |
| def p_tokenid(p): |
| '''tokenid : ID |
| | ID NUMBER |
| | QLITERAL |
| | QLITERAL NUMBER''' |
| p[0] = p[1] |
| |
| def p_optsemi(p): |
| '''optsemi : ';' |
| | empty''' |
| |
| def p_optcomma(p): |
| '''optcomma : ',' |
| | empty''' |
| |
| def p_definition_type(p): |
| '''definition : TYPE '<' ID '>' namelist optsemi''' |
| # type declarations are ignored |
| |
| def p_namelist(p): |
| '''namelist : namelist optcomma ID |
| | ID''' |
| |
| def p_definition_union(p): |
| '''definition : UNION CODE optsemi''' |
| # Union declarations are ignored |
| |
| def p_rules(p): |
| '''rules : rules rule |
| | rule''' |
| if len(p) == 2: |
| rule = p[1] |
| else: |
| rule = p[2] |
| |
| # Print out a Python equivalent of this rule |
| |
| embedded = [ ] # Embedded actions (a mess) |
| embed_count = 0 |
| |
| rulename = rule[0] |
| rulecount = 1 |
| for r in rule[1]: |
| # r contains one of the rule possibilities |
| print "def p_%s_%d(p):" % (rulename,rulecount) |
| prod = [] |
| prodcode = "" |
| for i in range(len(r)): |
| item = r[i] |
| if item[0] == '{': # A code block |
| if i == len(r) - 1: |
| prodcode = item |
| break |
| else: |
| # an embedded action |
| embed_name = "_embed%d_%s" % (embed_count,rulename) |
| prod.append(embed_name) |
| embedded.append((embed_name,item)) |
| embed_count += 1 |
| else: |
| prod.append(item) |
| print " '''%s : %s'''" % (rulename, " ".join(prod)) |
| # Emit code |
| print_code(prodcode,4) |
| print |
| rulecount += 1 |
| |
| for e,code in embedded: |
| print "def p_%s(p):" % e |
| print " '''%s : '''" % e |
| print_code(code,4) |
| print |
| |
| def p_rule(p): |
| '''rule : ID ':' rulelist ';' ''' |
| p[0] = (p[1],[p[3]]) |
| |
| def p_rule2(p): |
| '''rule : ID ':' rulelist morerules ';' ''' |
| p[4].insert(0,p[3]) |
| p[0] = (p[1],p[4]) |
| |
| def p_rule_empty(p): |
| '''rule : ID ':' ';' ''' |
| p[0] = (p[1],[[]]) |
| |
| def p_rule_empty2(p): |
| '''rule : ID ':' morerules ';' ''' |
| |
| p[3].insert(0,[]) |
| p[0] = (p[1],p[3]) |
| |
| def p_morerules(p): |
| '''morerules : morerules '|' rulelist |
| | '|' rulelist |
| | '|' ''' |
| |
| if len(p) == 2: |
| p[0] = [[]] |
| elif len(p) == 3: |
| p[0] = [p[2]] |
| else: |
| p[0] = p[1] |
| p[0].append(p[3]) |
| |
| # print "morerules", len(p), p[0] |
| |
| def p_rulelist(p): |
| '''rulelist : rulelist ruleitem |
| | ruleitem''' |
| |
| if len(p) == 2: |
| p[0] = [p[1]] |
| else: |
| p[0] = p[1] |
| p[1].append(p[2]) |
| |
| def p_ruleitem(p): |
| '''ruleitem : ID |
| | QLITERAL |
| | CODE |
| | PREC''' |
| p[0] = p[1] |
| |
| def p_empty(p): |
| '''empty : ''' |
| |
| def p_error(p): |
| pass |
| |
| yacc.yacc(debug=0) |
| |
| def print_code(code,indent): |
| if not emit_code: return |
| codelines = code.splitlines() |
| for c in codelines: |
| print "%s# %s" % (" "*indent,c) |
| |