| # Copyright (c) 2003-2005 The Regents of The University of Michigan |
| # All rights reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions are |
| # met: redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer; |
| # redistributions in binary form must reproduce the above copyright |
| # notice, this list of conditions and the following disclaimer in the |
| # documentation and/or other materials provided with the distribution; |
| # neither the name of the copyright holders nor the names of its |
| # contributors may be used to endorse or promote products derived from |
| # this software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| # |
| # Authors: Gabe Black |
| |
| from __future__ import print_function |
| |
| import os |
| import sys |
| import re |
| import string |
| import traceback |
| # get type names |
| from types import * |
| |
| from ply import lex |
| from ply import yacc |
| |
| ########################################################################## |
| # |
| # Base classes for use outside of the assembler |
| # |
| ########################################################################## |
| |
class Micro_Container(object):
    """Named, ordered collection of microops with its lookup tables.

    Attributes:
        microops: microop objects, in the order add_microop() received them.
        name: text printed as the header line by __str__().
        directives: directive name -> callable; handle_statement() looks
            directives up here.
        micro_classes: starts empty; nothing in this class fills it.
        labels: label text -> microop; filled in by handle_statement().
    """

    def __init__(self, name):
        self.microops = []
        self.name = name
        self.directives = {}
        self.micro_classes = {}
        self.labels = {}

    def add_microop(self, mnemonic, microop):
        """Append ``microop``; ``mnemonic`` is accepted but not used here."""
        self.microops.append(microop)

    def __str__(self):
        """Return the name as a header followed by one line per microop."""
        # Collect the pieces and join once.  The previous accumulator was
        # named ``string``, which shadowed the imported module of that name.
        lines = ["%s:\n" % self.name]
        lines.extend(" %s\n" % microop for microop in self.microops)
        return "".join(lines)
| |
class Combinational_Macroop(Micro_Container):
    """Micro_Container subclass that adds no behaviour of its own."""
| |
class Rom_Macroop(object):
    """A macroop described only by its name and a target."""

    def __init__(self, name, target):
        # Both values are stored untouched.
        self.target = target
        self.name = name

    def __str__(self):
        """Return ``"<name>: <target>"`` followed by a newline."""
        fields = (self.name, self.target)
        return "%s: %s\n" % fields
| |
class Rom(Micro_Container):
    """Micro_Container that additionally keeps a table of extern labels."""

    def __init__(self, name):
        super(Rom, self).__init__(name)
        # label text -> microop, for labels declared with "extern".
        self.externs = dict()
| |
| ########################################################################## |
| # |
| # Support classes |
| # |
| ########################################################################## |
| |
class Label(object):
    """A label attached to a microop.

    The grammar actions store a label's data in ``text`` and ``is_extern``,
    and handle_statement() reads those two attributes, so they are given
    defaults here.  ``name`` and ``extern`` are what the constructor
    originally set; they are kept so existing users keep working.
    """

    def __init__(self):
        self.extern = False
        self.name = ""
        # Attributes actually read and written by the rest of the assembler.
        self.is_extern = False
        self.text = ""
| |
class Block(object):
    """Holder for the list of statements found between a pair of braces."""

    def __init__(self):
        # Starts empty; the grammar action for a block replaces this list.
        self.statements = list()
| |
class Statement(object):
    """Common base for parsed statements; subclasses flip one of the flags."""

    def __init__(self):
        # Raw parameter text, empty when the statement has none.
        self.params = ""
        # Neither kind until a subclass says otherwise.
        self.is_microop = self.is_directive = False
| |
class Microop(Statement):
    """Statement that names a microop, optionally preceded by labels."""

    def __init__(self):
        super(Microop, self).__init__()
        self.is_microop = True
        # Label objects attached to this microop.
        self.labels = []
        self.mnemonic = ""
| |
class Directive(Statement):
    """Statement that invokes an assembler directive by name."""

    def __init__(self):
        super(Directive, self).__init__()
        self.is_directive = True
        self.name = ""
| |
| ########################################################################## |
| # |
| # Functions that handle common tasks |
| # |
| ########################################################################## |
| |
def print_error(message):
    """Print ``message`` prefixed by ``*** `` with a blank line on each side."""
    # One call produces the same three lines: blank, message, blank.
    print("\n*** %s\n" % message)
| |
def handle_statement(parser, container, statement):
    """Apply one parsed statement to ``container``.

    A microop statement is instantiated through the class registered in
    ``parser.microops`` and added to the container, and each of its labels is
    recorded in ``container.labels`` (and in ``container.externs`` when the
    label is extern).  A directive statement calls the matching entry of
    ``container.directives``.  In both cases ``statement.params`` is spliced
    into a call expression and evaluated with ``parser.symbols`` as the
    local namespace.

    Raises:
        Exception: for an unknown mnemonic, an unknown directive, or a
            statement that is neither a microop nor a directive.  Any
            exception raised while building or adding a microop, or while
            running a directive, is announced with print_error() and then
            re-raised unchanged.
    """
    if statement.is_microop:
        if statement.mnemonic not in parser.microops:
            raise Exception("Unrecognized mnemonic: %s" % statement.mnemonic)
        parser.symbols["__microopClassFromInsideTheAssembler"] = \
            parser.microops[statement.mnemonic]
        try:
            # NOTE: the parameter text is executed as Python by eval(), so
            # only trusted microcode sources should be assembled.
            microop = eval('__microopClassFromInsideTheAssembler(%s)' %
                           statement.params, {}, parser.symbols)
        except Exception:
            print_error("Error creating microop object with mnemonic %s." %
                        statement.mnemonic)
            raise
        try:
            for label in statement.labels:
                container.labels[label.text] = microop
                if label.is_extern:
                    container.externs[label.text] = microop
            container.add_microop(statement.mnemonic, microop)
        except Exception:
            print_error("Error adding microop.")
            raise
    elif statement.is_directive:
        if statement.name not in container.directives:
            raise Exception("Unrecognized directive: %s" % statement.name)
        parser.symbols["__directiveFunctionFromInsideTheAssembler"] = \
            container.directives[statement.name]
        try:
            # Same caveat as above: the parameters are evaluated as Python.
            eval('__directiveFunctionFromInsideTheAssembler(%s)' %
                 statement.params, {}, parser.symbols)
        except Exception:
            print_error("Error executing directive.")
            print(container.directives)
            raise
    else:
        # The old three-argument raise handed ``statement`` over as a
        # traceback object; put it in the message instead.
        raise Exception("Didn't recognize the type of statement: %r" %
                        (statement,))
| |
| ########################################################################## |
| # |
| # Lexer specification |
| # |
| ########################################################################## |
| |
| # Error handler. Just call exit. Output formatted to work under |
| # Emacs compile-mode. Optional 'print_traceback' arg, if set to True, |
| # prints a Python stack backtrace too (can be handy when trying to |
| # debug the parser itself). |
def error(lineno, string, print_traceback = False):
    """Exit the program with ``string`` as the message.

    A non-zero ``lineno`` is put in front of the message as ``"<lineno>:"``.
    When ``print_traceback`` is true, the current Python exception traceback
    is printed before exiting.
    """
    if print_traceback:
        traceback.print_exc()
    prefix = "%d:" % lineno if lineno != 0 else ""
    sys.exit("%s %s" % (prefix, string))
| |
# Keywords.  They are written in lower case in the input (see reserved_map).
reserved = ('DEF', 'MACROOP', 'ROM', 'EXTERN')

# Every token type the lexer can hand to the parser.
tokens = reserved + (
    'ID',       # identifier
    'PARAMS',   # arguments for microops and directives
    'LPAREN', 'RPAREN', 'LBRACE', 'RBRACE',
    'COLON', 'SEMI', 'DOT', 'NEWLINE',
)

# New lines are ignored at the top level, but they end statements in the
# assembler, so the assembler body and parameter text get their own states.
states = (('asm', 'exclusive'), ('params', 'exclusive'))

# Lower-case keyword text -> token type.
reserved_map = {}
for r in reserved:
    reserved_map[r.lower()] = r
| |
| # Ignore comments |
def t_ANY_COMMENT(t):
    r'\#[^\n]*'
    # Matches from '#' up to, but not including, the end of the line.  The
    # pattern used to demand a following newline, which made a comment on the
    # last line of an input without a trailing newline an illegal character.
    # Nothing is returned, so the comment is discarded.
| |
def t_ANY_MULTILINECOMMENT(t):
    r'/\*([^/]|((?<!\*)/))*\*/'
    # A /* ... */ comment.  It is discarded (nothing is returned), but the
    # newlines inside it still have to be counted or every later line number
    # would be too small.
    t.lexer.lineno += t.value.count('\n')
| |
| # A colon marks the end of a label. It should follow an ID which will |
| # put the lexer in the "params" state. Seeing the colon will put it back |
| # in the "asm" state since it knows it saw a label and not a mnemonic. |
def t_params_COLON(t):
    r':'
    # The preceding ID turned out to be a label, so stop looking for
    # parameters and resume lexing ordinary assembly.
    lexer = t.lexer
    lexer.begin('asm')
    return t
| |
| # Parameters are a string of text which doesn't contain an unescaped |
| # statement terminator, i.e. a newline or semicolon. |
def t_params_PARAMS(t):
    r'([^\n;\\]|(\\[\n;\\]))+'
    # Parameter text: everything up to the first unescaped newline or
    # semicolon.  A backslash may escape a newline, a semicolon or another
    # backslash.
    #
    # PLY keeps the running line count on the lexer; adding to the token's
    # own ``lineno`` is not seen by later tokens.
    t.lexer.lineno += t.value.count('\n')
    # Drop the escaping backslash and keep the character it protected.
    t.value = re.sub(r'\\([\n;\\])', r'\1', t.value)
    t.lexer.begin('asm')
    return t
| |
| # An "ID" in the micro assembler is either a label, directive, or mnemonic |
| # If it's either a directive or a mnemonic, it will be optionally followed by |
| # parameters. If it's a label, the following colon will make the lexer stop |
| # looking for parameters. |
def t_asm_ID(t):
    r'[A-Za-z_]\w*'
    # Keywords get their own token type; anything else is a plain ID.
    t.type = reserved_map.get(t.value, 'ID')
    # "extern" only introduces a label; the real ID is still to come, so the
    # lexer must not start collecting parameters yet.
    if t.type == 'EXTERN':
        return t
    t.lexer.begin('params')
    return t
| |
| # If there is a label and you're -not- in the assembler (which would be caught |
| # above), don't start looking for parameters. |
def t_ANY_ID(t):
    r'[A-Za-z_]\w*'
    # Identifier seen where the asm-specific rule does not apply: classify it
    # as a keyword or a plain ID and leave the lexer state alone.
    token_type = reserved_map.get(t.value, 'ID')
    t.type = token_type
    return t
| |
| # Braces enter and exit micro assembly |
def t_INITIAL_LBRACE(t):
    r'\{'
    # An opening brace at the top level starts a block of micro assembly.
    lexer = t.lexer
    lexer.begin('asm')
    return t
| |
def t_asm_RBRACE(t):
    r'\}'
    # A closing brace ends the micro assembly and returns to the top level.
    lexer = t.lexer
    lexer.begin('INITIAL')
    return t
| |
| # At the top level, keep track of newlines only for line counting. |
def t_INITIAL_NEWLINE(t):
    r'\n+'
    # Top-level newlines only matter for line counting, so no token is
    # returned.  The count lives on the lexer; incrementing the token's own
    # ``lineno`` would be lost as soon as this token is discarded.
    t.lexer.lineno += t.value.count('\n')
| |
| # In the micro assembler, do line counting but also return a token. The |
| # token is needed by the parser to detect the end of a statement. |
def t_asm_NEWLINE(t):
    r'\n+'
    # Inside the assembler a newline ends a statement, so the token is
    # returned to the parser.  The line count is kept on the lexer so that
    # later tokens report the right line.
    t.lexer.lineno += t.value.count('\n')
    return t
| |
| # A newline or semi colon when looking for params signals that the statement |
| # is over and the lexer should go back to looking for regular assembly. |
def t_params_NEWLINE(t):
    r'\n+'
    # A newline while looking for parameters means the statement had none:
    # count the lines on the lexer, go back to lexing assembly, and hand the
    # statement terminator to the parser.
    t.lexer.lineno += t.value.count('\n')
    t.lexer.begin('asm')
    return t
| |
def t_params_SEMI(t):
    r';'
    # A semicolon while looking for parameters ends the statement; go back
    # to lexing ordinary assembly.
    lexer = t.lexer
    lexer.begin('asm')
    return t
| |
| # Basic regular expressions to pick out simple tokens |
# Single-character tokens, recognised in every lexer state.
t_ANY_LPAREN = r"\("
t_ANY_RPAREN = r"\)"
t_ANY_SEMI = r";"
t_ANY_DOT = r"\."

# Characters skipped between tokens in every state: space, tab, form feed.
t_ANY_ignore = ' \t\x0c'
| |
def t_ANY_error(t):
    """Report an illegal character through error()."""
    error(t.lineno, "illegal character '%s'" % t.value[0])
    # error() exits, so this line only runs if that ever changes.  Skipping
    # is a lexer operation in PLY, not a token method.
    t.lexer.skip(1)
| |
| ########################################################################## |
| # |
| # Parser specification |
| # |
| ########################################################################## |
| |
| # Start symbol for a file which may have more than one macroop or rom |
| # specification. |
def p_file(t):
    "file : opt_rom_or_macros"
    # Start symbol.  The rule has no semantic action.
    return None
| |
def p_opt_rom_or_macros_0(t):
    "opt_rom_or_macros : "
    # Empty alternative: the input contains no definitions.  No action.
    return None
| |
def p_opt_rom_or_macros_1(t):
    "opt_rom_or_macros : rom_or_macros"
    # Non-empty alternative.  No action.
    return None
| |
def p_rom_or_macros_0(t):
    "rom_or_macros : rom_or_macro"
    # First element of the sequence.  No action.
    return None
| |
def p_rom_or_macros_1(t):
    "rom_or_macros : rom_or_macros rom_or_macro"
    # Left-recursive continuation of the sequence.  No action.
    return None
| |
def p_rom_or_macro_0(t):
    '''rom_or_macro : rom_block
                    | macroop_def'''
    # A top-level item is either a ROM block or a macroop definition.  The
    # work is done by those rules; nothing happens here.
    return None
| |
| # Defines a section of microcode that should go in the current ROM |
def p_rom_block(t):
    'rom_block : DEF ROM block SEMI'
    # Feeds every statement of the block into the parser's ROM object and
    # yields that object as the value of the rule.
    #
    # Raises TypeError when the assembler was built without a ROM object.
    rom = t.parser.rom
    if not rom:
        print_error("Rom block found, but no Rom object specified.")
        raise TypeError("Rom block found, but no Rom object was specified.")
    for statement in t[3].statements:
        handle_statement(t.parser, rom, statement)
    t[0] = rom
| |
| # Defines a macroop that jumps to an external label in the ROM |
def p_macroop_def_0(t):
    'macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI'
    # Builds a ROM-based macroop from its name (first ID) and its target
    # (second ID) and registers it under the name in parser.macroops.
    #
    # Raises TypeError when no ROM macroop class was given to the assembler.
    if not t.parser.rom_macroop_type:
        message = \
            "ROM based macroop found, but no ROM macroop class was specified."
        print_error(message)
        raise TypeError(message)
    name, target = t[3], t[5]
    t.parser.macroops[name] = t.parser.rom_macroop_type(name, target)
| |
| |
| # Defines a macroop that is combinationally generated |
def p_macroop_def_1(t):
    'macroop_def : DEF MACROOP ID block SEMI'
    # Creates a macroop object of the configured type, fills it from the
    # block's statements and registers it under its name.
    name = t[3]
    try:
        macroop = t.parser.macro_type(name)
    except TypeError:
        print_error("Error creating macroop object.")
        raise
    body = t[4]
    for stmt in body.statements:
        handle_statement(t.parser, macroop, stmt)
    t.parser.macroops[name] = macroop
| |
| # A block of statements |
def p_block(t):
    'block : LBRACE statements RBRACE'
    # Wrap the statement list found between the braces in a Block.
    result = Block()
    result.statements = t[2]
    t[0] = result
| |
def p_statements_0(t):
    'statements : statement'
    # Start the list; an empty statement contributes nothing.
    first = t[1]
    t[0] = [first] if first else []
| |
def p_statements_1(t):
    'statements : statements statement'
    # Extend the existing list in place; empty statements are dropped.
    collected, latest = t[1], t[2]
    if latest:
        collected.append(latest)
    t[0] = collected
| |
def p_statement(t):
    'statement : content_of_statement end_of_statement'
    # The terminator carries no information; pass the content through.
    content = t[1]
    t[0] = content
| |
| # A statement can be a microop or an assembler directive |
def p_content_of_statement_0(t):
    '''content_of_statement : microop
                            | directive'''
    # Either alternative is passed through unchanged.
    parsed = t[1]
    t[0] = parsed
| |
| # Ignore empty statements |
def p_content_of_statement_1(t):
    'content_of_statement : '
    # Empty statement: nothing is assigned to t[0].
    return None
| |
| # Statements are ended by newlines or a semi colon |
def p_end_of_statement(t):
    '''end_of_statement : NEWLINE
                        | SEMI'''
    # A newline or a semicolon ends a statement; neither produces a value.
    return None
| |
| # Different flavors of microop to avoid shift/reduce errors |
def p_microop_0(t):
    'microop : labels ID'
    # Labelled microop without parameters.
    op = Microop()
    op.mnemonic = t[2]
    op.labels = t[1]
    t[0] = op
| |
def p_microop_1(t):
    'microop : ID'
    # Bare mnemonic: no labels and no parameters.
    op = Microop()
    op.mnemonic = t[1]
    t[0] = op
| |
def p_microop_2(t):
    'microop : labels ID PARAMS'
    # Labelled microop with parameters.
    op = Microop()
    op.params = t[3]
    op.mnemonic = t[2]
    op.labels = t[1]
    t[0] = op
| |
def p_microop_3(t):
    'microop : ID PARAMS'
    # Unlabelled microop with parameters.
    op = Microop()
    op.params = t[2]
    op.mnemonic = t[1]
    t[0] = op
| |
| # Labels in the microcode |
def p_labels_0(t):
    'labels : label'
    # Start a new list holding the first label.
    first = t[1]
    t[0] = [first]
| |
def p_labels_1(t):
    'labels : labels label'
    # Append to the existing list in place and pass it on.
    so_far = t[1]
    so_far.append(t[2])
    t[0] = so_far
| |
| # labels on lines by themselves are attached to the following instruction. |
def p_labels_2(t):
    'labels : labels NEWLINE'
    # A newline after labels is swallowed, so the labels stay attached to
    # the instruction that follows.
    so_far = t[1]
    t[0] = so_far
| |
def p_label_0(t):
    'label : ID COLON'
    # Ordinary (non-extern) label.
    lbl = Label()
    lbl.text = t[1]
    lbl.is_extern = False
    t[0] = lbl
| |
def p_label_1(t):
    'label : EXTERN ID COLON'
    # Label introduced by the "extern" keyword.
    lbl = Label()
    lbl.text = t[2]
    lbl.is_extern = True
    t[0] = lbl
| |
| # Directives for the macroop |
def p_directive_0(t):
    'directive : DOT ID'
    # Directive without parameters.
    parsed = Directive()
    parsed.name = t[2]
    t[0] = parsed
| |
def p_directive_1(t):
    'directive : DOT ID PARAMS'
    # Directive with parameters.
    parsed = Directive()
    parsed.params = t[3]
    parsed.name = t[2]
    t[0] = parsed
| |
| # Parse error handler. Note that the argument here is the offending |
| # *token*, not a grammar symbol (hence the need to use t.value) |
def p_error(t):
    """Report a syntax error through error().

    ``t`` is the offending token, or a false value when no token is
    available, in which case a traceback is requested as well.
    """
    if not t:
        error(0, "unknown syntax error", True)
        return
    error(t.lineno, "syntax error at '%s'" % t.value)
| |
class MicroAssembler(object):
    """Front end that owns the PLY lexer and parser built from this module."""

    def __init__(self, macro_type, microops,
            rom = None, rom_macroop_type = None):
        """Build the lexer and parser and attach the assembler's settings.

        The arguments are stored as attributes of the parser, where the
        grammar actions read them.  ``symbols`` is shared between this
        object and the parser.
        """
        self.lexer = lex.lex()
        parser = yacc.yacc()
        self.parser = parser
        symbols = {}
        parser.macro_type = macro_type
        parser.macroops = {}
        parser.microops = microops
        parser.rom = rom
        parser.rom_macroop_type = rom_macroop_type
        parser.symbols = symbols
        self.symbols = symbols

    def assemble(self, asm):
        """Parse ``asm`` and return the macroops it defined.

        The parser's macroop table is replaced by a fresh dict afterwards.
        """
        parser = self.parser
        parser.parse(asm, lexer=self.lexer)
        result, parser.macroops = parser.macroops, {}
        return result