| # Copyright (c) 2003-2005 The Regents of The University of Michigan |
| # All rights reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions are |
| # met: redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer; |
| # redistributions in binary form must reproduce the above copyright |
| # notice, this list of conditions and the following disclaimer in the |
| # documentation and/or other materials provided with the distribution; |
| # neither the name of the copyright holders nor the names of its |
| # contributors may be used to endorse or promote products derived from |
| # this software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| import os |
| import sys |
| import re |
| import traceback |
| |
| # get type names |
| from types import * |
| |
| from ply import lex |
| from ply import yacc |
| |
| ########################################################################## |
| # |
| # Base classes for use outside of the assembler |
| # |
| ########################################################################## |
| |
| |
class MicroContainer:
    """Named, ordered collection of microops.

    Attributes set by the constructor:
        microops: list of microop objects in insertion order.
        name: the name this container was created with.
        directives: mapping of directive name to callable; starts empty.
        micro_classes: mapping that starts empty (not populated by this
            class itself).
        labels: mapping that starts empty (not populated by this class
            itself).
    """

    def __init__(self, name):
        self.microops = list()
        self.name = name
        self.directives = dict()
        self.micro_classes = dict()
        self.labels = dict()

    def add_microop(self, mnemonic, microop):
        """Append *microop*, stamping it with its mnemonic and micropc.

        The micropc is the index the microop will occupy in
        ``self.microops``, i.e. the number of microops already stored.
        """
        next_pc = len(self.microops)
        microop.mnemonic = mnemonic
        microop.micropc = next_pc
        self.microops.append(microop)

    def __str__(self):
        # Header line with the container name, then one line per microop.
        body = "".join(f" {op}\n" for op in self.microops)
        return f"{self.name}:\n{body}"
| |
| |
class CombinationalMacroop(MicroContainer):
    """A MicroContainer subclass that adds no attributes or methods."""
| |
| |
class RomMacroop:
    """Pairs a macroop name with a target.

    The parser action for ``def macroop NAME(TARGET);`` passes the two
    identifiers to the configured ROM macroop class in this order.
    """

    def __init__(self, name, target):
        self.name, self.target = name, target

    def __str__(self):
        # "<name>: <target>" followed by a newline.
        return "".join((f"{self.name}", ": ", f"{self.target}", "\n"))
| |
| |
class Rom(MicroContainer):
    """A MicroContainer that also has an ``externs`` mapping.

    ``externs`` starts empty; ``handle_statement`` stores a microop there,
    keyed by label text, for every label flagged ``is_extern``.
    """

    def __init__(self, name):
        MicroContainer.__init__(self, name)
        self.externs = dict()
| |
| |
| ########################################################################## |
| # |
| # Support classes |
| # |
| ########################################################################## |
| |
| |
class Label(object):
    """A label attached to a microop.

    The parser actions and ``handle_statement`` read and write
    ``is_extern`` and ``text``, so those are the attributes initialised
    here. ``extern`` and ``name`` are the attributes the constructor
    historically set; they are kept so existing code that reads them keeps
    working.
    """

    def __init__(self):
        # Attributes actually used by the assembler.
        self.is_extern = False
        self.text = ""
        # Legacy attributes, retained for backward compatibility.
        self.extern = False
        self.name = ""
| |
| |
class Block:
    """A brace-delimited group of statements; starts with an empty list."""

    def __init__(self):
        self.statements = list()
| |
| |
class Statement:
    """Base class for parsed statements.

    Both kind flags start out False; subclasses switch on the one that
    applies. ``params`` is the raw parameter text, empty by default.
    """

    def __init__(self):
        self.is_microop = self.is_directive = False
        self.params = ""
| |
| |
class Microop(Statement):
    """A microop statement: a mnemonic plus the labels attached to it."""

    def __init__(self):
        Statement.__init__(self)
        self.mnemonic = ""
        self.labels = list()
        # Mark this statement so handle_statement takes the microop path.
        self.is_microop = True
| |
| |
class Directive(Statement):
    """A directive statement, identified by its name."""

    def __init__(self):
        Statement.__init__(self)
        self.name = ""
        # Mark this statement so handle_statement takes the directive path.
        self.is_directive = True
| |
| |
| ########################################################################## |
| # |
| # Functions that handle common tasks |
| # |
| ########################################################################## |
| |
| |
def print_error(message):
    """Print *message* prefixed with ``*** `` and framed by blank lines."""
    for line in ("", f"*** {message}", ""):
        print(line)
| |
| |
def _assemble_microop(parser, container, statement):
    """Instantiate the microop named by *statement* and add it to *container*."""
    mnemonic = statement.mnemonic
    if mnemonic not in parser.microops:
        raise Exception(f"Unrecognized mnemonic: {mnemonic}")

    # The microop class is published under a fixed name in the symbol table
    # so the parameter text can be evaluated as a call to it.
    # NOTE(review): this eval()s text taken from the assembly source; it
    # must only ever be fed trusted microcode.
    parser.symbols["__microopClassFromInsideTheAssembler"] = parser.microops[
        mnemonic
    ]
    call_text = f"__microopClassFromInsideTheAssembler({statement.params})"
    try:
        microop = eval(call_text, {}, parser.symbols)
    except:
        print_error(
            f"Error creating microop object with mnemonic {mnemonic}."
        )
        raise

    try:
        for label in statement.labels:
            container.labels[label.text] = microop
            if label.is_extern:
                container.externs[label.text] = microop
        container.add_microop(mnemonic, microop)
    except:
        print_error("Error adding microop.")
        raise


def _run_directive(parser, container, statement):
    """Call the directive named by *statement* with its parameter text."""
    directive_name = statement.name
    if directive_name not in container.directives:
        raise Exception(f"Unrecognized directive: {directive_name}")

    # Same scheme as for microops: publish the callable under a fixed name
    # and evaluate the parameter text as its argument list.
    # NOTE(review): eval() on assembly source text; trusted input only.
    parser.symbols[
        "__directiveFunctionFromInsideTheAssembler"
    ] = container.directives[directive_name]
    call_text = (
        f"__directiveFunctionFromInsideTheAssembler({statement.params})"
    )
    try:
        eval(call_text, {}, parser.symbols)
    except:
        print_error("Error executing directive.")
        print(container.directives)
        raise


def handle_statement(parser, container, statement):
    """Apply one parsed statement to *container*.

    A microop statement is instantiated from ``parser.microops`` and added
    to the container, registering its labels on the way. A directive
    statement is looked up in ``container.directives`` and called. In both
    cases the parameter text is evaluated with ``parser.symbols`` as the
    local namespace.

    Raises:
        Exception: for an unknown mnemonic or directive, or for a
            statement that is neither a microop nor a directive.
        Any exception raised while building or adding the microop, or
        while running the directive, is re-raised after an error message
        has been printed.
    """
    if statement.is_microop:
        _assemble_microop(parser, container, statement)
        return
    if statement.is_directive:
        _run_directive(parser, container, statement)
        return
    raise Exception(f"Didn't recognize the type of statement {statement}")
| |
| |
| ########################################################################## |
| # |
| # Lexer specification |
| # |
| ########################################################################## |
| |
| # Error handler. Just call exit. Output formatted to work under |
| # Emacs compile-mode. Optional 'print_traceback' arg, if set to True, |
| # prints a Python stack backtrace too (can be handy when trying to |
| # debug the parser itself). |
def error(lineno, string, print_traceback=False):
    """Terminate the process with a ``<line>: <message>`` exit message.

    Args:
        lineno: line number to prefix the message with; 0 means no line
            number is known and the prefix is left empty.
        string: the message text.
        print_traceback: when true, print the current exception traceback
            before exiting.

    Raises:
        SystemExit: always, via ``sys.exit``.
    """
    if print_traceback:
        traceback.print_exc()
    prefix = ("%d:" % lineno) if lineno != 0 else ""
    sys.exit(f"{prefix} {string}")
| |
| |
# Keywords of the micro-assembly language, named as token types.
reserved = ("DEF", "MACROOP", "ROM", "EXTERN")

# Every token type the lexer can produce: the keywords first, then the
# identifier, parameter text, punctuation and newline tokens.
tokens = (
    *reserved,
    # identifier
    "ID",
    # arguments for microops and directives
    "PARAMS",
    "LPAREN",
    "RPAREN",
    "LBRACE",
    "RBRACE",
    "COLON",
    "SEMI",
    "DOT",
    "NEWLINE",
)

# Newlines are ignored at the top level, but they end statements inside the
# assembler, so the lexer uses separate exclusive states.
states = tuple(
    (state_name, "exclusive") for state_name in ("asm", "params", "header")
)

# Maps the lower-case spelling of each keyword to its token type.
reserved_map = {}
for r in reserved:
    reserved_map[r.lower()] = r
| |
| # Ignore comments |
def t_ANY_COMMENT(t):
    r"\#[^\n]*"
    # The docstring above is the token regex, not documentation.
    # Matches from '#' up to, but not including, the end of the line, and
    # returns nothing so the comment is discarded. The newline is left for
    # the NEWLINE rules. No trailing newline is required, so a comment on
    # the last line of the input is also recognised.
| |
| |
def t_ANY_MULTILINECOMMENT(t):
    r"/\*([^/]|((?<!\*)/))*\*/"
    # The docstring above is the token regex, not documentation.
    # A block comment can span several lines; count them so that tokens
    # after the comment carry the right line number. Nothing is returned,
    # so the comment itself is discarded.
    t.lexer.lineno += t.value.count("\n")
| |
| |
| # A colon marks the end of a label. It should follow an ID which will |
| # put the lexer in the "params" state. Seeing the colon will put it back |
| # in the "asm" state since it knows it saw a label and not a mnemonic. |
def t_params_COLON(t):
    r":"
    # A colon means the preceding ID was a label, so leave the "params"
    # state that the ID rule entered.
    lexer = t.lexer
    lexer.pop_state()
    return t
| |
| |
# Parameters are a string of text which doesn't contain an unescaped
# statement terminator, i.e. a newline or semicolon.
def t_params_PARAMS(t):
    r"([^\n;\\]|(\\[\n;\\]))+"
    # The docstring above is the token regex, not documentation.
    # Escaped newlines are part of the parameter text, so they still have
    # to advance the line counter. The counter lives on the lexer; changing
    # only the token's lineno would not carry over to later tokens.
    t.lexer.lineno += t.value.count("\n")
    # Strip the backslash from each escaped newline, semicolon or backslash.
    t.value = re.sub(r"\\([\n;\\])", lambda mo: mo.group(1), t.value)
    # The parameter text ends the "params" state entered by the ID rule.
    t.lexer.pop_state()
    return t
| |
| |
# An "ID" in the micro assembler is either a label, directive, or mnemonic.
# If it's either a directive or a mnemonic, it will optionally be followed by
# parameters. If it's a label, the following colon will make the lexer stop
# looking for parameters.
def t_asm_ID(t):
    r"[A-Za-z_]\w*"
    # Keywords get their own token type; anything else is a plain ID.
    kind = reserved_map.get(t.value, "ID")
    t.type = kind
    # "extern" only introduces a label; the real ID is still to come, so
    # don't start looking for parameters yet.
    if kind == "EXTERN":
        return t
    t.lexer.push_state("params")
    return t
| |
| |
def t_header_ID(t):
    r"[A-Za-z_]\w*"
    # The docstring above is the token regex, not documentation.
    # In the "header" state an identifier is returned unchanged: there is
    # no keyword lookup and no lexer state change.
    return t
| |
| |
| # If there is a label and you're -not- in the assembler (which would be caught |
| # above), don't start looking for parameters. |
def t_ANY_ID(t):
    r"[A-Za-z_]\w*"
    # Keywords get their own token type; anything else is a plain ID.
    t.type = reserved_map.get(t.value, "ID")
    # "macroop" and "rom" both open a definition: queue up the "asm" state
    # for the body and enter "header" on top of it for the text before the
    # opening brace.
    if t.type in ("MACROOP", "ROM"):
        for state in ("asm", "header"):
            t.lexer.push_state(state)
    return t
| |
| |
| # Braces enter and exit micro assembly |
def t_header_LBRACE(t):
    r"\{"
    # The opening brace ends the header: drop the "header" state so the
    # state pushed beneath it becomes active.
    lexer = t.lexer
    lexer.pop_state()
    return t
| |
| |
def t_asm_RBRACE(t):
    r"\}"
    # The closing brace ends the assembly body: leave the "asm" state.
    lexer = t.lexer
    lexer.pop_state()
    return t
| |
| |
| # In the micro assembler, do line counting but also return a token. The |
| # token is needed by the parser to detect the end of a statement. |
def t_asm_NEWLINE(t):
    r"\n+"
    # The docstring above is the token regex, not documentation.
    # Advance the lexer's own line counter; changing only the token's
    # lineno would not carry over to later tokens.
    t.lexer.lineno += t.value.count("\n")
    # Inside the assembler the newline is returned as a token because it
    # terminates a statement.
    return t
| |
| |
| # A newline or semi colon when looking for params signals that the statement |
| # is over and the lexer should go back to looking for regular assembly. |
def t_params_NEWLINE(t):
    r"\n+"
    # The docstring above is the token regex, not documentation.
    # Advance the lexer's own line counter; changing only the token's
    # lineno would not carry over to later tokens.
    t.lexer.lineno += t.value.count("\n")
    # The statement is over: leave the "params" state and hand the newline
    # to the parser as the statement terminator.
    t.lexer.pop_state()
    return t
| |
| |
def t_params_SEMI(t):
    r";"
    # A semicolon ends the statement: leave the "params" state.
    lexer = t.lexer
    lexer.pop_state()
    return t
| |
| |
| # Unless handled specially above, track newlines only for line counting. |
def t_ANY_NEWLINE(t):
    r"\n+"
    # The docstring above is the token regex, not documentation.
    # Advance the lexer's own line counter; changing only the token's
    # lineno would not carry over to later tokens. Nothing is returned, so
    # the newline is dropped.
    t.lexer.lineno += t.value.count("\n")
| |
| |
# Basic regular expressions to pick out simple tokens. Each string is the
# regex for the token type named after the "t_ANY_" prefix.
t_ANY_LPAREN = r"\("
t_ANY_RPAREN = r"\)"
t_ANY_SEMI = r";"
t_ANY_DOT = r"\."

# Characters skipped between tokens: space, tab and form feed (\x0c).
t_ANY_ignore = " \t\x0c"
| |
| |
def t_ANY_error(t):
    """Report an illegal character and abort.

    ``error`` exits the process, so the skip below is only reached if that
    ever changes. It is issued on the lexer, which is where PLY provides
    ``skip``; the token object has no such method.
    """
    error(t.lineno, f"illegal character '{t.value[0]}'")
    t.lexer.skip(1)
| |
| |
| ########################################################################## |
| # |
| # Parser specification |
| # |
| ########################################################################## |
| |
| # Start symbol for a file which may have more than one macroop or rom |
| # specification. |
def p_file(t):
    "file : opt_rom_or_macros"
    # The docstring above is the grammar rule, not documentation.
    # No semantic action: t[0] is left unset.
| |
| |
def p_opt_rom_or_macros_0(t):
    "opt_rom_or_macros :"
    # Empty production, so a file may contain no definitions at all.
    # No semantic action.
| |
| |
def p_opt_rom_or_macros_1(t):
    "opt_rom_or_macros : rom_or_macros"
    # One or more definitions. No semantic action; the individual
    # definition rules do the work.
| |
| |
def p_rom_or_macros_0(t):
    "rom_or_macros : rom_or_macro"
    # Base case of the definition list. No semantic action.
| |
| |
def p_rom_or_macros_1(t):
    "rom_or_macros : rom_or_macros rom_or_macro"
    # Left-recursive case of the definition list. No semantic action.
| |
| |
def p_rom_or_macro_0(t):
    """rom_or_macro : rom_block
                    | macroop_def"""
    # A single definition is either a ROM block or a macroop definition.
    # No semantic action.
| |
| |
| # Defines a section of microcode that should go in the current ROM |
def p_rom_block(t):
    "rom_block : DEF ROM block SEMI"
    # Every statement of the block is applied to the parser's ROM object,
    # which then becomes the value of the rule. A ROM block without a
    # configured ROM object is an error.
    parser = t.parser
    if not parser.rom:
        print_error("Rom block found, but no Rom object specified.")
        raise TypeError("Rom block found, but no Rom object was specified.")
    block = t[3]
    for stmt in block.statements:
        handle_statement(parser, parser.rom, stmt)
    t[0] = parser.rom
| |
| |
| # Defines a macroop that jumps to an external label in the ROM |
def p_macroop_def_0(t):
    "macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI"
    # Builds a macroop from its name (t[3]) and target (t[5]) with the
    # configured ROM macroop class and records it under its name.
    parser = t.parser
    if not parser.rom_macroop_type:
        message = (
            "ROM based macroop found, but no ROM macroop "
            "class was specified."
        )
        print_error(message)
        raise TypeError(message)
    name, target = t[3], t[5]
    parser.macroops[name] = parser.rom_macroop_type(name, target)
| |
| |
| # Defines a macroop that is combinationally generated |
def p_macroop_def_1(t):
    "macroop_def : DEF MACROOP ID block SEMI"
    # Creates a macroop container named t[3], applies every statement of
    # the block to it, then records it under its name.
    parser = t.parser
    name = t[3]
    try:
        macroop = parser.macro_type(name)
    except TypeError:
        print_error("Error creating macroop object.")
        raise
    for stmt in t[4].statements:
        handle_statement(parser, macroop, stmt)
    parser.macroops[name] = macroop
| |
| |
| # A block of statements |
def p_block(t):
    "block : LBRACE statements RBRACE"
    # Wrap the collected statement list in a Block object.
    result = Block()
    result.statements = t[2]
    t[0] = result
| |
| |
def p_statements_0(t):
    "statements : statement"
    # Start the list; an empty statement (falsy value) contributes nothing.
    first = t[1]
    t[0] = [first] if first else []
| |
| |
def p_statements_1(t):
    "statements : statements statement"
    # Extend the existing list in place; empty statements are skipped.
    collected, latest = t[1], t[2]
    if latest:
        collected.append(latest)
    t[0] = collected
| |
| |
def p_statement(t):
    "statement : content_of_statement end_of_statement"
    # The terminator carries no information; pass the content through.
    content = t[1]
    t[0] = content
| |
| |
| # A statement can be a microop or an assembler directive |
def p_content_of_statement_0(t):
    """content_of_statement : microop
                            | directive"""
    # Either alternative is passed through unchanged.
    parsed = t[1]
    t[0] = parsed
| |
| |
| # Ignore empty statements |
def p_content_of_statement_1(t):
    "content_of_statement :"
    # Empty statement: nothing to do, t[0] is left untouched.
    return None
| |
| |
| # Statements are ended by newlines or a semi colon |
def p_end_of_statement(t):
    """end_of_statement : NEWLINE
                        | SEMI"""
    # The terminator has no value of its own; t[0] is left untouched.
    return None
| |
| |
| # Different flavors of microop to avoid shift/reduce errors |
def p_microop_0(t):
    "microop : labels ID"
    # Labelled microop without parameters.
    op = Microop()
    op.labels, op.mnemonic = t[1], t[2]
    t[0] = op
| |
| |
def p_microop_1(t):
    "microop : ID"
    # Bare microop: no labels, no parameters.
    op = Microop()
    op.mnemonic = t[1]
    t[0] = op
| |
| |
def p_microop_2(t):
    "microop : labels ID PARAMS"
    # Labelled microop with parameters.
    op = Microop()
    op.labels, op.mnemonic, op.params = t[1], t[2], t[3]
    t[0] = op
| |
| |
def p_microop_3(t):
    "microop : ID PARAMS"
    # Unlabelled microop with parameters.
    op = Microop()
    op.mnemonic, op.params = t[1], t[2]
    t[0] = op
| |
| |
| # Labels in the microcode |
def p_labels_0(t):
    "labels : label"
    # Start a new label list with the first label.
    first = t[1]
    t[0] = [first]
| |
| |
def p_labels_1(t):
    "labels : labels label"
    # Append to the existing list in place and pass it on.
    collected = t[1]
    collected.append(t[2])
    t[0] = collected
| |
| |
| # labels on lines by themselves are attached to the following instruction. |
def p_labels_2(t):
    "labels : labels NEWLINE"
    # A newline after labels changes nothing: the list is passed through
    # so the labels attach to the following instruction.
    collected = t[1]
    t[0] = collected
| |
| |
def p_label_0(t):
    "label : ID COLON"
    # Ordinary (non-extern) label named by the ID.
    lbl = Label()
    lbl.is_extern, lbl.text = False, t[1]
    t[0] = lbl
| |
| |
def p_label_1(t):
    "label : EXTERN ID COLON"
    # Extern label; the name is the ID after the "extern" keyword.
    lbl = Label()
    lbl.is_extern, lbl.text = True, t[2]
    t[0] = lbl
| |
| |
| # Directives for the macroop |
def p_directive_0(t):
    "directive : DOT ID"
    # Directive without parameters; the name is the ID after the dot.
    result = Directive()
    result.name = t[2]
    t[0] = result
| |
| |
def p_directive_1(t):
    "directive : DOT ID PARAMS"
    # Directive with parameters.
    result = Directive()
    result.name, result.params = t[2], t[3]
    t[0] = result
| |
| |
| # Parse error handler. Note that the argument here is the offending |
| # *token*, not a grammar symbol (hence the need to use t.value) |
def p_error(t):
    """Report a syntax error through ``error``.

    *t* is the offending token. When it is falsy no token is available, so
    a generic message is reported with line number 0 and a traceback is
    requested.
    """
    if not t:
        error(0, "unknown syntax error", True)
    else:
        error(t.lineno, f"syntax error at '{t.value}'")
| |
| |
class MicroAssembler:
    """Front end that ties the lexer, the parser and their settings together.

    Args:
        macro_type: callable used to create a macroop from its name.
        microops: mapping of mnemonic to the callable that builds the microop.
        rom: object that ROM blocks are assembled into, or None.
        rom_macroop_type: callable used for ROM-based macroops, or None.

    ``symbols`` is the same dict object as ``parser.symbols``; it is the
    namespace in which microop and directive parameters are evaluated.
    """

    def __init__(self, macro_type, microops, rom=None, rom_macroop_type=None):
        self.lexer = lex.lex()
        self.parser = parser = yacc.yacc(write_tables=False)
        parser.macro_type = macro_type
        parser.macroops = {}
        parser.microops = microops
        parser.rom = rom
        parser.rom_macroop_type = rom_macroop_type
        parser.symbols = {}
        # Share the parser's symbol table rather than copying it.
        self.symbols = parser.symbols

    def assemble(self, asm):
        """Parse *asm* and return the macroops it defined.

        The parser's macroop table is replaced with a fresh dict afterwards,
        so each call returns only the macroops from its own input.
        """
        parser = self.parser
        parser.parse(asm, lexer=self.lexer)
        assembled, parser.macroops = parser.macroops, {}
        return assembled