From 81039ce304e42777c6648883ad2b1d381092cec1 Mon Sep 17 00:00:00 2001
From: RunasSudo
Date: Mon, 13 Feb 2017 14:44:36 +1030
Subject: [PATCH] Implement basic assembler

---
 asm.py                 | 176 +++++++++++++++++++++++++++++++++++++++++
 disasm.py              |  10 ++-
 libsynacor/bytecode.py |  14 ++++
 3 files changed, 197 insertions(+), 3 deletions(-)
 create mode 100755 asm.py

diff --git a/asm.py b/asm.py
new file mode 100755
index 0000000..e969f1a
--- /dev/null
+++ b/asm.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python3
+#    synacor.py - An implementation of the Synacor Challenge
+#    Copyright © 2017  RunasSudo
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU Affero General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU Affero General Public License for more details.
+#
+#    You should have received a copy of the GNU Affero General Public License
+#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+from libsynacor import *
+
+import argparse
+import struct
+
+parser = argparse.ArgumentParser()
+parser.add_argument('file', help='.asm file to read')
+parser.add_argument('output', help='.bin file to write')
+args = parser.parse_args()
+
+# Number of the source line most recently read, for error messages
+line_no = 0
+
+def split_line(line):
+    """Split one line of assembly source into tokens.
+
+    Tokens are separated by whitespace. A double-quoted string is kept as one
+    token, with its quotes and backslash escapes left in place. Everything
+    after a ';' outside a string is a comment and is dropped.
+    """
+    tokens = []
+    token = ''
+
+    in_string = False
+    in_escape = False
+
+    for char in line:
+        if in_string:
+            token += char
+            if in_escape:
+                # The escaped character is taken literally; the escape ends here
+                in_escape = False
+            elif char == '\\':
+                in_escape = True
+            elif char == '"':
+                in_string = False
+        elif char == ';':
+            # Comment: ignore the rest of the line
+            break
+        elif char.isspace():
+            if token != '':
+                tokens.append(token)
+                token = ''
+        else:
+            if char == '"':
+                in_string = True
+            token += char
+    # Final token
+    if token != '':
+        tokens.append(token)
+
+    return tokens
+
+def unescape_char(char):
+    """Expand the backslash escape sequences in a string."""
+    # unicode_escape decodes its input as Latin-1, so encoding as UTF-8 would
+    # mangle non-ASCII text; characters outside Latin-1 pass through as \uXXXX
+    return char.encode('latin-1', 'backslashreplace').decode('unicode_escape')
+
+def parse_string(token):
+    """Return the contents of a double-quoted string token, escapes expanded."""
+    if len(token) < 2 or not token.endswith('"'):
+        raise Exception('Unterminated string {}'.format(token))
+    return unescape_char(token[1:-1])
+
+def assemble_next_instruction(source):
+    """Assemble the next instruction found in source, skipping empty lines.
+
+    Returns (instructions, labels); instructions is None at end of file.
+    """
+    global line_no
+    # Loop rather than recurse so long runs of blank lines are harmless
+    while True:
+        line = source.readline()
+        line_no += 1
+        if line == '':
+            return None, []
+
+        tokens = split_line(line.strip())
+        if tokens:
+            return assemble_instruction(source, tokens)
+
+def assemble_instruction(source, tokens):
+    """Assemble the instruction described by tokens.
+
+    Leading 'label:' tokens are collected; if nothing follows them, the
+    instruction is read from the next line of source.
+
+    Returns (instructions, labels); instructions is None at end of file.
+    """
+    if len(tokens) == 0:
+        return assemble_next_instruction(source)
+    if tokens[0].endswith(':'):
+        # Label
+        label = tokens[0][:-1]
+        instructions, inst_labels = assemble_instruction(source, tokens[1:])
+        return instructions, inst_labels + [label]
+
+    # Instruction
+    name = tokens[0]
+    if name not in instructions_by_name:
+        raise Exception('Unknown instruction {}'.format(name))
+    instruction = instructions_by_name[name]()
+    has_string = len(tokens) > 1 and tokens[1].startswith('"')
+
+    # Special cases
+    if isinstance(instruction, InstructionOut) and has_string:
+        # out "text" expands to one out instruction per character
+        instructions = []
+        for char in parse_string(tokens[1]):
+            instruction = InstructionOut()
+            instruction.args.append(OpLiteral(ord(char)))
+            instructions.append(instruction)
+        return instructions, []
+    elif isinstance(instruction, InstructionData):
+        if len(tokens) < 2:
+            raise Exception('Invalid number of arguments: Expected at least 1, got 0')
+        if has_string:
+            instruction.args = [ord(char) for char in parse_string(tokens[1])]
+        else:
+            instruction.args = [int(x, 16) for x in tokens[1:]]
+        return [instruction], []
+    else:
+        if len(tokens) != instruction.nargs + 1:
+            raise Exception('Invalid number of arguments: Expected {}, got {}'.format(instruction.nargs, len(tokens) - 1))
+        for argstr in tokens[1:]:
+            if argstr.startswith('R'):
+                # Register
+                arg = OpRegister(int(argstr[1:]))
+            else:
+                # Hex literal
+                arg = OpLiteral(int(argstr, 16))
+            instruction.args.append(arg)
+        return [instruction], []
+
+# TODO: First pass
+
+# Second pass
+SYN_MEM = [0] * 32768
+SYN_PTR = 0
+
+with open(args.file, 'r') as source:
+    try:
+        while True:
+            instructions, labels = assemble_next_instruction(source)
+            if instructions is None:
+                break
+            for instruction in instructions:
+                code = instruction.assemble()
+                if SYN_PTR + len(code) > len(SYN_MEM):
+                    # Slice assignment would silently grow the list
+                    raise Exception('Program does not fit in memory')
+                SYN_MEM[SYN_PTR:SYN_PTR+len(code)] = code
+                SYN_PTR += len(code)
+    except Exception as ex:
+        raise Exception('Error at line {}'.format(line_no)) from ex
+
+with open(args.output, 'wb') as f:
+    f.write(struct.pack('<32768H', *SYN_MEM))
diff --git a/disasm.py b/disasm.py index 13edaa1..b6457cb 100755 --- a/disasm.py +++ b/disasm.py @@ -17,13 +17,17 @@ from libsynacor import * -import sys +import argparse -with open(sys.argv[1], 'rb') as data: +parser = argparse.ArgumentParser() +parser.add_argument('file', help='.bin file containing the initial memory dump') +args = parser.parse_args() + +with open(args.file, 'rb') as data: SYN_MEM = memory_from_file(data) def escape_char(char): - return char.replace('\\', '\\\\').replace('\n', '\\n').replace('"', '\\"') + return char.encode('unicode_escape').decode('utf-8').replace('"', '\\"') MODE_OUT = False MODE_DAT = False #False, 1
(data), 2 (text) diff --git a/libsynacor/bytecode.py b/libsynacor/bytecode.py index d62497c..c155788 100644 --- a/libsynacor/bytecode.py +++ b/libsynacor/bytecode.py @@ -33,6 +33,8 @@ class OpLiteral(Operand): def describe(self): return '{:04x}'.format(self.value) + def assemble(self): + return self.value class OpRegister(Operand): def __init__(self, register): @@ -44,6 +46,8 @@ class OpRegister(Operand): def describe(self): return 'R{}'.format(self.register) + def assemble(self): + return self.register + 32768 instructions_by_opcode = {} instructions_by_name = {} @@ -70,6 +74,9 @@ class Instruction: description += ' {}'.format(self.args[i].describe()) return description + def assemble(self): + return [self.opcode] + [self.args[i].assemble() for i in range(self.nargs)] + @staticmethod def next_instruction(data, idx): opcode = Operand.read_op(data[idx]) @@ -217,3 +224,10 @@ class InstructionIn(Instruction): cpu.SYN_STDIN_BUF = list(line) self.args[0].set(cpu, ord(cpu.SYN_STDIN_BUF.pop(0))) + +# Not actually an instruction, but convenient to think of it as one for the purposes of assembling +# self.args is an array of literal values, rather than Operands +class InstructionData(Instruction): + def assemble(self): + return self.args +instructions_by_name['data'] = InstructionData