From f260c3e4b5317bb11f35564a8a4c8204382e6c2c Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Mon, 13 Feb 2017 20:55:08 +1030 Subject: [PATCH] Annotate self test --- asm.py | 106 ---------------------------- disasm.py | 53 +++++++++----- disasm_hints/annotations.txt | 96 ++++++++++++++++++++++++-- libsynacor/__init__.py | 1 + libsynacor/assembly.py | 129 +++++++++++++++++++++++++++++++++++ libsynacor/bytecode.py | 6 +- 6 files changed, 261 insertions(+), 130 deletions(-) create mode 100644 libsynacor/assembly.py diff --git a/asm.py b/asm.py index e4377e4..58e17df 100755 --- a/asm.py +++ b/asm.py @@ -25,112 +25,6 @@ parser.add_argument('file', help='.asm file to read') parser.add_argument('output', help='.bin file to write') args = parser.parse_args() -line_no = 0 - -def split_line(line): - tokens = [] - token = '' - idx = 0 - - in_comment = False - in_string = False - in_escape = False - - while idx < len(line): - if in_comment: - pass - elif in_string: - if in_escape: - token += line[idx] - else: - if line[idx] == '\\': - in_escape = True - token += line[idx] - elif line[idx] == '"': - in_string = False - token += line[idx] - else: - token += line[idx] - else: - if line[idx] == ' ': - if token != '': - tokens.append(token) - token = '' - elif line[idx] == '"': - in_string = True - token += line[idx] - elif line[idx] == ';': - in_comment = True - else: - token += line[idx] - idx += 1 - # Final token - if token != '': - tokens.append(token) - - return tokens - -def unescape_char(char): - return char.encode('utf-8').decode('unicode_escape') - -def assemble_next_instruction(source): - line = source.readline() - global line_no; line_no += 1 - if line == '': - return None, [] - - tokens = split_line(line.strip()) - return assemble_instruction(source, tokens) - -def assemble_instruction(source, tokens): - if len(tokens) == 0: - return assemble_next_instruction(source) - if tokens[0].endswith(':'): - # Label - label = tokens[0][:-1] - instructions, inst_labels = 
assemble_instruction(source, tokens[1:]) - return instructions, inst_labels + [label] - else: - # Instruction - name = tokens[0] - if name not in instructions_by_name: - raise Exception('Unknown instruction {}'.format(name)) - instruction = instructions_by_name[name]() - - # Special cases - if isinstance(instruction, InstructionOut) and tokens[1].startswith('"'): - chars = unescape_char(tokens[1][1:-1]) - instructions = [] - for char in chars: - instruction = InstructionOut() - instruction.args.append(OpLiteral(ord(char))) - instructions.append(instruction) - return instructions, [] - elif isinstance(instruction, InstructionData): - if tokens[1].startswith('"'): - chars = unescape_char(tokens[1][1:-1]) - instruction.args = [ord(char) for char in chars] - return [instruction], [] - else: - instruction.args = [int(x, 16) for x in tokens[1:]] - return [instruction], [] - else: - if len(tokens) != instruction.nargs + 1: - raise Exception('Invalid number of arguments: Expected {}, got {}'.format(instruction.nargs, len(tokens) - 1)) - for i in range(instruction.nargs): - argstr = tokens[i + 1] - if argstr.startswith('R'): - # Register - arg = OpRegister(int(argstr[1:])) - elif argstr.startswith('$'): - # Label - arg = OpLabel(argstr[1:]) - else: - # Hex literal - arg = OpLiteral(int(argstr, 16)) - instruction.args.append(arg) - return [instruction], [] - # First pass labels = {} SYN_MEM = [0] * 32768 diff --git a/disasm.py b/disasm.py index f89f349..c3d1ee8 100755 --- a/disasm.py +++ b/disasm.py @@ -28,7 +28,7 @@ with open(args.file, 'rb') as data: SYN_MEM = memory_from_file(data) # Find things to label -labels, comments_before, comments_inline = {}, {}, {} +labels, comments_before, comments_inline, replacements = {}, {}, {}, {} SYN_PTR = 0 while SYN_PTR < len(SYN_MEM): word = SYN_MEM[SYN_PTR] @@ -62,6 +62,9 @@ if args.hints: elif line.startswith('call '): loc = int(line.split()[1], 16) labels['sub_{:04x}'.format(loc)] = loc + elif line.startswith('lbl '): + loc = 
int(line.split()[1], 16) + labels[line.split()[2]] = loc elif line.startswith('ren '): old_label = line.split()[1] new_label = line.split()[2] @@ -73,18 +76,25 @@ if args.hints: if loc not in comments_before: comments_before[loc] = [] comments_before[loc].append(comment) + elif line.startswith('cmi '): + loc = int(line.split()[1], 16) + comment = line[line.index(' ', line.index(' ') + 1) + 1:].strip() + comments_inline[loc] = comment + elif line.startswith('rep '): + loc = int(line.split()[1], 16) + code = line[line.index(' ', line.index(' ') + 1) + 1:].strip() + instruction = assemble_line(None, code)[0][0] + replacements[loc] = instruction else: raise Exception('Invalid line in hint file: {}'.format(line)) -def escape_char(char): - return char.encode('unicode_escape').decode('utf-8').replace('"', '\\"') - MODE_OUT = False MODE_DAT = False #False, 1 (data), 2 (text) SYN_PTR = 0 while SYN_PTR < len(SYN_MEM): + # Handle comments if SYN_PTR in comments_before: if MODE_OUT: print('"') @@ -97,6 +107,12 @@ while SYN_PTR < len(SYN_MEM): MODE_DAT = False for comment in comments_before[SYN_PTR]: print('; {}'.format(comment)) + if SYN_PTR in comments_inline: + comment_inline = ' ; {}'.format(comments_inline[SYN_PTR]) + else: + comment_inline = '' + + # Handle labels if any(v == SYN_PTR for k, v in labels.items()): if MODE_OUT: print('"') @@ -109,6 +125,13 @@ while SYN_PTR < len(SYN_MEM): MODE_DAT = False print('${}:'.format(next(k for k, v in labels.items() if v == SYN_PTR))) + # Handle replacements + if SYN_PTR in replacements: + instruction = replacements[SYN_PTR] + print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) + SYN_PTR += len(instruction.assemble(None)) + continue + word = SYN_MEM[SYN_PTR] if MODE_OUT and word != 19: @@ -162,24 +185,18 @@ while SYN_PTR < len(SYN_MEM): if MODE_OUT: print('"') MODE_OUT = False - print('{:04x}: {}'.format(SYN_PTR, instruction.describe())) + print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), 
comment_inline)) elif isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionJt) or isinstance(instruction, InstructionJf) or isinstance(instruction, InstructionCall): if isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionCall): - op = instruction.args[0] + argidx = 0 else: - op = instruction.args[1] - if isinstance(op, OpLiteral): - loc = op.get(None) + argidx = 1 + if isinstance(instruction.args[argidx], OpLiteral): + loc = instruction.args[argidx].get(None) if any(v == loc for k, v in labels.items()): label = next(k for k, v in labels.items() if v == loc) - if isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionCall): - print('{:04x}: {: <4} ${}'.format(SYN_PTR, instruction.name, label)) - else: - print('{:04x}: {: <4} {} ${}'.format(SYN_PTR, instruction.name, instruction.args[0].describe(), label)) - else: - print('{:04x}: {}'.format(SYN_PTR, instruction.describe())) - else: - print('{:04x}: {}'.format(SYN_PTR, instruction.describe())) + instruction.args[argidx] = OpLabel(label) + print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) else: - print('{:04x}: {}'.format(SYN_PTR, instruction.describe())) + print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) SYN_PTR = next_SYN_PTR diff --git a/disasm_hints/annotations.txt b/disasm_hints/annotations.txt index a482af9..37b3543 100644 --- a/disasm_hints/annotations.txt +++ b/disasm_hints/annotations.txt @@ -1,6 +1,94 @@ -jmp 0000 -ren label_0000 start -jmp 0110 -ren label_0110 self_test cmb 0140 Test jmp ren label_015b self_test_jmp1 +cmi 0142 jmp lands here if fails +cmi 0162 jmp from 0160 lands here if -2 +cmi 0164 jmp from 0160 lands here if -1 +cmi 0166 jmp from 0160 lands here if correct +cmi 0168 jmp from 0160 lands here if +1 +cmi 016a jmp from 0160 lands here if +2 +ren label_0166 self_test_jmp2 +ren label_0170 self_test_jmp_diagnose-2 +ren label_018d self_test_jmp_diagnose-1 +ren 
label_01a8 self_test_jmp_diagnose+1 +ren label_01c5 self_test_jmp_diagnose+2 +cmi 016e jmp from 0162 lands here +cmi 018c jmp from 0164 lands here +cmi 01a9 jmp from 0168 lands here +cmi 01c7 jmp from 016a lands here +cmb 01e4 Test jt/jf +cmb 01e4 ?? Does not test behaviour for values other than 0 and 1 +ren label_01e4 self_test_jtjf +ren label_0432 self_test_jtjf_fail +cmi 01e4 Jump to fail if 0 is true +cmi 01e7 Jump to fail if 1 is false +ren label_01ef self_test_jtjf2 +ren label_01f4 self_test_regzero +cmi 01ed Fail if 1 is not true +cmi 01f2 Fail if 0 is not false +cmb 01f4 Test that all registers are zero +cmb 01f4 ?? Because this uses jt/jf, errors may not be detected if involving values other than 0 and 1 +cmb 020c Test set +ren label_045e self_test_set_fail +cmb 0218 Test add +cmb 0218 ?? This only tests if 1 + 1 != 0, and will fail to detect almost all other errors +cmi 021c Dodgy! +ren label_0234 self_test_add +cmb 0234 Test eq +cmb 0234 ?? This reuses the result from the add test, so will erroneously report an eq failure (instead of an add failure) if 1 + 1 != 2 +cmb 0234 It would probably have been a better idea to test eq first, then use that to verify add +cmi 0238 Dodgy! +ren label_024e self_test_pushpop +cmb 024e Test push/pop by exchanging R0 and R1 +cmb 024e ?? Because R1 is reused, the test will erroneously report a push/pop failure (instead of an eq failure) if eq returns any other truthy value in the previous test +cmi 025d Dodgy! 
+ren label_0486 self_test_pushpop_fail +cmb 0264 Test gt +ren label_0473 self_test_gt_fail +cmb 0279 Test and +ren label_0499 self_test_and_fail +cmb 0284 Test or +cmb 02ac Test not +ren label_02ac self_test_not +ren label_04b8 self_test_not_fail +cmb 02c0 Test call +ren sub_0505 self_test_call_subroutine1 +lbl 02c2 self_test_call_returnloc1 +ren label_0509 self_test_call_fail +ren label_02c4 self_test_call_check_stack1 +rep 02c6 eq R1 R0 $self_test_call_check_stack1 +cmi 02c6 This test is strange. If R0 == 02c2 as tested below, R0 != 02c4 is guaranteed +rep 02cd eq R1 R0 $self_test_call_returnloc1 +cmb 02d4 Test call register value +ren sub_0507 self_test_call_subroutine2 +lbl 02d9 self_test_call_returnloc2 +rep 02d4 set R0 $self_test_call_subroutine2 +ren label_02db self_test_call_check_stack2 +rep 02dd eq R1 R0 $self_test_call_check_stack2 +rep 02e4 eq R1 R0 $self_test_call_returnloc2 +cmb 02eb Test add overflow +ren label_0520 self_test_overflow_fail +cmi 0304 Just to be safe! +cmb 030b Test mult +cmi 030f Test for HHGTTG (non-)compatibility +ren label_0565 self_test_hhgttg_fail +ren label_0586 self_test_mult_fail +cmb 0328 Test mod +ren label_059d self_test_mod_fail +ren label_034d self_test_rwmem +lbl 034b self_test_rwmem_data +cmb 034b Test rmem/wmem +ren label_04d7 self_test_rmem_fail +ren label_04ee self_test_wmem_fail +rep 034d rmem R0 $self_test_rwmem_data +rep 0357 add R2 $self_test_rwmem_data 0001 +rep 0365 set R0 $self_test_rwmem_data +ren sub_06bb decrypt_data +cmi 0375 Sneaky! 
+lbl 17b4 encrypted_data +rep 0377 rmem R0 $encrypted_data +rep 0381 add R2 $encrypted_data 0001 +ren label_03ad self_test_wmem_cmd_fail +rep 03ab out $self_test_complete +cmi 03a9 Becomes noop; jt 0013 $self_test_complete +lbl 03d2 self_test_complete diff --git a/libsynacor/__init__.py b/libsynacor/__init__.py index 9595d2b..08d899e 100644 --- a/libsynacor/__init__.py +++ b/libsynacor/__init__.py @@ -16,4 +16,5 @@ from .binfile import * from .bytecode import * +from .assembly import * from .cpu import * diff --git a/libsynacor/assembly.py b/libsynacor/assembly.py new file mode 100644 index 0000000..d58651e --- /dev/null +++ b/libsynacor/assembly.py @@ -0,0 +1,129 @@ +# synacor.py - An implementation of the Synacor Challenge +# Copyright © 2017 RunasSudo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +from libsynacor import * + +def escape_char(char): + return char.encode('unicode_escape').decode('utf-8').replace('"', '\\"') + +def unescape_char(char): + return char.encode('utf-8').decode('unicode_escape') + +def split_line(line): + tokens = [] + token = '' + idx = 0 + + in_comment = False + in_string = False + in_escape = False + + while idx < len(line): + if in_comment: + pass + elif in_string: + if in_escape: + token += line[idx] + else: + if line[idx] == '\\': + in_escape = True + token += line[idx] + elif line[idx] == '"': + in_string = False + token += line[idx] + else: + token += line[idx] + else: + if line[idx] == ' ': + if token != '': + tokens.append(token) + token = '' + elif line[idx] == '"': + in_string = True + token += line[idx] + elif line[idx] == ';': + in_comment = True + else: + token += line[idx] + idx += 1 + # Final token + if token != '': + tokens.append(token) + + return tokens + +line_no = 0 + +def assemble_next_instruction(source): + line = source.readline() + global line_no; line_no += 1 + if line == '': + return None, [] + + return assemble_line(source, line) + +def assemble_line(source, line): + tokens = split_line(line.strip()) + return assemble_instruction(source, tokens) + +def assemble_instruction(source, tokens): + if len(tokens) == 0: + return assemble_next_instruction(source) + if tokens[0].endswith(':'): + # Label + label = tokens[0][:-1] + instructions, inst_labels = assemble_instruction(source, tokens[1:]) + return instructions, inst_labels + [label] + else: + # Instruction + name = tokens[0] + if name not in instructions_by_name: + raise Exception('Unknown instruction {}'.format(name)) + instruction = instructions_by_name[name]() + + # Special cases + if isinstance(instruction, InstructionOut) and tokens[1].startswith('"'): + chars = unescape_char(tokens[1][1:-1]) + instructions = [] + for char in chars: + instruction = InstructionOut() + instruction.args.append(OpLiteral(ord(char))) + instructions.append(instruction) + 
return instructions, [] + elif isinstance(instruction, InstructionData): + if tokens[1].startswith('"'): + chars = unescape_char(tokens[1][1:-1]) + instruction.args = [ord(char) for char in chars] + return [instruction], [] + else: + instruction.args = [int(x, 16) for x in tokens[1:]] + return [instruction], [] + else: + if len(tokens) != instruction.nargs + 1: + raise Exception('Invalid number of arguments: Expected {}, got {}'.format(instruction.nargs, len(tokens) - 1)) + for i in range(instruction.nargs): + argstr = tokens[i + 1] + if argstr.startswith('R'): + # Register + arg = OpRegister(int(argstr[1:])) + elif argstr.startswith('$'): + # Label + arg = OpLabel(argstr[1:]) + else: + # Hex literal + arg = OpLiteral(int(argstr, 16)) + instruction.args.append(arg) + return [instruction], [] diff --git a/libsynacor/bytecode.py b/libsynacor/bytecode.py index ea5224c..479f687 100644 --- a/libsynacor/bytecode.py +++ b/libsynacor/bytecode.py @@ -49,11 +49,13 @@ class OpRegister(Operand): def assemble(self, labels): return self.register + 32768 -# Used only in assembling process +# Used only in dis/assembling process class OpLabel(Operand): def __init__(self, label): self.label = label + def describe(self): + return '${}'.format(self.label) def assemble(self, labels): if labels is None: # First pass @@ -85,7 +87,7 @@ class Instruction: description = '{: <4}'.format(self.name) for i in range(self.nargs): description += ' {}'.format(self.args[i].describe()) - return description + return description.strip() def assemble(self, labels): return [self.opcode] + [self.args[i].assemble(labels) for i in range(self.nargs)]