diff --git a/.gitignore b/.gitignore index 98bb947..ba27fb4 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ /tools/ackermann.o /tools/venv /tools/*.pdf + +__pycache__ diff --git a/README.md b/README.md index 29d6345..be48290 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # synacor.py -My sort-of-OOP, ~~poorly-documented~~concise response to the Synacor challenge +My OOP, ~~poorly-documented~~ ~~concise~~ working response to the Synacor challenge ## Debug commands @@ -12,15 +12,15 @@ This will execute the file `.py` with `dbg_args[0]` set to `` and ` + ./synacor.py For example, to load the `dumps/init` state to skip the self-test and decryption, run: - ./synacor.py dbg_load dumps/init + ./synacor.py challenge.bin dbg/load dumps/init Dump files are stored in Python [pickle](https://docs.python.org/3/library/pickle.html) format, so if you want to inspect the memory in a hex editor, for example, it will be necessary to extract a raw memory dump: diff --git a/asm.py b/asm.py new file mode 100755 index 0000000..58e17df --- /dev/null +++ b/asm.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +# synacor.py - An implementation of the Synacor Challenge +# Copyright © 2017 RunasSudo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from libsynacor import * + +import argparse +import struct + +parser = argparse.ArgumentParser() +parser.add_argument('file', help='.asm file to read') +parser.add_argument('output', help='.bin file to write') +args = parser.parse_args() + +# First pass +labels = {} +SYN_MEM = [0] * 32768 +SYN_PTR = 0 + +with open(args.file, 'r') as source: + try: + while True: + instructions, inst_labels = assemble_next_instruction(source) + for label in inst_labels: + if label.startswith('$'): + labels[label[1:]] = SYN_PTR + if instructions is None: + break + for instruction in instructions: + code = instruction.assemble(None) + SYN_MEM[SYN_PTR:SYN_PTR+len(code)] = code + SYN_PTR += len(code) + except Exception as ex: + raise Exception('Error at line {}'.format(line_no)) from ex + +# Second pass +SYN_MEM = [0] * 32768 +SYN_PTR = 0 + +with open(args.file, 'r') as source: + try: + while True: + instructions, inst_labels = assemble_next_instruction(source) + if instructions is None: + break + for instruction in instructions: + code = instruction.assemble(labels) + SYN_MEM[SYN_PTR:SYN_PTR+len(code)] = code + SYN_PTR += len(code) + except Exception as ex: + raise Exception('Error at line {}'.format(line_no)) from ex + +with open(args.output, 'wb') as f: + f.write(struct.pack('<32768H', *SYN_MEM)) diff --git a/dbg_dump.py b/dbg/dump.py similarity index 86% rename from dbg_dump.py rename to dbg/dump.py index 89acbb2..0b562f7 100644 --- a/dbg_dump.py +++ b/dbg/dump.py @@ -1,5 +1,5 @@ # synacor.py - An implementation of the Synacor Challenge -# Copyright © 2016 RunasSudo +# Copyright © 2016–2017 RunasSudo # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -20,10 +20,10 @@ if len(dbg_args) < 2: print('Usage: .{} '.format(dbg_args[0])) else: model = { - 'SYN_PTR': SYN_PTR, - 'SYN_MEM': SYN_MEM, - 'SYN_REG': SYN_REG, - 'SYN_STK': SYN_STK + 'SYN_PTR': cpu.SYN_PTR, + 'SYN_MEM': cpu.SYN_MEM, + 'SYN_REG': cpu.SYN_REG, + 'SYN_STK': cpu.SYN_STK } with open(dbg_args[1], 'wb') as f: diff --git a/dbg/fastboot.py b/dbg/fastboot.py new file mode 100644 index 0000000..41c7750 --- /dev/null +++ b/dbg/fastboot.py @@ -0,0 +1,25 @@ +# synacor.py - An implementation of the Synacor Challenge +# Copyright © 2017 RunasSudo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +# Emulate 06bb +for R1 in range(0x17b4, 0x7562): + R0 = cpu.SYN_MEM[R1] + R0 ^= pow(R1, 2, 32768) + R0 ^= 0x4154 + cpu.SYN_MEM[R1] = R0 + +# Jump past self-test +cpu.SYN_PTR = 0x0377 diff --git a/dbg_load.py b/dbg/load.py similarity index 84% rename from dbg_load.py rename to dbg/load.py index 0e46501..a2d4c8f 100644 --- a/dbg_load.py +++ b/dbg/load.py @@ -1,5 +1,5 @@ # synacor.py - An implementation of the Synacor Challenge -# Copyright © 2016 RunasSudo +# Copyright © 2016–2017 RunasSudo # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -22,7 +22,7 @@ else: with open(dbg_args[1], 'rb') as f: model = pickle.load(f) - SYN_PTR = model['SYN_PTR'] - SYN_MEM = model['SYN_MEM'] - SYN_REG = model['SYN_REG'] - SYN_STK = model['SYN_STK'] + cpu.SYN_PTR = model['SYN_PTR'] + cpu.SYN_MEM = model['SYN_MEM'] + cpu.SYN_REG = model['SYN_REG'] + cpu.SYN_STK = model['SYN_STK'] diff --git a/dbg_speedrun.py b/dbg/speedrun.py similarity index 86% rename from dbg_speedrun.py rename to dbg/speedrun.py index a1dccf9..652c5be 100644 --- a/dbg_speedrun.py +++ b/dbg/speedrun.py @@ -68,7 +68,7 @@ use corroded coin north take teleporter use teleporter -.dbg_teleporter +.dbg/teleporter use teleporter north north @@ -97,16 +97,5 @@ take mirror use mirror """ -# Read code into memory -SYN_PTR = 0 -with open('challenge.bin', 'rb') as data: - while True: - byteData = data.read(2) - if len(byteData) < 2: - break - SYN_MEM[SYN_PTR] = struct.unpack('. -# Set R8 to 6486 -SYN_REG[7] = 0x6486 +# Set R7 to 6486 +cpu.SYN_REG[7] = 0x6486 # Patch instructions 1571 to 1579 inclusive with nop's -SYN_MEM[0x1571:0x157a] = [21] * 9 +cpu.SYN_MEM[0x1571:0x157a] = [21] * 9 print('Patched. Ready to run "use teleporter".') diff --git a/disasm.py b/disasm.py new file mode 100755 index 0000000..cd21734 --- /dev/null +++ b/disasm.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python3 +# synacor.py - An implementation of the Synacor Challenge +# Copyright © 2017 RunasSudo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from libsynacor import * + +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument('file', help='.bin file containing the initial memory dump') +parser.add_argument('--hints', help='File(s) outlining additional jmp/call targets, label names, comments, etc', action='append') +parser.add_argument('--smart', help='Given a raw Synacor challenge file, disassemble in a Synacor-aware manner', action='store_true') +parser.add_argument('--aggressive-labels', help='Replace values with corresponding labels irrespective of where they appear', action='store_true') +args = parser.parse_args() + +with open(args.file, 'rb') as data: + SYN_MEM = memory_from_file(data) + disassemble_end = len(SYN_MEM) + +labels, comments_before, comments_inline, replacements = {}, {}, {}, {} + +# Do smart things if requested +if args.smart: + disassemble_end = 0x17b4 + # Emulate 06bb to decrypt data + for R1 in range(disassemble_end, 0x7562): + R0 = SYN_MEM[R1] + R0 ^= pow(R1, 2, 32768) + R0 ^= 0x4154 + SYN_MEM[R1] = R0 + +# Find things to label +SYN_PTR = 0 +while SYN_PTR < disassemble_end: + word = SYN_MEM[SYN_PTR] + if word in instructions_by_opcode: + instruction, SYN_PTR = Instruction.next_instruction(SYN_MEM, SYN_PTR) + if isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionJt) or isinstance(instruction, InstructionJf): + if isinstance(instruction, InstructionJmp): + op = instruction.args[0] + else: + op = instruction.args[1] + if isinstance(op, OpLiteral): + loc = op.get(None) + labels['label_{:04x}'.format(loc)] = loc + elif isinstance(instruction, InstructionCall): + if isinstance(instruction.args[0], OpLiteral): + loc = instruction.args[0].get(None) + labels['sub_{:04x}'.format(loc)] = loc + else: + SYN_PTR += 1 +# Read hints +if args.hints: + for hintfile in args.hints: + with open(hintfile, 'r') as data: + while True: + line = data.readline() + if line == '': + break + if line.startswith('jmp '): + loc = int(line.split()[1], 16) + labels['label_{:04x}'.format(loc)] = loc + elif line.startswith('call '): + loc = int(line.split()[1], 16) + labels['sub_{:04x}'.format(loc)] = loc + elif line.startswith('lbl '): + loc = int(line.split()[1], 16) + labels[line.split()[2]] = loc + elif line.startswith('ren '): + old_label = line.split()[1] + new_label = line.split()[2] + labels[new_label] = labels[old_label] + del labels[old_label] + elif line.startswith('cmb '): + loc = int(line.split()[1], 16) + comment = line[line.index(' ', line.index(' ') + 1) + 1:].strip() + if loc not in comments_before: + comments_before[loc] = [] + comments_before[loc].append(comment) + elif line.startswith('cmi '): + loc = int(line.split()[1], 16) + comment = line[line.index(' ', line.index(' ') + 1) + 1:].strip() + comments_inline[loc] = comment + elif line.startswith('rep '): + loc = int(line.split()[1], 16) + code = line[line.index(' ', line.index(' ') + 1) + 1:].strip() + instruction = assemble_line(None, code)[0][0] + replacements[loc] = instruction + else: + raise Exception('Invalid line in hint file: {}'.format(line)) + +MODE_OUT = False +MODE_DAT = False #False, 1 (data), 2 (text) + +SYN_PTR = 0 + +def set_mode_out(mode): + global MODE_OUT + if MODE_OUT == mode: + pass + elif mode == False: + # Switching off + print('"') + else: + # Switching on + print('{:04x}: out "'.format(SYN_PTR), end='') + MODE_OUT = mode +def set_mode_dat(mode): + global MODE_DAT + if MODE_DAT == mode: + pass + elif mode == False: + # Switching off + if MODE_DAT == 2: + print('"', end='') + print() + elif MODE_DAT == 1: + # Switching from data to text + print() + print('{:04x}: data "'.format(SYN_PTR), end='') + elif MODE_DAT == 2: + # Switching from text to data + print('"') + print('{:04x}: data'.format(SYN_PTR), end='') + else: + # Switching to a new mode + print('{:04x}: data'.format(SYN_PTR), end='') + if mode == 2: + print('"', end='') + MODE_DAT = mode +def clear_modes(): + set_mode_out(False) + set_mode_dat(False) + +while SYN_PTR < len(SYN_MEM): + # Handle comments + if SYN_PTR in comments_before: + clear_modes() + for comment in comments_before[SYN_PTR]: + print('; {}'.format(comment)) + if SYN_PTR in comments_inline: + comment_inline = ' ; {}'.format(comments_inline[SYN_PTR]) + else: + comment_inline = '' + + # Handle labels + if any(v == SYN_PTR for k, v in labels.items()): + clear_modes() + print('${}:'.format(next(k for k, v in labels.items() if v == SYN_PTR))) + + # Handle replacements + if SYN_PTR in replacements: + instruction = replacements[SYN_PTR] + print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) + SYN_PTR += len(instruction.assemble(None)) + continue + + word = SYN_MEM[SYN_PTR] + + if SYN_PTR >= disassemble_end or word not in instructions_by_opcode: + # Data + if 32 <= word <= 126: + # Looks like letters - unfortunately "\n" looks like MULT + set_mode_dat(2) + print(escape_char(chr(word)), end='') + if word == 0x0a: + clear_modes() # Break on newlines + else: + set_mode_dat(1) + print(' {:04x}'.format(word), end='') + SYN_PTR += 1 + else: + # Instruction + instruction, next_SYN_PTR = Instruction.next_instruction(SYN_MEM, SYN_PTR) + # Special cases + if isinstance(instruction, InstructionOut): + if isinstance(instruction.args[0], OpLiteral): + set_mode_out(True) + print(escape_char(chr(instruction.args[0].get(None))), end='') + if instruction.args[0].get(None) == 0x0a: + clear_modes() # Break on newlines + else: + clear_modes() + print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) + elif isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionJt) or isinstance(instruction, InstructionJf) or isinstance(instruction, InstructionCall): + clear_modes() + if isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionCall): + argidx = 0 + else: + argidx = 1 + # Aggressively replace labels if requested + for argnum in range(instruction.nargs) if args.aggressive_labels else [argidx]: + if isinstance(instruction.args[argnum], OpLiteral): + loc = instruction.args[argnum].get(None) + if any(v == loc for k, v in labels.items()): + label = next(k for k, v in labels.items() if v == loc) + instruction.args[argnum] = OpLabel(label) + print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) + else: + # Aggressively replace labels if requested + if args.aggressive_labels: + for argnum in range(instruction.nargs): + if isinstance(instruction.args[argnum], OpLiteral): + loc = instruction.args[argnum].get(None) + if any(v == loc for k, v in labels.items()): + label = next(k for k, v in labels.items() if v == loc) + instruction.args[argnum] = OpLabel(label) + clear_modes() + print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) + SYN_PTR = next_SYN_PTR diff --git a/disasm_hints/annotations.txt b/disasm_hints/annotations.txt new file mode 100644 index 0000000..2ba96aa --- /dev/null +++ b/disasm_hints/annotations.txt @@ -0,0 +1,122 @@ +cmb 0140 Test jmp +ren label_015b self_test_jmp1 +cmi 0142 jmp lands here if fails +cmi 0162 jmp from 0160 lands here if -2 +cmi 0164 jmp from 0160 lands here if -1 +cmi 0166 jmp from 0160 lands here if correct +cmi 0168 jmp from 0160 lands here if +1 +cmi 016a jmp from 0160 lands here if +2 +ren label_0166 self_test_jmp2 +ren label_0170 self_test_jmp_diagnose-2 +ren label_018d self_test_jmp_diagnose-1 +ren label_01a8 self_test_jmp_diagnose+1 +ren label_01c5 self_test_jmp_diagnose+2 +cmi 016e jmp from 0162 lands here +cmi 018c jmp from 0164 lands here +cmi 01a9 jmp from 0168 lands here +cmi 01c7 jmp from 016a lands here +cmb 01e4 Test jt/jf +cmb 01e4 ?? Does not test behaviour for values other than 0 and 1 +ren label_01e4 self_test_jtjf +ren label_0432 self_test_jtjf_fail +cmi 01e4 Jump to fail if 0 is true +cmi 01e7 Jump to fail if 1 is false +ren label_01ef self_test_jtjf2 +ren label_01f4 self_test_regzero +cmi 01ed Fail if 1 is not true +cmi 01f2 Fail if 0 is not false +cmb 01f4 Test that all registers are zero +cmb 01f4 ?? Because this uses jt/jf, errors may not be detected if involving values other than 0 and 1 +ren label_0445 self_test_regzero_fail +cmb 020c Test set +ren label_045e self_test_set_fail +cmb 0218 Test add +cmb 0218 ?? This only tests if 1 + 1 != 0, and will fail to detect almost all other errors +cmi 021c Dodgy! +ren label_0234 self_test_add +cmb 0234 Test add +cmb 0234 ?? This reuses the result from the add test, so will erroneously report an eq failure (instead of an add failure) if 1 + 1 != 2 +cmb 0234 It would probably have been a better idea to test eq first, then use that to verify add +cmi 0238 Dodgy! +ren label_024e self_test_pushpop +cmb 024e Test push/pop by exchanging R0 and R1 +cmb 024e ?? Because R1 is reused, the test will erroneously report a push/pop failure (instead of an eq failure) if eq returns any other truthy value in the previous test +cmi 025d Dodgy! +ren label_0486 self_test_pushpop_fail +cmb 0264 Test gt +ren label_0473 self_test_gt_fail +cmb 0279 Test and +ren label_0499 self_test_and_fail +cmb 0284 Test or +cmb 02ac Test not +ren label_02ac self_test_not +ren label_04b8 self_test_not_fail +cmb 02c0 Test call +ren sub_0505 self_test_call_subroutine1 +lbl 02c2 self_test_call_returnloc1 +ren label_0509 self_test_call_fail +ren label_02c4 self_test_call_check_stack1 +cmi 02c6 This test is strange. If R0 == 02c2 as tested below, R0 != 02c4 is guaranteed +cmb 02d4 Test call register value +ren sub_0507 self_test_call_subroutine2 +lbl 02d9 self_test_call_returnloc2 +ren label_02db self_test_call_check_stack2 +cmb 02eb Test add overflow +ren label_0520 self_test_overflow_fail +cmi 0304 Just to be safe! +cmb 030b Test mult +cmi 030f Test for HHGTTG (non-)compatibility +ren label_0565 self_test_hhgttg_fail +ren label_0586 self_test_mult_fail +cmb 0328 Test mod +ren label_059d self_test_mod_fail +ren label_034d self_test_rwmem +lbl 034b self_test_rwmem_data +cmb 034b Test rmem/wmem +ren label_04d7 self_test_rmem_fail +ren label_04ee self_test_wmem_fail +ren sub_06bb decrypt_data +cmb 0375 Sneaky! +cmb 0375 call $decrypt_data +rep 0375 nop +rep 0376 nop +lbl 17b4 encrypted_data +ren label_03ad self_test_wmem_cmd_fail +rep 03ab out $self_test_complete +cmi 03a9 Becomes noop; jt 0013 $self_test_complete +lbl 03d2 self_test_complete +lbl 17c0 str_self_test_result +cmi 03d6 The "F" +lbl 17e4 str_complete +ren label_03e8 self_test_loop_copy_complete +cmb 03e8 Copy the "complete" string over the "FAILED!!" substring of $str_self_test_result +ren label_03ff self_test_concat_all_pass +cmb 03ff Extend $str_self_test_result to include $str_self_test_all_pass +lbl 17d3 str_self_test_all_pass +cmi 0410 ASCII comma +cmi 0428 Print self-test completion string +cmi 0430 Jump to game loop +ren label_0aae after_self_test +ren sub_05b2 loop_string +cmb 05b2 % Loops over a string and calls a callback for each character +cmb 05b2 % @param R0 Pointer to the string to loop over +cmb 05b2 % @param R1 Pointer to the callback to call with the character in R0 +ren label_05c8 loop_string_loop_chars +ren label_05e3 loop_string_loop_chars_done +ren sub_05ee print_string +cmb 05ee % Prints a string to output +cmb 05ee % @param R0 Pointer to the string to print +ren sub_05f8 print_char +cmb 05f8 % Prints a single character to output +cmb 05f8 % Most useful as a callback to $loop_string +cmb 05f8 % @param R0 The character to print +ren sub_05fb print_char_xored +cmb 05fb % Prints a single character to output after XORing with a fixed value +cmb 05fb % Most useful as a callback to $loop_string +cmb 05fb % @param R0 The input character +cmb 05fb % @param R2 The other XOR operand +ren sub_084d xor +cmb 084d % Calculates the exclusive OR (XOR) of two values +cmb 084d % @param R0 The first XOR operand +cmb 084d % @param R1 The second XOR operand +cmb 084d % @return R0 The value R0 XOR R1 diff --git a/disasm_hints/jmpcall.txt b/disasm_hints/jmpcall.txt new file mode 100644 index 0000000..72d2f23 --- /dev/null +++ b/disasm_hints/jmpcall.txt @@ -0,0 +1,95 @@ +call 0505 +call 0507 +call 05b2 +call 05ee +call 05f8 +call 05fb +call 0607 +call 0623 +call 0634 +call 0645 +call 0653 +call 0670 +call 0683 +call 06bb +call 06e7 +call 0731 +call 07d1 +call 084d +call 0865 +call 086e +call 0882 +call 08c8 +call 08e9 +call 0b94 +call 0cad +call 0d48 +call 0df0 +call 0e48 +call 0e8f +call 0e9e +call 0ec0 +call 0eca +call 0ede +call 0f35 +call 0f98 +call 0fa9 +call 0fcd +call 0fde +call 1000 +call 1011 +call 1022 +call 1033 +call 1044 +call 107a +call 10b7 +call 1135 +call 11a3 +call 11b5 +call 1203 +call 1252 +call 1270 +call 12bf +call 1315 +call 1371 +call 14f0 +call 1501 +call 1512 +call 1523 +call 1534 +call 1545 +call 1659 +call 16b6 +call 16bf +call 16d6 +call 16f4 +call 1705 +call 1721 +call 174c +call 1766 +jmp 015b +jmp 0166 +jmp 01e4 +jmp 02c4 +jmp 02db +jmp 034d +jmp 03e8 +jmp 061e +jmp 06b2 +jmp 06fb +jmp 07e3 +jmp 07ff +jmp 0833 +jmp 08bc +jmp 08cc +jmp 08f8 +jmp 0aae +jmp 0b86 +jmp 0c79 +jmp 0cfe +jmp 0d99 +jmp 0e43 +jmp 112e +jmp 1312 +jmp 1652 +jmp 1747 diff --git a/electric_boogaloo.md b/electric_boogaloo.md index 817a3e9..057daae 100644 --- a/electric_boogaloo.md +++ b/electric_boogaloo.md @@ -20,7 +20,7 @@ Hex values refer to the instruction lines, not the actual ranges spanned by the * Surprisingly, this is where it is checked that 1 + 1 = 2, but if `add` gives an incorrect non-zero result for 1 + 1, the test will erroneously report that it is `eq` which is not supported! * It would probably have been a better idea to test `eq` first, before `add`, then use `eq` to test `add`. * `024e` to `0261`: Tests `push` and `pop` - * Since only `R1` and `R2` are checked, this would not detect errors involving other registers. + * Since only `R0` and `R1` are checked, this would not detect errors involving other registers. * This test, like the last one, reuses the results of previous tests, since that worked out so well for `eq`… * `0264` to `0276`: Tests `gt` * The tests performed seem quite reasonable, but yet again reuse the results of previous tests… @@ -29,7 +29,7 @@ Hex values refer to the instruction lines, not the actual ranges spanned by the * `02ac` to `02bd`: Tests `not` * Okay, I admit this one was pretty helpful. What the hell is a ‘15-bit bitwise inverse’? Well the test passes if I do just do mod 32768, so that works I guess… * `02c0` to `02e8`: Tests `call` - * Although, notably, not `ret`. The tests operates by `jmp`ing back and forth to test the various values of `R1`. + * Although, notably, not `ret`. The tests operates by `jmp`ing back and forth to test the various values of `R0`. * `02eb` to `0308`: Checks that `add` overflows correctly * `030b` to `0313`: Checks that 6 × 9 ≠ 42. * I suspect there is a mistake in this test. Since Adams (1979) demonstrated unequivocally that 6 × 9 is equal to 42, I believe the `jt` should in fact be a `jf`. @@ -50,36 +50,36 @@ After a wild goose chase examining the code after the self-test, we find that th This is the magic line. Digging into the `06bb` subroutine: - 06bb push R1 - 06bd push R2 - 06bf set R2 17b4 - 06c2 rmem R1 R2 - 06c5 push R2 - 06c7 mult R2 R2 R2 + 06bb push R0 + 06bd push R1 + 06bf set R1 17b4 + 06c2 rmem R0 R1 + 06c5 push R1 + 06c7 mult R1 R1 R1 06cb call 084d - 06cd set R2 4154 + 06cd set R1 4154 06d0 call 084d - 06d2 pop R2 - 06d4 wmem R2 R1 - 06d7 add R2 R2 0001 - 06db eq R1 7562 R2 - 06df jf R1 06c2 - 06e2 pop R2 - 06e4 pop R1 + 06d2 pop R1 + 06d4 wmem R1 R0 + 06d7 add R1 R1 0001 + 06db eq R0 7562 R1 + 06df jf R0 06c2 + 06e2 pop R1 + 06e4 pop R0 06e6 ret -Inspecting the `084d` subroutine reveals that this is simply an XOR function: `R1 XOR R2`. Crypto rating: 1/10 +Inspecting the `084d` subroutine reveals that this is simply an XOR function: `R0 XOR R1`. Crypto rating: 1/10 Rewriting `06bb` function using higher-level syntax reveals that the ‘encryption’ algorithm is really very simple: ```c 06bb() { - R2 = 17b4; - for (R2 = 17b4; R2 != 7562; R2++) { - R1 = [R2]; - R1 ^= R2 * R2; - R1 ^= 4154; - [R2] = R1; + R1 = 17b4; + for (R1 = 17b4; R1 != 7562; R1++) { + R0 = [R1]; + R0 ^= R1 * R1; + R0 ^= 4154; + [R1] = R0; } } ``` @@ -94,19 +94,19 @@ So earlier, we produced a tool-assisted speed-run that would complete and dump t Looking through the code following the self-test, we find: - 0413 set R1 17c0 + 0413 set R0 17c0 0416 call 05ee -Digging deeper, `05ee` calls `05b2` with `R2` set to `05f8`. `05b2` appears to iterate over the characters in a string whose length is stored in address `R1`, and calls `R2` for each character, storing the character in `R1`. `05f8` (the callback provided by `05ee`) simply outputs every character in `R1` it gets. +Digging deeper, `05ee` calls `05b2` with `R1` set to `05f8`. `05b2` appears to iterate over the characters in a string whose length is stored in address `R0`, and calls `R1` for each character, storing the character in `R0`. `05f8` (the callback provided by `05ee`) simply outputs every character in `R0` it gets. Immediately after this call to `05ee`, we find: - 041e set R1 68e3 - 0421 set R2 05fb - 0424 add R3 XXXX XXXX + 041e set R0 68e3 + 0421 set R1 05fb + 0424 add R2 XXXX XXXX 0428 call 05b2 -In other words, a similar string-handling subroutine is called, but instead of `05f8` (which would simply print the string), `05fb` is called. `05fb` also outputs the character, but only after calling `084d` (XOR) with `R2` set to `R3`. +In other words, a similar string-handling subroutine is called, but instead of `05f8` (which would simply print the string), `05fb` is called. `05fb` also outputs the character, but only after calling `084d` (XOR) with `R1` set to `R2`. Now we have everything we need to [extract these encrypted (double-encrypted??) strings](https://github.com/RunasSudo/synacor.py/blob/master/tools/decrypt_strings.py) from the binary! @@ -116,67 +116,67 @@ Only the self-test completion code appears to be stored there, though, so I'm no We may not have the codes themselves, but we can now easily locate where they are printed. The tablet code, for example, is conveniently sandwiched between strings `6ed1` and `6ef1`. Thus the code we are looking for is: - 1290 set R1 1092 - 1293 set R2 650a - 1296 set R3 7fff - 1299 set R4 6eed + 1290 set R0 1092 + 1293 set R1 650a + 1296 set R2 7fff + 1299 set R3 6eed 129c call 0731 Looking into `0731`: - 0731 push R4 - 0733 push R5 - 0735 push R6 - 0737 push R7 - 0739 set R7 0001 - 073c add R5 R4 R7 - 0740 rmem R5 R5 - 0743 add R6 17ed R7 - 0747 wmem R6 R5 - 074a add R7 R7 0001 - 074e rmem R6 17ed - 0751 gt R5 R7 R6 - 0755 jf R5 073c - 0758 set R4 0000 - 075b set R5 0000 - 075e rmem R6 17ed - 0761 mod R6 R5 R6 - 0765 add R6 R6 17ed - 0769 add R6 R6 0001 - 076d rmem R7 R6 - 0770 mult R7 R7 1481 - 0774 add R7 R7 3039 - 0778 wmem R6 R7 - 077b push R1 - 077d push R2 - 077f set R2 R7 + 0731 push R3 + 0733 push R4 + 0735 push R5 + 0737 push R6 + 0739 set R6 0001 + 073c add R4 R3 R6 + 0740 rmem R4 R4 + 0743 add R5 17ed R6 + 0747 wmem R5 R4 + 074a add R6 R6 0001 + 074e rmem R5 17ed + 0751 gt R4 R6 R5 + 0755 jf R4 073c + 0758 set R3 0000 + 075b set R4 0000 + 075e rmem R5 17ed + 0761 mod R5 R4 R5 + 0765 add R5 R5 17ed + 0769 add R5 R5 0001 + 076d rmem R6 R5 + 0770 mult R6 R6 1481 + 0774 add R6 R6 3039 + 0778 wmem R5 R6 + 077b push R0 + 077d push R1 + 077f set R1 R6 0782 call 084d - 0784 set R7 R1 - 0787 pop R2 - 0789 pop R1 - 078b rmem R6 R2 - 078e mod R7 R7 R6 - 0792 add R7 R7 0001 - 0796 gt R6 R7 R3 - 079a jt R6 07a0 - 079d set R4 0001 - 07a0 add R7 R7 R2 - 07a4 rmem R7 R7 - 07a7 add R5 R5 0001 - 07ab add R6 R5 17f1 - 07af wmem R6 R7 - 07b2 rmem R6 17f1 - 07b5 eq R6 R5 R6 - 07b9 jf R6 075e - 07bc jf R4 0758 - 07bf push R1 - 07c1 set R1 17f1 + 0784 set R6 R0 + 0787 pop R1 + 0789 pop R0 + 078b rmem R5 R1 + 078e mod R6 R6 R5 + 0792 add R6 R6 0001 + 0796 gt R5 R6 R2 + 079a jt R5 07a0 + 079d set R3 0001 + 07a0 add R6 R6 R1 + 07a4 rmem R6 R6 + 07a7 add R4 R4 0001 + 07ab add R5 R4 17f1 + 07af wmem R5 R6 + 07b2 rmem R5 17f1 + 07b5 eq R5 R4 R5 + 07b9 jf R5 075e + 07bc jf R3 0758 + 07bf push R0 + 07c1 set R0 17f1 07c4 call 05ee - 07c6 pop R1 - 07c8 pop R7 - 07ca pop R6 - 07cc pop R5 - 07ce pop R4 + 07c6 pop R0 + 07c8 pop R6 + 07ca pop R5 + 07cc pop R4 + 07ce pop R3 07d0 ret Umm… Sorry, could you repeat that? @@ -184,59 +184,59 @@ Umm… Sorry, could you repeat that? Rewriting this again in more friendly terms: ```c -// R1: A seed of sorts - the same for all users -// R2: The length and alphabet to use, usually 650a, but 653f for the mirror -// R3: Usually 7fff, but 0004 for the mirror -// R4: An initialisation vector of sorts - contents different for every user - points to the length, but this is always 3 -0731(R1, R2, R3, R4) { - // copy the string at R4 to 17ed - R7 = 0001; +// R0: A seed of sorts - the same for all users +// R1: The length and alphabet to use, usually 650a, but 653f for the mirror +// R2: Usually 7fff, but 0004 for the mirror +// R3: An initialisation vector of sorts - contents different for every user - points to the length, but this is always 3 +0731(R0, R1, R2, R3) { + // copy the string at R3 to 17ed + R6 = 0001; do { - R5 = R4 + R7; - R5 = [R5]; - R6 = 17ed + R7; - [R6] = R5; - R7 = R7 + 0001; - R6 = [17ed]; // 3, the length - this never seems to change - } while (R7 <= R6); + R4 = R3 + R6; + R4 = [R4]; + R5 = 17ed + R6; + [R5] = R4; + R6 = R6 + 0001; + R5 = [17ed]; // 3, the length - this never seems to change + } while (R6 <= R5); - // the string at 17ed is now what was at R4 + // the string at 17ed is now what was at R3 do { - R4 = 0000; // done flag - R5 = 0000; // index + R3 = 0000; // done flag + R4 = 0000; // index do { - R6 = [17ed]; // 3, the length - R6 = R5 % R6; - R6 = R6 + 17ed; - R6 = R6 + 0001; // will cycle through three addresses of 17ed/R4 string: 17ee, 17ef, 17f0 - R7 = [R6]; - R7 = R7 * 1481; - R7 = R7 + 3039; - [R6] = R7; // mutate that value of the 17ed string - R7 = R1 ^ R7; // combine R1 into R7 - R6 = [R2]; // length of the alphabet - R7 = R7 % R6; - R7 = R7 + 0001; // calculate index in alphabet - if (R7 <= R3) { - R4 = 0001; // we are done with the entire code - this returns immediately for all except the mirror + R5 = [17ed]; // 3, the length + R5 = R4 % R5; + R5 = R5 + 17ed; + R5 = R5 + 0001; // will cycle through three addresses of 17ed/R3 string: 17ee, 17ef, 17f0 + R6 = [R5]; + R6 = R6 * 1481; + R6 = R6 + 3039; + [R5] = R6; // mutate that value of the 17ed string + R6 = R0 ^ R6; // combine R0 into R6 + R5 = [R1]; // length of the alphabet + R6 = R6 % R5; + R6 = R6 + 0001; // calculate index in alphabet + if (R6 <= R2) { + R3 = 0001; // we are done with the entire code - this returns immediately for all except the mirror } - R7 = R7 + R2; // calculate address of letter to use - R7 = [R7]; // the letter to use - R5 = R5 + 0001; // increment the index - R6 = R5 + 17f1; // index of new letter in code - [R6] = R7; // set the letter - R6 = [17f1]; // length of the code: twelve letters - } while (R5 != R6); // loop until we've generated all the letters - } while (!R4); + R6 = R6 + R1; // calculate address of letter to use + R6 = [R6]; // the letter to use + R4 = R4 + 0001; // increment the index + R5 = R4 + 17f1; // index of new letter in code + [R5] = R6; // set the letter + R5 = [17f1]; // length of the code: twelve letters + } while (R4 != R5); // loop until we've generated all the letters + } while (!R3); print(17f1); } ``` Re-implementing this in Python, we can now extract the code for the tablet directly from the binary! -Unfortunately, each of the other codes uses an `R1` based on the solution to the puzzle. In the case of the maze code: +Unfortunately, each of the other codes uses an `R0` based on the solution to the puzzle. In the case of the maze code: - 0f03 rmem R1 0e8e + 0f03 rmem R0 0e8e The value at `0e8e` is derived from which rooms are visited in the maze, as mentioned in the main note file. Armed with our [trusty map](https://github.com/RunasSudo/synacor.py/blob/master/tools/graph.py), and cross-referencing the callbacks with the files, we identify: @@ -247,37 +247,37 @@ The value at `0e8e` is derived from which rooms are visited in the maze, as ment Putting it all together, the final value at `0e8e` is `0008 | 0010 | 0040` = `0058`. -Similarly, the `R1` for the teleporter code is the value of `R8` from the Ackermann function, which we determined earlier to be `6486`: +Similarly, the `R0` for the teleporter code is the value of `R7` from the Ackermann function, which we determined earlier to be `6486`: - 1592 set R1 R8 + 1592 set R0 R7 -The remaining two codes, for the coins and the vault, are more complicated still, but follow the same pattern of determining `R1` based on the player's history. +The remaining two codes, for the coins and the vault, are more complicated still, but follow the same pattern of determining `R0` based on the player's history. -For the coins, the call to `0731` derives an `R1` from the values at memory locations `69de` to `69e2`. One would presume that this relates to the order in which coins are inserted into the puzzle, and following the trail of callbacks for the coins confirms this. The important lines are: +For the coins, the call to `0731` derives an `R0` from the values at memory locations `69de` to `69e2`. One would presume that this relates to the order in which coins are inserted into the puzzle, and following the trail of callbacks for the coins confirms this. The important lines are: - 13c0 rmem R3 099e # nr of coins inserted - 13c3 add R3 R3 69dd - 13c7 add R3 R3 0001 - 13cb wmem R3 R2 # R2 is the number of dots on the coin + 13c0 rmem R2 099e # nr of coins inserted + 13c3 add R2 R2 69dd + 13c7 add R2 R2 0001 + 13cb wmem R2 R1 # R1 is the number of dots on the coin Thus the values should be 9, 2, 5, 7 and 3: the missing numbers in the solved equation. -It is now trivial to generate the correct value of `R1`. Based on the code beginning, `15fd`: +It is now trivial to generate the correct value of `R0`. Based on the code beginning, `15fd`: ```python data_69de = [9, 2, 5, 7, 3] -R1 = 0 +R0 = 0 for i in range(len(data_69de)): - R2 = data_69de[i] - R1 = R1 + R2 - R1 = (R1 * 0x7bac) % 0x8000 - R1 = R1 ^ R2 + R1 = data_69de[i] + R0 = R0 + R1 + R0 = (R0 * 0x7bac) % 0x8000 + R0 = R0 ^ R1 ``` The result is `0b3b`. -On to the final code, now: the vault lock. At `1679`, `R1` is computed as `([0f73] ^ [0f74]) ^ [0f75]`. Searching the code for references to these, we find: +On to the final code, now: the vault lock. At `1679`, `R0` is computed as `([0f73] ^ [0f74]) ^ [0f75]`. Searching the code for references to these, we find: 1236 wmem 0f70 0016 // weight 1239 wmem 0f71 0000 // counter @@ -297,19 +297,19 @@ data_0f74 = 0 data_0f75 = 0 # 08c8 -def rotatey_thing(R1, R2): - while R2 != 0: - R2 = (R2 + 0x7fff) % 0x8000 - R3 = R1 & 0x4000 - R1 = (R1 * 0x0002) % 0x8000 - if R3 == 0: +def rotatey_thing(R0, R1): + while R1 != 0: + R1 = (R1 + 0x7fff) % 0x8000 + R2 = R0 & 0x4000 + R0 = (R0 * 0x0002) % 0x8000 + if R2 == 0: continue - R1 = R1 | 0x0001 - return R1 + R0 = R0 | 0x0001 + return R0 # 11a3 -def mutate(R1val, R2, R3): - return rotatey_thing(R1val, R2) ^ R3 +def mutate(R0val, R1, R2): + return rotatey_thing(R0val, R1) ^ R2 # 1135 def update(value, room): diff --git a/dbg_pdb.py b/libsynacor/__init__.py similarity index 89% rename from dbg_pdb.py rename to libsynacor/__init__.py index aa9dc03..08d899e 100644 --- a/dbg_pdb.py +++ b/libsynacor/__init__.py @@ -14,4 +14,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -DBG_FLAG = True +from .binfile import * +from .bytecode import * +from .assembly import * +from .cpu import * diff --git a/libsynacor/assembly.py b/libsynacor/assembly.py new file mode 100644 index 0000000..d58651e --- /dev/null +++ b/libsynacor/assembly.py @@ -0,0 +1,129 @@ +# synacor.py - An implementation of the Synacor Challenge +# Copyright © 2017 RunasSudo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from libsynacor import * + +def escape_char(char): + return char.encode('unicode_escape').decode('utf-8').replace('"', '\\"') + +def unescape_char(char): + return char.encode('utf-8').decode('unicode_escape') + +def split_line(line): + tokens = [] + token = '' + idx = 0 + + in_comment = False + in_string = False + in_escape = False + + while idx < len(line): + if in_comment: + pass + elif in_string: + if in_escape: + token += line[idx] + else: + if line[idx] == '\\': + in_escape = True + token += line[idx] + elif line[idx] == '"': + in_string = False + token += line[idx] + else: + token += line[idx] + else: + if line[idx] == ' ': + if token != '': + tokens.append(token) + token = '' + elif line[idx] == '"': + in_string = True + token += line[idx] + elif line[idx] == ';': + in_comment = True + else: + token += line[idx] + idx += 1 + # Final token + if token != '': + tokens.append(token) + + return tokens + +line_no = 0 + +def assemble_next_instruction(source): + line = source.readline() + global line_no; line_no += 1 + if line == '': + return None, [] + + return assemble_line(source, line) + +def assemble_line(source, line): + tokens = split_line(line.strip()) + return assemble_instruction(source, tokens) + +def assemble_instruction(source, tokens): + if len(tokens) == 0: + return assemble_next_instruction(source) + if tokens[0].endswith(':'): + # Label + label = tokens[0][:-1] + instructions, inst_labels = assemble_instruction(source, tokens[1:]) + return instructions, inst_labels + [label] + else: + # Instruction + name = tokens[0] + if name not in instructions_by_name: + raise Exception('Unknown instruction {}'.format(name)) + instruction = instructions_by_name[name]() + + # Special cases + if isinstance(instruction, InstructionOut) and tokens[1].startswith('"'): + chars = unescape_char(tokens[1][1:-1]) + instructions = [] + for char in chars: + instruction = InstructionOut() + instruction.args.append(OpLiteral(ord(char))) + instructions.append(instruction) + return instructions, [] + elif isinstance(instruction, InstructionData): + if tokens[1].startswith('"'): + chars = unescape_char(tokens[1][1:-1]) + instruction.args = [ord(char) for char in chars] + return [instruction], [] + else: + instruction.args = [int(x, 16) for x in tokens[1:]] + return [instruction], [] + else: + if len(tokens) != instruction.nargs + 1: + raise Exception('Invalid number of arguments: Expected {}, got {}'.format(instruction.nargs, len(tokens) - 1)) + for i in range(instruction.nargs): + argstr = tokens[i + 1] + if argstr.startswith('R'): + # Register + arg = OpRegister(int(argstr[1:])) + elif argstr.startswith('$'): + # Label + arg = OpLabel(argstr[1:]) + else: + # Hex literal + arg = OpLiteral(int(argstr, 16)) + instruction.args.append(arg) + return [instruction], [] diff --git a/dbg_fastboot.py b/libsynacor/binfile.py similarity index 72% rename from dbg_fastboot.py rename to libsynacor/binfile.py index 69d716a..941f122 100644 --- a/dbg_fastboot.py +++ b/libsynacor/binfile.py @@ -14,26 +14,14 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -import pickle - -# Read code into memory -SYN_MEM = [0] * 32768 - -with open('challenge.bin', 'rb') as data: - i = 0 +def memory_from_file(data): + import struct + SYN_MEM = [0] * 32768 + SYN_PTR = 0 while True: byteData = data.read(2) if len(byteData) < 2: break - SYN_MEM[i] = struct.unpack('. + +class Operand: + @staticmethod + def read_op(word): + if 0 <= word <= 32767: + return OpLiteral(word) + if 32768 <= word <= 32775: + return OpRegister(word - 32768) + raise Exception('Invalid word {}') + +class OpLiteral(Operand): + def __init__(self, value): + self.value = value + def get(self, cpu): + return self.value + def set(self, cpu, value): + raise Exception('Attempted to set literal value {} to {} at {}'.format(self.value, value, cpu.SYN_PTR)) + + def describe(self): + return '{:04x}'.format(self.value) + def assemble(self, labels): + return self.value + +class OpRegister(Operand): + def __init__(self, register): + self.register = register + def get(self, cpu): + return cpu.SYN_REG[self.register] + def set(self, cpu, value): + cpu.SYN_REG[self.register] = value + + def describe(self): + return 'R{}'.format(self.register) + def assemble(self, labels): + return self.register + 32768 + +# Used only in dis/assembling process +class OpLabel(Operand): + def __init__(self, label): + self.label = label + + def describe(self): + return '${}'.format(self.label) + def assemble(self, labels): + if labels is None: + # First pass + return 0xffff + if self.label not in labels: + raise Exception('Unknown label {}'.format(self.label)) + return OpLiteral(labels[self.label]).assemble(labels) + +instructions_by_opcode = {} +instructions_by_name = {} + +def instruction(opcode, name, nargs=0): + def decorator(cls): + cls.opcode = opcode + cls.name = name + cls.nargs = nargs + + instructions_by_opcode[opcode] = cls + instructions_by_name[name] = cls + + return cls + return decorator + +class Instruction: + def __init__(self): + self.args = [] + + def describe(self): + description = '{: <4}'.format(self.name) + for i in range(self.nargs): + description += ' {}'.format(self.args[i].describe()) + return description.strip() + + def assemble(self, labels): + return [self.opcode] + [self.args[i].assemble(labels) for i in range(self.nargs)] + + @staticmethod + def next_instruction(data, idx): + opcode = Operand.read_op(data[idx]) + idx += 1 + + if not isinstance(opcode, OpLiteral): + raise Exception('Invalid value for opcode {} at {}'.format(data[idx], idx)) + opcode_value = opcode.get(None) + if opcode_value not in instructions_by_opcode: + raise Exception('Invalid opcode {} at {}'.format(opcode_value, idx)) + + instruction = instructions_by_opcode[opcode_value]() + for _ in range(instruction.nargs): + instruction.args.append(Operand.read_op(data[idx])) + idx += 1 + + return instruction, idx + +@instruction(21, 'nop') +class InstructionNop(Instruction): + def run(self, cpu): + pass + +@instruction(0, 'halt') +class InstructionHalt(Instruction): + def run(self, cpu): + import sys + sys.exit(0) + +@instruction(1, 'set', 2) +class InstructionSet(Instruction): + def run(self, cpu): + self.args[0].set(cpu, self.args[1].get(cpu)) + +@instruction(2, 'push', 1) +class InstructionPush(Instruction): + def run(self, cpu): + cpu.SYN_STK.append(self.args[0].get(cpu)) + +@instruction(3, 'pop', 1) +class InstructionPop(Instruction): + def run(self, cpu): + if len(cpu.SYN_STK) == 0: + raise Exception('Attempted to pop from empty stack at {}'.format(cpu.SYN_PTR)) + self.args[0].set(cpu, cpu.SYN_STK.pop()) + +@instruction(4, 'eq', 3) +class InstructionEq(Instruction): + def run(self, cpu): + self.args[0].set(cpu, 1 if self.args[1].get(cpu) == self.args[2].get(cpu) else 0) + +@instruction(5, 'gt', 3) +class InstructionGt(Instruction): + def run(self, cpu): + self.args[0].set(cpu, 1 if self.args[1].get(cpu) > self.args[2].get(cpu) else 0) + +@instruction(6, 'jmp', 1) +class InstructionJmp(Instruction): + def run(self, cpu): + cpu.SYN_PTR = self.args[0].get(cpu) + +@instruction(7, 'jt', 2) +class InstructionJt(Instruction): + def run(self, cpu): + if self.args[0].get(cpu) != 0: + cpu.SYN_PTR = self.args[1].get(cpu) + +@instruction(8, 'jf', 2) +class InstructionJf(Instruction): + def run(self, cpu): + if self.args[0].get(cpu) == 0: + cpu.SYN_PTR = self.args[1].get(cpu) + +@instruction(9, 'add', 3) +class InstructionAdd(Instruction): + def run(self, cpu): + self.args[0].set(cpu, (self.args[1].get(cpu) + self.args[2].get(cpu)) % 32768) + +@instruction(10, 'mult', 3) +class InstructionMult(Instruction): + def run(self, cpu): + self.args[0].set(cpu, (self.args[1].get(cpu) * self.args[2].get(cpu)) % 32768) + +@instruction(11, 'mod', 3) +class InstructionMod(Instruction): + def run(self, cpu): + self.args[0].set(cpu, self.args[1].get(cpu) % self.args[2].get(cpu)) + +@instruction(12, 'and', 3) +class InstructionAnd(Instruction): + def run(self, cpu): + self.args[0].set(cpu, self.args[1].get(cpu) & self.args[2].get(cpu)) + +@instruction(13, 'or', 3) +class InstructionOr(Instruction): + def run(self, cpu): + self.args[0].set(cpu, self.args[1].get(cpu) | self.args[2].get(cpu)) + +@instruction(14, 'not', 2) +class InstructionNot(Instruction): + def run(self, cpu): + self.args[0].set(cpu, ~self.args[1].get(cpu) % 32768) + +@instruction(15, 'rmem', 2) +class InstructionRmem(Instruction): + def run(self, cpu): + self.args[0].set(cpu, cpu.SYN_MEM[self.args[1].get(cpu)]) + +@instruction(16, 'wmem', 2) +class InstructionWmem(Instruction): + def run(self, cpu): + cpu.SYN_MEM[self.args[0].get(cpu)] = self.args[1].get(cpu) + +@instruction(17, 'call', 1) +class InstructionCall(Instruction): + def run(self, cpu): + cpu.SYN_STK.append(cpu.SYN_PTR) + cpu.SYN_PTR = self.args[0].get(cpu) + +@instruction(18, 'ret') +class InstructionRet(Instruction): + def run(self, cpu): + if len(cpu.SYN_STK) == 0: + raise Exception('Attempted to return with empty stack at {}'.format(cpu.SYN_PTR)) + cpu.SYN_PTR = cpu.SYN_STK.pop() + +@instruction(19, 'out', 1) +class InstructionOut(Instruction): + def run(self, cpu): + print(chr(self.args[0].get(cpu)), end='') + +@instruction(20, 'in', 1) +class InstructionIn(Instruction): + def run(self, cpu): + import sys + + while len(cpu.SYN_STDIN_BUF) == 0: + line = sys.stdin.readline() + if line.startswith('.'): + # Debug command + dbg_args = line[1:].split() + with open(dbg_args[0] + '.py', 'r') as f: + exec(f.read(), globals(), locals()) + else: + cpu.SYN_STDIN_BUF = list(line) + + self.args[0].set(cpu, ord(cpu.SYN_STDIN_BUF.pop(0))) + +# Not actually an instruction, but convenient to think of it as one for the purposes of assembling +# self.args is an array of literal values, rather than Operands +class InstructionData(Instruction): + def assemble(self, labels): + return self.args +instructions_by_name['data'] = InstructionData diff --git a/libsynacor/cpu.py b/libsynacor/cpu.py new file mode 100644 index 0000000..b23c809 --- /dev/null +++ b/libsynacor/cpu.py @@ -0,0 +1,33 @@ +# synacor.py - An implementation of the Synacor Challenge +# Copyright © 2017 RunasSudo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from libsynacor import * + +class CPU: + def __init__(self): + self.SYN_PTR = 0 + self.SYN_MEM = [0] * 32768 + self.SYN_REG = [0] * 8 + self.SYN_STK = [] + self.SYN_STDIN_BUF = [] + + def swallow_op(self): + op = Operand.read_op(self.SYN_MEM[self.SYN_PTR]) + self.SYN_PTR += 1 + + def step(self): + instruction, self.SYN_PTR = Instruction.next_instruction(self.SYN_MEM, self.SYN_PTR) + instruction.run(self) diff --git a/notes.md b/notes.md index 6e1fada..e220247 100644 --- a/notes.md +++ b/notes.md @@ -5,8 +5,6 @@ Note 1: Codes seem to be unique for every user. Note 2: Numeric values in `monospace` font are in hexadecimal. -Note 3: For some reason, I decided to number the registers `R1` through `R8`, contrary to the spec… - ## The programming codes ### Code 1 @@ -90,7 +88,7 @@ Proceed to the `north` door and `use` the `teleporter` to obtain the code: ## The true-believers-only codes At this point, you will almost certainly need to delve into the code of the challenge, if you haven't already. The code in `challenge.bin` past the self-test is encrypted, so disassembling and analysing the code is most easily done based off a memory dump from a running copy: - .dbg_dump dumps/init (From inside the game) + .dbg/dump dumps/init (From inside the game) ./tools/dump_to_raw.py dumps/init dumps/init.raw ./tools/disasm.py dumps/init.raw > dumps/init.asm @@ -166,7 +164,7 @@ Now, let's see what this `1545` does. C-style, because assembly makes my brain s ```c int 1545() { - if (R8 == 0) + if (R7 == 0) return 15e5(); // Ground state teleport 05b2(70ac, 05fb, 1807 + 585a); // Secure text print for(i = 0; i < 5; i++); // Speed up loop, because why not? @@ -176,7 +174,7 @@ int 1545() { 05b2(7156, 05fb, 1ed6 + 0992); - 0731(R8, 650a, 7fff, 7239); + 0731(R7, 650a, 7fff, 7239); 05b2(723d, 05fb, 7c1f + 0146); @@ -188,53 +186,53 @@ int 1545() { return 1652(); } -int 178b(R1, R2) { +int 178b(R0, R1) { + if (R0 != 0) { + return 1793(R0, R1); + } + return R1 + 0001; +} + +int 1793(R0, R1) { if (R1 != 0) { - return 1793(R1, R2); + return 17a0(R0, R1); } - return R2 + 0001; + R0 = R0 + 7fff; + R1 = R7; + R0 = 178b(R0, R1); + return R0; } -int 1793(R1, R2) { - if (R2 != 0) { - return 17a0(R1, R2); - } +int 17a0(R0, R1) { R1 = R1 + 7fff; - R2 = R8; - R1 = 178b(R1, R2); - return R1; -} - -int 17a0(R1, R2) { - R2 = R2 + 7fff; - R2 = 178b(R1, R2); - R1 = R1 + 7fff; - R1 = 178b(R1, R2); - return R1; + R1 = 178b(R0, R1); + R0 = R0 + 7fff; + R0 = 178b(R0, R1); + return R0; } ``` -Phew, so in other words, we seek an `R8` such that `178b(4, 1, R8)` equals 6. Let's see if we can't rewrite that function. Note that adding 0x7fff = 32767 to a number modulo 32768 is equivalent to subtracting 1. Thinking through the code, then, +Phew, so in other words, we seek an `R7` such that `178b(4, 1, R7)` equals 6. Let's see if we can't rewrite that function. Note that adding 0x7fff = 32767 to a number modulo 32768 is equivalent to subtracting 1. Thinking through the code, then, - A(R1, R2) = R2 + 1 , if R1 = 0 - A(R1 - 1, R8) , if R1 ≠ 0 and R2 = 0 - A(R1 - 1, A(R1, R2 - 1)) , if R1 ≠ 0 and R2 ≠ 0 + A(R0, R1) = R1 + 1 , if R0 = 0 + A(R0 - 1, R7) , if R0 ≠ 0 and R1 = 0 + A(R0 - 1, A(R0, R1 - 1)) , if R0 ≠ 0 and R1 ≠ 0 -Recognise anything? Well, neither did I the first time, and I'd [already seen the video](https://www.youtube.com/watch?v=i7sm9dzFtEI). It's the [Ackermann function](https://en.wikipedia.org/wiki/Ackermann_function)! With the slight twist that instead of the second line being `A(R1 - 1, 1)`, it's `A(R1 - 1, R8)`. +Recognise anything? Well, neither did I the first time, and I'd [already seen the video](https://www.youtube.com/watch?v=i7sm9dzFtEI). It's the [Ackermann function](https://en.wikipedia.org/wiki/Ackermann_function)! With the slight twist that instead of the second line being `A(R0 - 1, 1)`, it's `A(R0 - 1, R7)`. -No mathematical wizardry here, just implementing this and run a brute-force on all possible values of `R8`. And as much as it pains me to admit this, this is a tool for the raw processing efficiency of C, which I am not very proficient in, so I based my solution on [this wonderful code](https://github.com/glguy/synacor-vm/blob/master/teleport.c) by [glguy](https://github.com/glguy). My only contribution is the parallelisation of the computation. (500% speed-up! Whoo!) +No mathematical wizardry here, just implementing this and run a brute-force on all possible values of `R7`. And as much as it pains me to admit this, this is a tool for the raw processing efficiency of C, which I am not very proficient in, so I based my solution on [this wonderful code](https://github.com/glguy/synacor-vm/blob/master/teleport.c) by [glguy](https://github.com/glguy). My only contribution is the parallelisation of the computation. (500% speed-up! Whoo!) gcc ackermann.c -o ackermann -lpthread -O3 && ./ackermann -Running the algorithm, the correct value is revealed to be `0x6486`. Now we simply set `R8` to `0x6486` and patch the code to skip the check, before `use`ing the `teleporter`: +Running the algorithm, the correct value is revealed to be `0x6486`. Now we simply set `R7` to `0x6486` and patch the code to skip the check, before `use`ing the `teleporter`: 1571 call 178b -> nop nop - 1573 eq R2 R1 0006 -> nop nop nop nop - 1577 jf R2 15cb -> nop nop nop + 1573 eq R1 R0 0006 -> nop nop nop nop + 1577 jf R1 15cb -> nop nop nop I've implemented this as a debug function to prepare the teleporter: - > .dbg_teleporter + > .dbg/teleporter Patched. Ready to run "use teleporter". > use teleporter @@ -286,4 +284,4 @@ Given that you've made it this far (You didn't cheat, did you? I did warn you at Now that we've solved the puzzle, the only thing left is to write a [tool-assisted speed-run](https://github.com/RunasSudo/synacor.py/blob/master/dbg_speedrun.py) to completely break any given instance of the challenge in 5 seconds. - time python -u synacor.py dbg_speedrun | head -n 849 + time python -u synacor.py challenge.bin dbg/speedrun | head -n 849 diff --git a/synacor.py b/synacor.py index 593341d..1d23249 100755 --- a/synacor.py +++ b/synacor.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # synacor.py - An implementation of the Synacor Challenge -# Copyright © 2016 RunasSudo +# Copyright © 2017 RunasSudo # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -15,136 +15,18 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -import struct # for bytes<-->word handling -import sys # for args, stdin +from libsynacor import * -SYN_PTR = 0 -SYN_MEM = [0] * 32768 -SYN_REG = [0] * 8 -SYN_STK = [] -SYN_STDIN_BUF = [] +import sys -DBG_FLAG = False -DBG_CSTK = [] +cpu = CPU() +with open(sys.argv[1], 'rb') as data: + cpu.SYN_MEM = memory_from_file(data) -class OpLiteral: - def __init__(self, value): - self.value = value; - def get(self): - return self.value; - def set(self, value): - raise Exception('Attempted to set literal value {} to {} at {}'.format(self.value, value, SYN_PTR)) - -class OpRegister: - def __init__(self, register): - self.register = register; - def get(self): - return SYN_REG[self.register]; - def set(self, value): - SYN_REG[self.register] = value; - -def readOp(word): - if 0 <= word <= 32767: - return OpLiteral(word) - if 32768 <= word <= 32775: - return OpRegister(word - 32768) - raise Exception('Invalid word {} at {}'.format(word, SYN_PTR)) - -def swallowOp(): - global SYN_PTR - op = readOp(SYN_MEM[SYN_PTR]) - SYN_PTR += 1 - return op - -if len(sys.argv) > 1: - dbg_args = sys.argv[1:] +if len(sys.argv) > 2: + dbg_args = sys.argv[2:] with open(dbg_args[0] + '.py', 'r') as f: exec(f.read(), globals(), locals()) -else: - # Read code into memory - with open('challenge.bin', 'rb') as data: - while True: - byteData = data.read(2) - if len(byteData) < 2: - break - SYN_MEM[SYN_PTR] = struct.unpack(' swallowOp().get() else 0) - elif instruction == 6: #JMP - SYN_PTR = swallowOp().get() - elif instruction == 7: #JT (jump if not zero) - a, b = swallowOp().get(), swallowOp().get() - if a != 0: - SYN_PTR = b - elif instruction == 8: #JF (jump if zero) - a, b = swallowOp().get(), swallowOp().get() - if a == 0: - SYN_PTR = b - elif instruction == 9: #ADD - swallowOp().set((swallowOp().get() + swallowOp().get()) % 32768) - elif instruction == 10: #MULT - swallowOp().set((swallowOp().get() * swallowOp().get()) % 32768) - elif instruction == 11: #MOD - swallowOp().set(swallowOp().get() % swallowOp().get()) - elif instruction == 12: #AND - swallowOp().set(swallowOp().get() & swallowOp().get()) - elif instruction == 13: #OR - swallowOp().set(swallowOp().get() | swallowOp().get()) - elif instruction == 14: #NOT - swallowOp().set(~swallowOp().get() % 32768) # mathemagic - elif instruction == 15: #RMEM - swallowOp().set(SYN_MEM[swallowOp().get()]) - elif instruction == 16: #WMEM - a, b = swallowOp().get(), swallowOp().get() - SYN_MEM[a] = b # order of operations - elif instruction == 17: #CALL - SYN_STK.append(SYN_PTR + 1) # skip one word for the operand - addr = swallowOp().get() - DBG_CSTK.append(addr) - SYN_PTR = addr - elif instruction == 18: #RET - if len(SYN_STK) == 0: - raise Exception('Attempted to return with empty stack at {}'.format(SYN_PTR)) - SYN_PTR = SYN_STK.pop() - - if len(DBG_CSTK) > 0: - DBG_CSTK.pop() - elif instruction == 19: #OUT - print(chr(swallowOp().get()), end='') - elif instruction == 20: #IN - while len(SYN_STDIN_BUF) == 0: - line = sys.stdin.readline() - if line.startswith('.'): # debug command - dbg_args = line.rstrip()[1:].split() - with open(dbg_args[0] + '.py', 'r') as f: - SYN_PTR -= 1; exec(f.read(), globals(), locals()); SYN_PTR += 1 - else: - SYN_STDIN_BUF = list(line) - - swallowOp().set(ord(SYN_STDIN_BUF.pop(0))) - else: - raise Exception('Unimplemented opcode {} at {}'.format(instruction, SYN_PTR)) + cpu.step() diff --git a/tools/decrypt_data.py b/tools/decrypt_data.py index 3a76a58..3edcf60 100755 --- a/tools/decrypt_data.py +++ b/tools/decrypt_data.py @@ -34,11 +34,11 @@ else: i += 1 # Emulate 06bb - for R2 in range(0x17b4, 0x7562): - R1 = SYN_MEM[R2] - R1 ^= pow(R2, 2, 32768) - R1 ^= 0x4154 - SYN_MEM[R2] = R1 + for R1 in range(0x17b4, 0x7562): + R0 = SYN_MEM[R1] + R0 ^= pow(R1, 2, 32768) + R0 ^= 0x4154 + SYN_MEM[R1] = R0 # Write with open(sys.argv[2], 'wb') as f: diff --git a/tools/decrypt_strings.py b/tools/decrypt_strings.py index 793a6df..95a147b 100755 --- a/tools/decrypt_strings.py +++ b/tools/decrypt_strings.py @@ -31,11 +31,11 @@ with open(sys.argv[1], 'rb') as data: i += 1 # Emulate 06bb -for R2 in range(0x17b4, 0x7562): - R1 = SYN_MEM[R2] - R1 ^= pow(R2, 2, 32768) - R1 ^= 0x4154 - SYN_MEM[R2] = R1 +for R1 in range(0x17b4, 0x7562): + R0 = SYN_MEM[R1] + R0 ^= pow(R1, 2, 32768) + R0 ^= 0x4154 + SYN_MEM[R1] = R0 class OpLiteral: def __init__(self, value): @@ -133,22 +133,22 @@ while SYN_PTR < len(SYN_MEM): readOp() elif word == 17: #CALL if readWord() == 0x05b2: - if (SYN_MEM[SYN_PTR-9:SYN_PTR-6] == [1, 32769, 0x05fb] # set R2 05fb - and SYN_MEM[SYN_PTR-12:SYN_PTR-10] == [1, 32768] # set R1 XXXX - and SYN_MEM[SYN_PTR-6:SYN_PTR-4] == [9, 32770]): # add R3 XXXX XXXX + if (SYN_MEM[SYN_PTR-9:SYN_PTR-6] == [1, 32769, 0x05fb] # set R1 05fb + and SYN_MEM[SYN_PTR-12:SYN_PTR-10] == [1, 32768] # set R0 XXXX + and SYN_MEM[SYN_PTR-6:SYN_PTR-4] == [9, 32770]): # add R2 XXXX XXXX # Got an encrypted string! - R1 = SYN_MEM[SYN_PTR-10] - R3 = (SYN_MEM[SYN_PTR-4] + SYN_MEM[SYN_PTR-3]) % 32768 - #print('{:04x} {:04x}'.format(R1, R3)) + R0 = SYN_MEM[SYN_PTR-10] + R2 = (SYN_MEM[SYN_PTR-4] + SYN_MEM[SYN_PTR-3]) % 32768 + #print('{:04x} {:04x}'.format(R0, R2)) - strlen = SYN_MEM[R1] + strlen = SYN_MEM[R0] strbuf = '' for i in range(strlen): - encrypted = SYN_MEM[R1 + 1 + i] - decrypted = encrypted ^ R3 + encrypted = SYN_MEM[R0 + 1 + i] + decrypted = encrypted ^ R2 strbuf += escapeChar(chr(decrypted)) - print('{:04x}: "{}"'.format(R1, strbuf)) + print('{:04x}: "{}"'.format(R0, strbuf)) elif word == 18: #RET pass elif word == 19: #OUT diff --git a/tools/disasm.py b/tools/disasm.py deleted file mode 100755 index b6e21e8..0000000 --- a/tools/disasm.py +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env python3 -# synacor.py - An implementation of the Synacor Challenge -# Copyright © 2016 RunasSudo -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import struct # for bytes<-->word handling -import sys # for args - -class OpLiteral: - def __init__(self, value): - self.value = value; - def get(self): - return '{:04x}'.format(self.value); - def set(self): - return '{:04x}'.format(self.value); - -class OpRegister: - def __init__(self, register): - self.register = register; - def get(self): - return 'R{}'.format(self.register + 1); - def set(self): - return 'R{}'.format(self.register + 1); - -def readWord(): - byteData = data.read(2) - if len(byteData) < 2: - return None - return struct.unpack('