From 7aaaa45d4eb34636910eba80b7b81868709174c3 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Mon, 13 Feb 2017 22:00:14 +1030 Subject: [PATCH] Annotate some more of the code --- disasm.py | 147 ++++++++++++++++++++--------------- disasm_hints/annotations.txt | 50 +++++++++--- 2 files changed, 123 insertions(+), 74 deletions(-) diff --git a/disasm.py b/disasm.py index c3d1ee8..cd21734 100755 --- a/disasm.py +++ b/disasm.py @@ -22,15 +22,29 @@ import argparse parser = argparse.ArgumentParser() parser.add_argument('file', help='.bin file containing the initial memory dump') parser.add_argument('--hints', help='File(s) outlining additional jmp/call targets, label names, comments, etc', action='append') +parser.add_argument('--smart', help='Given a raw Synacor challenge file, disassemble in a Synacor-aware manner', action='store_true') +parser.add_argument('--aggressive-labels', help='Replace values with corresponding labels irrespective of where they appear', action='store_true') args = parser.parse_args() with open(args.file, 'rb') as data: SYN_MEM = memory_from_file(data) + disassemble_end = len(SYN_MEM) + +labels, comments_before, comments_inline, replacements = {}, {}, {}, {} + +# Do smart things if requested +if args.smart: + disassemble_end = 0x17b4 + # Emulate 06bb to decrypt data + for R1 in range(disassemble_end, 0x7562): + R0 = SYN_MEM[R1] + R0 ^= pow(R1, 2, 32768) + R0 ^= 0x4154 + SYN_MEM[R1] = R0 # Find things to label -labels, comments_before, comments_inline, replacements = {}, {}, {}, {} SYN_PTR = 0 -while SYN_PTR < len(SYN_MEM): +while SYN_PTR < disassemble_end: word = SYN_MEM[SYN_PTR] if word in instructions_by_opcode: instruction, SYN_PTR = Instruction.next_instruction(SYN_MEM, SYN_PTR) @@ -93,18 +107,48 @@ MODE_DAT = False #False, 1 (data), 2 (text) SYN_PTR = 0 +def set_mode_out(mode): + global MODE_OUT + if MODE_OUT == mode: + pass + elif mode == False: + # Switching off + print('"') + else: + # Switching on + print('{:04x}: out "'.format(SYN_PTR), end='') + MODE_OUT = mode +def set_mode_dat(mode): + global MODE_DAT + if MODE_DAT == mode: + pass + elif mode == False: + # Switching off + if MODE_DAT == 2: + print('"', end='') + print() + elif MODE_DAT == 1: + # Switching from data to text + print() + print('{:04x}: data "'.format(SYN_PTR), end='') + elif MODE_DAT == 2: + # Switching from text to data + print('"') + print('{:04x}: data'.format(SYN_PTR), end='') + else: + # Switching to a new mode + print('{:04x}: data'.format(SYN_PTR), end='') + if mode == 2: + print('"', end='') + MODE_DAT = mode +def clear_modes(): + set_mode_out(False) + set_mode_dat(False) + while SYN_PTR < len(SYN_MEM): # Handle comments if SYN_PTR in comments_before: - if MODE_OUT: - print('"') - MODE_OUT = False - if MODE_DAT == 1: - print() - MODE_DAT = False - if MODE_DAT == 2: - print('"') - MODE_DAT = False + clear_modes() for comment in comments_before[SYN_PTR]: print('; {}'.format(comment)) if SYN_PTR in comments_inline: @@ -114,15 +158,7 @@ while SYN_PTR < len(SYN_MEM): # Handle labels if any(v == SYN_PTR for k, v in labels.items()): - if MODE_OUT: - print('"') - MODE_OUT = False - if MODE_DAT == 1: - print() - MODE_DAT = False - if MODE_DAT == 2: - print('"') - MODE_DAT = False + clear_modes() print('${}:'.format(next(k for k, v in labels.items() if v == SYN_PTR))) # Handle replacements @@ -134,38 +170,17 @@ while SYN_PTR < len(SYN_MEM): word = SYN_MEM[SYN_PTR] - if MODE_OUT and word != 19: - print('"') - MODE_OUT = False - if MODE_DAT and 0 <= word <= 21: - if MODE_DAT == 1: - print() - if MODE_DAT == 2: - print('"') - MODE_DAT = False - - if word not in instructions_by_opcode: + if SYN_PTR >= disassemble_end or word not in instructions_by_opcode: # Data if 32 <= word <= 126: - # looks like letters - unfortunately "\n" looks like MULT - if MODE_DAT == 2: - print(escape_char(chr(word)), end='') - else: - if MODE_DAT == 1: - print() - print('{:04x}: data "{}'.format(SYN_PTR, escape_char(chr(word))), end='') - MODE_DAT = 2 + # Looks like letters - unfortunately "\n" looks like MULT + set_mode_dat(2) + print(escape_char(chr(word)), end='') if word == 0x0a: - print('"') - MODE_DAT = False # break on newlines + clear_modes() # Break on newlines else: - if MODE_DAT == 1: - print(' {:04x}'.format(word), end='') - else: - if MODE_DAT == 2: - print('"') - print('{:04x}: data {:04x}'.format(SYN_PTR, word), end='') - MODE_DAT = 1 + set_mode_dat(1) + print(' {:04x}'.format(word), end='') SYN_PTR += 1 else: # Instruction @@ -173,30 +188,36 @@ while SYN_PTR < len(SYN_MEM): # Special cases if isinstance(instruction, InstructionOut): if isinstance(instruction.args[0], OpLiteral): - if MODE_OUT: - print(escape_char(chr(instruction.args[0].get(None))), end='') - else: - print('{:04x}: out "{}'.format(SYN_PTR, escape_char(chr(instruction.args[0].get(None)))), end='') - MODE_OUT = True + set_mode_out(True) + print(escape_char(chr(instruction.args[0].get(None))), end='') if instruction.args[0].get(None) == 0x0a: - print('"') - MODE_OUT = False # break on newlines + clear_modes() # Break on newlines else: - if MODE_OUT: - print('"') - MODE_OUT = False + clear_modes() print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) elif isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionJt) or isinstance(instruction, InstructionJf) or isinstance(instruction, InstructionCall): + clear_modes() if isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionCall): argidx = 0 else: argidx = 1 - if isinstance(instruction.args[argidx], OpLiteral): - loc = instruction.args[argidx].get(None) - if any(v == loc for k, v in labels.items()): - label = next(k for k, v in labels.items() if v == loc) - instruction.args[argidx] = OpLabel(label) + # Aggressively replace labels if requested + for argnum in range(instruction.nargs) if args.aggressive_labels else [argidx]: + if isinstance(instruction.args[argnum], OpLiteral): + loc = instruction.args[argnum].get(None) + if any(v == loc for k, v in labels.items()): + label = next(k for k, v in labels.items() if v == loc) + instruction.args[argnum] = OpLabel(label) print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) else: + # Aggressively replace labels if requested + if args.aggressive_labels: + for argnum in range(instruction.nargs): + if isinstance(instruction.args[argnum], OpLiteral): + loc = instruction.args[argnum].get(None) + if any(v == loc for k, v in labels.items()): + label = next(k for k, v in labels.items() if v == loc) + instruction.args[argnum] = OpLabel(label) + clear_modes() print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) SYN_PTR = next_SYN_PTR diff --git a/disasm_hints/annotations.txt b/disasm_hints/annotations.txt index 37b3543..2ba96aa 100644 --- a/disasm_hints/annotations.txt +++ b/disasm_hints/annotations.txt @@ -56,16 +56,11 @@ ren sub_0505 self_test_call_subroutine1 lbl 02c2 self_test_call_returnloc1 ren label_0509 self_test_call_fail ren label_02c4 self_test_call_check_stack1 -rep 02c6 eq R1 R0 $self_test_call_check_stack1 cmi 02c6 This test is strange. If R0 == 02c2 as tested below, R0 != 02c4 is guaranteed -rep 02cd eq R1 R0 $self_test_call_returnloc1 cmb 02d4 Test call register value ren sub_0507 self_test_call_subroutine2 lbl 02d9 self_test_call_returnloc2 -rep 02d4 set R0 $self_test_call_subroutine2 ren label_02db self_test_call_check_stack2 -rep 02dd eq R1 R0 $self_test_call_check_stack2 -rep 02e4 eq R1 R0 $self_test_call_returnloc2 cmb 02eb Test add overflow ren label_0520 self_test_overflow_fail cmi 0304 Just to be safe! @@ -80,15 +75,48 @@ lbl 034b self_test_rwmem_data cmb 034b Test rmem/wmem ren label_04d7 self_test_rmem_fail ren label_04ee self_test_wmem_fail -rep 034d rmem R0 $self_test_rwmem_data -rep 0357 add R2 $self_test_rwmem_data 0001 -rep 0365 set R0 $self_test_rwmem_data ren sub_06bb decrypt_data -cmi 0375 Sneaky! +cmb 0375 Sneaky! +cmb 0375 call $decrypt_data +rep 0375 nop +rep 0376 nop lbl 17b4 encrypted_data -rep 0377 rmem R0 $encrypted_data -rep 0381 add R2 $encrypted_data 0001 ren label_03ad self_test_wmem_cmd_fail rep 03ab out $self_test_complete cmi 03a9 Becomes noop; jt 0013 $self_test_complete lbl 03d2 self_test_complete +lbl 17c0 str_self_test_result +cmi 03d6 The "F" +lbl 17e4 str_complete +ren label_03e8 self_test_loop_copy_complete +cmb 03e8 Copy the "complete" string over the "FAILED!!" substring of $str_self_test_result +ren label_03ff self_test_concat_all_pass +cmb 03ff Extend $str_self_test_result to include $str_self_test_all_pass +lbl 17d3 str_self_test_all_pass +cmi 0410 ASCII comma +cmi 0428 Print self-test completion string +cmi 0430 Jump to game loop +ren label_0aae after_self_test +ren sub_05b2 loop_string +cmb 05b2 % Loops over a string and calls a callback for each character +cmb 05b2 % @param R0 Pointer to the string to loop over +cmb 05b2 % @param R1 Pointer to the callback to call with the character in R0 +ren label_05c8 loop_string_loop_chars +ren label_05e3 loop_string_loop_chars_done +ren sub_05ee print_string +cmb 05ee % Prints a string to output +cmb 05ee % @param R0 Pointer to the string to print +ren sub_05f8 print_char +cmb 05f8 % Prints a single character to output +cmb 05f8 % Most useful as a callback to $loop_string +cmb 05f8 % @param R0 The character to print +ren sub_05fb print_char_xored +cmb 05fb % Prints a single character to output after XORing with a fixed value +cmb 05fb % Most useful as a callback to $loop_string +cmb 05fb % @param R0 The input character +cmb 05fb % @param R2 The other XOR operand +ren sub_084d xor +cmb 084d % Calculates the exclusive OR (XOR) of two values +cmb 084d % @param R0 The first XOR operand +cmb 084d % @param R1 The second XOR operand +cmb 084d % @return R0 The value R0 XOR R1