From 4322fe57e791f10061d5137ed28a07ac430b2fc0 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Mon, 30 Oct 2017 18:40:03 +1100 Subject: [PATCH] Add built-in string support in dis/assembler --- disasm.py | 58 +++++++++++++++++++++++++++++++----- disasm_hints/annotations.txt | 12 ++++++++ libsynacor/assembly.py | 10 +++++-- libsynacor/bytecode.py | 27 +++++++++++++++++ 4 files changed, 97 insertions(+), 10 deletions(-) diff --git a/disasm.py b/disasm.py index cd21734..0397e8d 100755 --- a/disasm.py +++ b/disasm.py @@ -30,7 +30,7 @@ with open(args.file, 'rb') as data: SYN_MEM = memory_from_file(data) disassemble_end = len(SYN_MEM) -labels, comments_before, comments_inline, replacements = {}, {}, {}, {} +labels, comments_before, comments_inline, replacements, strings = {}, {}, {}, {}, [] # Do smart things if requested if args.smart: @@ -99,11 +99,18 @@ if args.hints: code = line[line.index(' ', line.index(' ') + 1) + 1:].strip() instruction = assemble_line(None, code)[0][0] replacements[loc] = instruction + elif line.startswith('del '): + loc = int(line.split()[1], 16) + replacements[loc] = None + elif line.startswith('str '): + loc = int(line.split()[1], 16) + strings.append(loc) else: raise Exception('Invalid line in hint file: {}'.format(line)) MODE_OUT = False -MODE_DAT = False #False, 1 (data), 2 (text) +MODE_DAT = False #False, 1 (data), 2 (text), 3 (unknown string), 4 (data string), 5 (text string) +str_ctr = None SYN_PTR = 0 @@ -124,7 +131,7 @@ def set_mode_dat(mode): pass elif mode == False: # Switching off - if MODE_DAT == 2: + if MODE_DAT == 2 or MODE_DAT == 5: print('"', end='') print() elif MODE_DAT == 1: @@ -135,11 +142,20 @@ def set_mode_dat(mode): # Switching from text to data print('"') print('{:04x}: data'.format(SYN_PTR), end='') - else: + elif mode == 4: + # Detected data string + pass + elif mode == 5: + # Detected text string + print(' "', end='') + elif mode == 1 or mode == 2: # Switching to a new mode print('{:04x}: data'.format(SYN_PTR), end='') if mode == 2: print('"', end='') + elif mode == 3: + # Switching to a new string mode + print('{:04x}: str'.format(SYN_PTR), end='') MODE_DAT = mode def clear_modes(): set_mode_out(False) @@ -164,13 +180,41 @@ while SYN_PTR < len(SYN_MEM): # Handle replacements if SYN_PTR in replacements: instruction = replacements[SYN_PTR] - print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) - SYN_PTR += len(instruction.assemble(None)) + if instruction is not None: + print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) + SYN_PTR += len(instruction.assemble(None)) continue word = SYN_MEM[SYN_PTR] - if SYN_PTR >= disassemble_end or word not in instructions_by_opcode: + if SYN_PTR in strings: + # String length + set_mode_dat(3) + str_ctr = word + SYN_PTR += 1 + elif MODE_DAT == 3: + # Detect string type + if 32 <= word <= 126: + set_mode_dat(5) + print(escape_char(chr(word)), end='') + else: + set_mode_dat(4) + print(' {:04x}'.format(word), end='') + SYN_PTR += 1 + str_ctr -= 1 + if str_ctr <= 0: + set_mode_dat(False) + elif MODE_DAT == 4 or MODE_DAT == 5: + # String + if MODE_DAT == 4: + print(' {:04x}'.format(word), end='') + else: + print(escape_char(chr(word)), end='') + SYN_PTR += 1 + str_ctr -= 1 + if str_ctr <= 0: + set_mode_dat(False) + elif SYN_PTR >= disassemble_end or word not in instructions_by_opcode: # Data if 32 <= word <= 126: # Looks like letters - unfortunately "\n" looks like MULT diff --git a/disasm_hints/annotations.txt b/disasm_hints/annotations.txt index 2ba96aa..93800ed 100644 --- a/disasm_hints/annotations.txt +++ b/disasm_hints/annotations.txt @@ -73,6 +73,12 @@ ren label_059d self_test_mod_fail ren label_034d self_test_rwmem lbl 034b self_test_rwmem_data cmb 034b Test rmem/wmem +rep 034d rmem R0 $self_test_rwmem_data +rep 0357 add R2 $self_test_rwmem_data 0001 +rep 0365 set R0 $self_test_rwmem_data +cmb 0377 Check the data has decrypted successfully +rep 0377 rmem R0 $encrypted_data +rep 0381 add R2 $encrypted_data 0001 ren label_04d7 self_test_rmem_fail ren label_04ee self_test_wmem_fail ren sub_06bb decrypt_data @@ -80,14 +86,20 @@ cmb 0375 Sneaky! cmb 0375 call $decrypt_data rep 0375 nop rep 0376 nop +cmb 038f Replace the "t" in "test string" with "T" lbl 17b4 encrypted_data ren label_03ad self_test_wmem_cmd_fail +cmb 03a3 Try replacing the next instructions +rep 03a3 wmem $self_test_dynamic 0015 +rep 03a6 wmem $($self_test_dynamic+1) 0007 rep 03ab out $self_test_complete cmi 03a9 Becomes noop; jt 0013 $self_test_complete +lbl 03a9 self_test_dynamic lbl 03d2 self_test_complete lbl 17c0 str_self_test_result cmi 03d6 The "F" lbl 17e4 str_complete +str 17e4 ren label_03e8 self_test_loop_copy_complete cmb 03e8 Copy the "complete" string over the "FAILED!!" substring of $str_self_test_result ren label_03ff self_test_concat_all_pass diff --git a/libsynacor/assembly.py b/libsynacor/assembly.py index d58651e..51a65a8 100644 --- a/libsynacor/assembly.py +++ b/libsynacor/assembly.py @@ -103,7 +103,7 @@ def assemble_instruction(source, tokens): instruction.args.append(OpLiteral(ord(char))) instructions.append(instruction) return instructions, [] - elif isinstance(instruction, InstructionData): + elif isinstance(instruction, InstructionData) or isinstance(instruction, InstructionString): if tokens[1].startswith('"'): chars = unescape_char(tokens[1][1:-1]) instruction.args = [ord(char) for char in chars] @@ -120,8 +120,12 @@ def assemble_instruction(source, tokens): # Register arg = OpRegister(int(argstr[1:])) elif argstr.startswith('$'): - # Label - arg = OpLabel(argstr[1:]) + if argstr.startswith('$('): + # Expression + arg = OpExpression(argstr[2:-1]) + else: + # Label + arg = OpLabel(argstr[1:]) else: # Hex literal arg = OpLiteral(int(argstr, 16)) diff --git a/libsynacor/bytecode.py b/libsynacor/bytecode.py index 479f687..598ce1e 100644 --- a/libsynacor/bytecode.py +++ b/libsynacor/bytecode.py @@ -64,6 +64,28 @@ class OpLabel(Operand): raise Exception('Unknown label {}'.format(self.label)) return OpLiteral(labels[self.label]).assemble(labels) +# Used only in assembling process +class OpExpression(Operand): + def __init__(self, expr): + self.expr = expr + + def describe(self): + return '$({})'.format(self.expr) + def assemble(self, labels): + if labels is None: + # First pass + return 0xffff + + # Replace labels + expr = self.expr + for label in labels: + expr = expr.replace('$' + label, str(labels[label])) + + # Warning: Not safe for untrusted code! + value = eval(expr) + + return OpLiteral(value).assemble(labels) + instructions_by_opcode = {} instructions_by_name = {} @@ -246,3 +268,8 @@ class InstructionData(Instruction): def assemble(self, labels): return self.args instructions_by_name['data'] = InstructionData + +class InstructionString(Instruction): + def assemble(self, labels): + return [len(self.args)] + self.args +instructions_by_name['str'] = InstructionString