From 773a1c4e76bf463a5842efa0bc5f63f114f2cad0 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Sun, 12 Feb 2017 21:53:40 +1030 Subject: [PATCH 01/10] Switch notation to R{0..7} per spec --- dbg_fastboot.py | 10 +- dbg_teleporter.py | 2 +- electric_boogaloo.md | 300 +++++++++++++++++++-------------------- notes.md | 60 ++++---- tools/decrypt_data.py | 10 +- tools/decrypt_strings.py | 30 ++-- tools/disasm.py | 6 +- tools/generate_codes.py | 56 ++++---- 8 files changed, 236 insertions(+), 238 deletions(-) diff --git a/dbg_fastboot.py b/dbg_fastboot.py index 69d716a..44b686b 100644 --- a/dbg_fastboot.py +++ b/dbg_fastboot.py @@ -29,11 +29,11 @@ with open('challenge.bin', 'rb') as data: i += 1 # Emulate 06bb -for R2 in range(0x17b4, 0x7562): - R1 = SYN_MEM[R2] - R1 ^= pow(R2, 2, 32768) - R1 ^= 0x4154 - SYN_MEM[R2] = R1 +for R1 in range(0x17b4, 0x7562): + R0 = SYN_MEM[R1] + R0 ^= pow(R1, 2, 32768) + R0 ^= 0x4154 + SYN_MEM[R1] = R0 # Jump past self-test SYN_PTR = 0x0377 diff --git a/dbg_teleporter.py b/dbg_teleporter.py index fe2abc7..ebf5bfd 100644 --- a/dbg_teleporter.py +++ b/dbg_teleporter.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -# Set R8 to 6486 +# Set R7 to 6486 SYN_REG[7] = 0x6486 # Patch instructions 1571 to 1579 inclusive with nop's diff --git a/electric_boogaloo.md b/electric_boogaloo.md index 817a3e9..057daae 100644 --- a/electric_boogaloo.md +++ b/electric_boogaloo.md @@ -20,7 +20,7 @@ Hex values refer to the instruction lines, not the actual ranges spanned by the * Surprisingly, this is where it is checked that 1 + 1 = 2, but if `add` gives an incorrect non-zero result for 1 + 1, the test will erroneously report that it is `eq` which is not supported! * It would probably have been a better idea to test `eq` first, before `add`, then use `eq` to test `add`. * `024e` to `0261`: Tests `push` and `pop` - * Since only `R1` and `R2` are checked, this would not detect errors involving other registers. + * Since only `R0` and `R1` are checked, this would not detect errors involving other registers. * This test, like the last one, reuses the results of previous tests, since that worked out so well for `eq`… * `0264` to `0276`: Tests `gt` * The tests performed seem quite reasonable, but yet again reuse the results of previous tests… @@ -29,7 +29,7 @@ Hex values refer to the instruction lines, not the actual ranges spanned by the * `02ac` to `02bd`: Tests `not` * Okay, I admit this one was pretty helpful. What the hell is a ‘15-bit bitwise inverse’? Well the test passes if I do just do mod 32768, so that works I guess… * `02c0` to `02e8`: Tests `call` - * Although, notably, not `ret`. The tests operates by `jmp`ing back and forth to test the various values of `R1`. + * Although, notably, not `ret`. The tests operates by `jmp`ing back and forth to test the various values of `R0`. * `02eb` to `0308`: Checks that `add` overflows correctly * `030b` to `0313`: Checks that 6 × 9 ≠ 42. * I suspect there is a mistake in this test. Since Adams (1979) demonstrated unequivocally that 6 × 9 is equal to 42, I believe the `jt` should in fact be a `jf`. @@ -50,36 +50,36 @@ After a wild goose chase examining the code after the self-test, we find that th This is the magic line. Digging into the `06bb` subroutine: - 06bb push R1 - 06bd push R2 - 06bf set R2 17b4 - 06c2 rmem R1 R2 - 06c5 push R2 - 06c7 mult R2 R2 R2 + 06bb push R0 + 06bd push R1 + 06bf set R1 17b4 + 06c2 rmem R0 R1 + 06c5 push R1 + 06c7 mult R1 R1 R1 06cb call 084d - 06cd set R2 4154 + 06cd set R1 4154 06d0 call 084d - 06d2 pop R2 - 06d4 wmem R2 R1 - 06d7 add R2 R2 0001 - 06db eq R1 7562 R2 - 06df jf R1 06c2 - 06e2 pop R2 - 06e4 pop R1 + 06d2 pop R1 + 06d4 wmem R1 R0 + 06d7 add R1 R1 0001 + 06db eq R0 7562 R1 + 06df jf R0 06c2 + 06e2 pop R1 + 06e4 pop R0 06e6 ret -Inspecting the `084d` subroutine reveals that this is simply an XOR function: `R1 XOR R2`. Crypto rating: 1/10 +Inspecting the `084d` subroutine reveals that this is simply an XOR function: `R0 XOR R1`. Crypto rating: 1/10 Rewriting `06bb` function using higher-level syntax reveals that the ‘encryption’ algorithm is really very simple: ```c 06bb() { - R2 = 17b4; - for (R2 = 17b4; R2 != 7562; R2++) { - R1 = [R2]; - R1 ^= R2 * R2; - R1 ^= 4154; - [R2] = R1; + R1 = 17b4; + for (R1 = 17b4; R1 != 7562; R1++) { + R0 = [R1]; + R0 ^= R1 * R1; + R0 ^= 4154; + [R1] = R0; } } ``` @@ -94,19 +94,19 @@ So earlier, we produced a tool-assisted speed-run that would complete and dump t Looking through the code following the self-test, we find: - 0413 set R1 17c0 + 0413 set R0 17c0 0416 call 05ee -Digging deeper, `05ee` calls `05b2` with `R2` set to `05f8`. `05b2` appears to iterate over the characters in a string whose length is stored in address `R1`, and calls `R2` for each character, storing the character in `R1`. `05f8` (the callback provided by `05ee`) simply outputs every character in `R1` it gets. +Digging deeper, `05ee` calls `05b2` with `R1` set to `05f8`. `05b2` appears to iterate over the characters in a string whose length is stored in address `R0`, and calls `R1` for each character, storing the character in `R0`. `05f8` (the callback provided by `05ee`) simply outputs every character in `R0` it gets. Immediately after this call to `05ee`, we find: - 041e set R1 68e3 - 0421 set R2 05fb - 0424 add R3 XXXX XXXX + 041e set R0 68e3 + 0421 set R1 05fb + 0424 add R2 XXXX XXXX 0428 call 05b2 -In other words, a similar string-handling subroutine is called, but instead of `05f8` (which would simply print the string), `05fb` is called. `05fb` also outputs the character, but only after calling `084d` (XOR) with `R2` set to `R3`. +In other words, a similar string-handling subroutine is called, but instead of `05f8` (which would simply print the string), `05fb` is called. `05fb` also outputs the character, but only after calling `084d` (XOR) with `R1` set to `R2`. Now we have everything we need to [extract these encrypted (double-encrypted??) strings](https://github.com/RunasSudo/synacor.py/blob/master/tools/decrypt_strings.py) from the binary! @@ -116,67 +116,67 @@ Only the self-test completion code appears to be stored there, though, so I'm no We may not have the codes themselves, but we can now easily locate where they are printed. The tablet code, for example, is conveniently sandwiched between strings `6ed1` and `6ef1`. Thus the code we are looking for is: - 1290 set R1 1092 - 1293 set R2 650a - 1296 set R3 7fff - 1299 set R4 6eed + 1290 set R0 1092 + 1293 set R1 650a + 1296 set R2 7fff + 1299 set R3 6eed 129c call 0731 Looking into `0731`: - 0731 push R4 - 0733 push R5 - 0735 push R6 - 0737 push R7 - 0739 set R7 0001 - 073c add R5 R4 R7 - 0740 rmem R5 R5 - 0743 add R6 17ed R7 - 0747 wmem R6 R5 - 074a add R7 R7 0001 - 074e rmem R6 17ed - 0751 gt R5 R7 R6 - 0755 jf R5 073c - 0758 set R4 0000 - 075b set R5 0000 - 075e rmem R6 17ed - 0761 mod R6 R5 R6 - 0765 add R6 R6 17ed - 0769 add R6 R6 0001 - 076d rmem R7 R6 - 0770 mult R7 R7 1481 - 0774 add R7 R7 3039 - 0778 wmem R6 R7 - 077b push R1 - 077d push R2 - 077f set R2 R7 + 0731 push R3 + 0733 push R4 + 0735 push R5 + 0737 push R6 + 0739 set R6 0001 + 073c add R4 R3 R6 + 0740 rmem R4 R4 + 0743 add R5 17ed R6 + 0747 wmem R5 R4 + 074a add R6 R6 0001 + 074e rmem R5 17ed + 0751 gt R4 R6 R5 + 0755 jf R4 073c + 0758 set R3 0000 + 075b set R4 0000 + 075e rmem R5 17ed + 0761 mod R5 R4 R5 + 0765 add R5 R5 17ed + 0769 add R5 R5 0001 + 076d rmem R6 R5 + 0770 mult R6 R6 1481 + 0774 add R6 R6 3039 + 0778 wmem R5 R6 + 077b push R0 + 077d push R1 + 077f set R1 R6 0782 call 084d - 0784 set R7 R1 - 0787 pop R2 - 0789 pop R1 - 078b rmem R6 R2 - 078e mod R7 R7 R6 - 0792 add R7 R7 0001 - 0796 gt R6 R7 R3 - 079a jt R6 07a0 - 079d set R4 0001 - 07a0 add R7 R7 R2 - 07a4 rmem R7 R7 - 07a7 add R5 R5 0001 - 07ab add R6 R5 17f1 - 07af wmem R6 R7 - 07b2 rmem R6 17f1 - 07b5 eq R6 R5 R6 - 07b9 jf R6 075e - 07bc jf R4 0758 - 07bf push R1 - 07c1 set R1 17f1 + 0784 set R6 R0 + 0787 pop R1 + 0789 pop R0 + 078b rmem R5 R1 + 078e mod R6 R6 R5 + 0792 add R6 R6 0001 + 0796 gt R5 R6 R2 + 079a jt R5 07a0 + 079d set R3 0001 + 07a0 add R6 R6 R1 + 07a4 rmem R6 R6 + 07a7 add R4 R4 0001 + 07ab add R5 R4 17f1 + 07af wmem R5 R6 + 07b2 rmem R5 17f1 + 07b5 eq R5 R4 R5 + 07b9 jf R5 075e + 07bc jf R3 0758 + 07bf push R0 + 07c1 set R0 17f1 07c4 call 05ee - 07c6 pop R1 - 07c8 pop R7 - 07ca pop R6 - 07cc pop R5 - 07ce pop R4 + 07c6 pop R0 + 07c8 pop R6 + 07ca pop R5 + 07cc pop R4 + 07ce pop R3 07d0 ret Umm… Sorry, could you repeat that? @@ -184,59 +184,59 @@ Umm… Sorry, could you repeat that? Rewriting this again in more friendly terms: ```c -// R1: A seed of sorts - the same for all users -// R2: The length and alphabet to use, usually 650a, but 653f for the mirror -// R3: Usually 7fff, but 0004 for the mirror -// R4: An initialisation vector of sorts - contents different for every user - points to the length, but this is always 3 -0731(R1, R2, R3, R4) { - // copy the string at R4 to 17ed - R7 = 0001; +// R0: A seed of sorts - the same for all users +// R1: The length and alphabet to use, usually 650a, but 653f for the mirror +// R2: Usually 7fff, but 0004 for the mirror +// R3: An initialisation vector of sorts - contents different for every user - points to the length, but this is always 3 +0731(R0, R1, R2, R3) { + // copy the string at R3 to 17ed + R6 = 0001; do { - R5 = R4 + R7; - R5 = [R5]; - R6 = 17ed + R7; - [R6] = R5; - R7 = R7 + 0001; - R6 = [17ed]; // 3, the length - this never seems to change - } while (R7 <= R6); + R4 = R3 + R6; + R4 = [R4]; + R5 = 17ed + R6; + [R5] = R4; + R6 = R6 + 0001; + R5 = [17ed]; // 3, the length - this never seems to change + } while (R6 <= R5); - // the string at 17ed is now what was at R4 + // the string at 17ed is now what was at R3 do { - R4 = 0000; // done flag - R5 = 0000; // index + R3 = 0000; // done flag + R4 = 0000; // index do { - R6 = [17ed]; // 3, the length - R6 = R5 % R6; - R6 = R6 + 17ed; - R6 = R6 + 0001; // will cycle through three addresses of 17ed/R4 string: 17ee, 17ef, 17f0 - R7 = [R6]; - R7 = R7 * 1481; - R7 = R7 + 3039; - [R6] = R7; // mutate that value of the 17ed string - R7 = R1 ^ R7; // combine R1 into R7 - R6 = [R2]; // length of the alphabet - R7 = R7 % R6; - R7 = R7 + 0001; // calculate index in alphabet - if (R7 <= R3) { - R4 = 0001; // we are done with the entire code - this returns immediately for all except the mirror + R5 = [17ed]; // 3, the length + R5 = R4 % R5; + R5 = R5 + 17ed; + R5 = R5 + 0001; // will cycle through three addresses of 17ed/R3 string: 17ee, 17ef, 17f0 + R6 = [R5]; + R6 = R6 * 1481; + R6 = R6 + 3039; + [R5] = R6; // mutate that value of the 17ed string + R6 = R0 ^ R6; // combine R0 into R6 + R5 = [R1]; // length of the alphabet + R6 = R6 % R5; + R6 = R6 + 0001; // calculate index in alphabet + if (R6 <= R2) { + R3 = 0001; // we are done with the entire code - this returns immediately for all except the mirror } - R7 = R7 + R2; // calculate address of letter to use - R7 = [R7]; // the letter to use - R5 = R5 + 0001; // increment the index - R6 = R5 + 17f1; // index of new letter in code - [R6] = R7; // set the letter - R6 = [17f1]; // length of the code: twelve letters - } while (R5 != R6); // loop until we've generated all the letters - } while (!R4); + R6 = R6 + R1; // calculate address of letter to use + R6 = [R6]; // the letter to use + R4 = R4 + 0001; // increment the index + R5 = R4 + 17f1; // index of new letter in code + [R5] = R6; // set the letter + R5 = [17f1]; // length of the code: twelve letters + } while (R4 != R5); // loop until we've generated all the letters + } while (!R3); print(17f1); } ``` Re-implementing this in Python, we can now extract the code for the tablet directly from the binary! -Unfortunately, each of the other codes uses an `R1` based on the solution to the puzzle. In the case of the maze code: +Unfortunately, each of the other codes uses an `R0` based on the solution to the puzzle. In the case of the maze code: - 0f03 rmem R1 0e8e + 0f03 rmem R0 0e8e The value at `0e8e` is derived from which rooms are visited in the maze, as mentioned in the main note file. Armed with our [trusty map](https://github.com/RunasSudo/synacor.py/blob/master/tools/graph.py), and cross-referencing the callbacks with the files, we identify: @@ -247,37 +247,37 @@ The value at `0e8e` is derived from which rooms are visited in the maze, as ment Putting it all together, the final value at `0e8e` is `0008 | 0010 | 0040` = `0058`. -Similarly, the `R1` for the teleporter code is the value of `R8` from the Ackermann function, which we determined earlier to be `6486`: +Similarly, the `R0` for the teleporter code is the value of `R7` from the Ackermann function, which we determined earlier to be `6486`: - 1592 set R1 R8 + 1592 set R0 R7 -The remaining two codes, for the coins and the vault, are more complicated still, but follow the same pattern of determining `R1` based on the player's history. +The remaining two codes, for the coins and the vault, are more complicated still, but follow the same pattern of determining `R0` based on the player's history. -For the coins, the call to `0731` derives an `R1` from the values at memory locations `69de` to `69e2`. One would presume that this relates to the order in which coins are inserted into the puzzle, and following the trail of callbacks for the coins confirms this. The important lines are: +For the coins, the call to `0731` derives an `R0` from the values at memory locations `69de` to `69e2`. One would presume that this relates to the order in which coins are inserted into the puzzle, and following the trail of callbacks for the coins confirms this. The important lines are: - 13c0 rmem R3 099e # nr of coins inserted - 13c3 add R3 R3 69dd - 13c7 add R3 R3 0001 - 13cb wmem R3 R2 # R2 is the number of dots on the coin + 13c0 rmem R2 099e # nr of coins inserted + 13c3 add R2 R2 69dd + 13c7 add R2 R2 0001 + 13cb wmem R2 R1 # R1 is the number of dots on the coin Thus the values should be 9, 2, 5, 7 and 3: the missing numbers in the solved equation. -It is now trivial to generate the correct value of `R1`. Based on the code beginning, `15fd`: +It is now trivial to generate the correct value of `R0`. Based on the code beginning, `15fd`: ```python data_69de = [9, 2, 5, 7, 3] -R1 = 0 +R0 = 0 for i in range(len(data_69de)): - R2 = data_69de[i] - R1 = R1 + R2 - R1 = (R1 * 0x7bac) % 0x8000 - R1 = R1 ^ R2 + R1 = data_69de[i] + R0 = R0 + R1 + R0 = (R0 * 0x7bac) % 0x8000 + R0 = R0 ^ R1 ``` The result is `0b3b`. -On to the final code, now: the vault lock. At `1679`, `R1` is computed as `([0f73] ^ [0f74]) ^ [0f75]`. Searching the code for references to these, we find: +On to the final code, now: the vault lock. At `1679`, `R0` is computed as `([0f73] ^ [0f74]) ^ [0f75]`. Searching the code for references to these, we find: 1236 wmem 0f70 0016 // weight 1239 wmem 0f71 0000 // counter @@ -297,19 +297,19 @@ data_0f74 = 0 data_0f75 = 0 # 08c8 -def rotatey_thing(R1, R2): - while R2 != 0: - R2 = (R2 + 0x7fff) % 0x8000 - R3 = R1 & 0x4000 - R1 = (R1 * 0x0002) % 0x8000 - if R3 == 0: +def rotatey_thing(R0, R1): + while R1 != 0: + R1 = (R1 + 0x7fff) % 0x8000 + R2 = R0 & 0x4000 + R0 = (R0 * 0x0002) % 0x8000 + if R2 == 0: continue - R1 = R1 | 0x0001 - return R1 + R0 = R0 | 0x0001 + return R0 # 11a3 -def mutate(R1val, R2, R3): - return rotatey_thing(R1val, R2) ^ R3 +def mutate(R0val, R1, R2): + return rotatey_thing(R0val, R1) ^ R2 # 1135 def update(value, room): diff --git a/notes.md b/notes.md index 6e1fada..85c8575 100644 --- a/notes.md +++ b/notes.md @@ -5,8 +5,6 @@ Note 1: Codes seem to be unique for every user. Note 2: Numeric values in `monospace` font are in hexadecimal. -Note 3: For some reason, I decided to number the registers `R1` through `R8`, contrary to the spec… - ## The programming codes ### Code 1 @@ -166,7 +164,7 @@ Now, let's see what this `1545` does. C-style, because assembly makes my brain s ```c int 1545() { - if (R8 == 0) + if (R7 == 0) return 15e5(); // Ground state teleport 05b2(70ac, 05fb, 1807 + 585a); // Secure text print for(i = 0; i < 5; i++); // Speed up loop, because why not? @@ -176,7 +174,7 @@ int 1545() { 05b2(7156, 05fb, 1ed6 + 0992); - 0731(R8, 650a, 7fff, 7239); + 0731(R7, 650a, 7fff, 7239); 05b2(723d, 05fb, 7c1f + 0146); @@ -188,49 +186,49 @@ int 1545() { return 1652(); } -int 178b(R1, R2) { +int 178b(R0, R1) { + if (R0 != 0) { + return 1793(R0, R1); + } + return R1 + 0001; +} + +int 1793(R0, R1) { if (R1 != 0) { - return 1793(R1, R2); + return 17a0(R0, R1); } - return R2 + 0001; + R0 = R0 + 7fff; + R1 = R7; + R0 = 178b(R0, R1); + return R0; } -int 1793(R1, R2) { - if (R2 != 0) { - return 17a0(R1, R2); - } +int 17a0(R0, R1) { R1 = R1 + 7fff; - R2 = R8; - R1 = 178b(R1, R2); - return R1; -} - -int 17a0(R1, R2) { - R2 = R2 + 7fff; - R2 = 178b(R1, R2); - R1 = R1 + 7fff; - R1 = 178b(R1, R2); - return R1; + R1 = 178b(R0, R1); + R0 = R0 + 7fff; + R0 = 178b(R0, R1); + return R0; } ``` -Phew, so in other words, we seek an `R8` such that `178b(4, 1, R8)` equals 6. Let's see if we can't rewrite that function. Note that adding 0x7fff = 32767 to a number modulo 32768 is equivalent to subtracting 1. Thinking through the code, then, +Phew, so in other words, we seek an `R7` such that `178b(4, 1, R7)` equals 6. Let's see if we can't rewrite that function. Note that adding 0x7fff = 32767 to a number modulo 32768 is equivalent to subtracting 1. Thinking through the code, then, - A(R1, R2) = R2 + 1 , if R1 = 0 - A(R1 - 1, R8) , if R1 ≠ 0 and R2 = 0 - A(R1 - 1, A(R1, R2 - 1)) , if R1 ≠ 0 and R2 ≠ 0 + A(R0, R1) = R1 + 1 , if R0 = 0 + A(R0 - 1, R7) , if R0 ≠ 0 and R1 = 0 + A(R0 - 1, A(R0, R1 - 1)) , if R0 ≠ 0 and R1 ≠ 0 -Recognise anything? Well, neither did I the first time, and I'd [already seen the video](https://www.youtube.com/watch?v=i7sm9dzFtEI). It's the [Ackermann function](https://en.wikipedia.org/wiki/Ackermann_function)! With the slight twist that instead of the second line being `A(R1 - 1, 1)`, it's `A(R1 - 1, R8)`. +Recognise anything? Well, neither did I the first time, and I'd [already seen the video](https://www.youtube.com/watch?v=i7sm9dzFtEI). It's the [Ackermann function](https://en.wikipedia.org/wiki/Ackermann_function)! With the slight twist that instead of the second line being `A(R0 - 1, 1)`, it's `A(R0 - 1, R7)`. -No mathematical wizardry here, just implementing this and run a brute-force on all possible values of `R8`. And as much as it pains me to admit this, this is a tool for the raw processing efficiency of C, which I am not very proficient in, so I based my solution on [this wonderful code](https://github.com/glguy/synacor-vm/blob/master/teleport.c) by [glguy](https://github.com/glguy). My only contribution is the parallelisation of the computation. (500% speed-up! Whoo!) +No mathematical wizardry here, just implementing this and run a brute-force on all possible values of `R7`. And as much as it pains me to admit this, this is a tool for the raw processing efficiency of C, which I am not very proficient in, so I based my solution on [this wonderful code](https://github.com/glguy/synacor-vm/blob/master/teleport.c) by [glguy](https://github.com/glguy). My only contribution is the parallelisation of the computation. (500% speed-up! Whoo!) gcc ackermann.c -o ackermann -lpthread -O3 && ./ackermann -Running the algorithm, the correct value is revealed to be `0x6486`. Now we simply set `R8` to `0x6486` and patch the code to skip the check, before `use`ing the `teleporter`: +Running the algorithm, the correct value is revealed to be `0x6486`. Now we simply set `R7` to `0x6486` and patch the code to skip the check, before `use`ing the `teleporter`: 1571 call 178b -> nop nop - 1573 eq R2 R1 0006 -> nop nop nop nop - 1577 jf R2 15cb -> nop nop nop + 1573 eq R1 R0 0006 -> nop nop nop nop + 1577 jf R1 15cb -> nop nop nop I've implemented this as a debug function to prepare the teleporter: diff --git a/tools/decrypt_data.py b/tools/decrypt_data.py index 3a76a58..3edcf60 100755 --- a/tools/decrypt_data.py +++ b/tools/decrypt_data.py @@ -34,11 +34,11 @@ else: i += 1 # Emulate 06bb - for R2 in range(0x17b4, 0x7562): - R1 = SYN_MEM[R2] - R1 ^= pow(R2, 2, 32768) - R1 ^= 0x4154 - SYN_MEM[R2] = R1 + for R1 in range(0x17b4, 0x7562): + R0 = SYN_MEM[R1] + R0 ^= pow(R1, 2, 32768) + R0 ^= 0x4154 + SYN_MEM[R1] = R0 # Write with open(sys.argv[2], 'wb') as f: diff --git a/tools/decrypt_strings.py b/tools/decrypt_strings.py index 793a6df..95a147b 100755 --- a/tools/decrypt_strings.py +++ b/tools/decrypt_strings.py @@ -31,11 +31,11 @@ with open(sys.argv[1], 'rb') as data: i += 1 # Emulate 06bb -for R2 in range(0x17b4, 0x7562): - R1 = SYN_MEM[R2] - R1 ^= pow(R2, 2, 32768) - R1 ^= 0x4154 - SYN_MEM[R2] = R1 +for R1 in range(0x17b4, 0x7562): + R0 = SYN_MEM[R1] + R0 ^= pow(R1, 2, 32768) + R0 ^= 0x4154 + SYN_MEM[R1] = R0 class OpLiteral: def __init__(self, value): @@ -133,22 +133,22 @@ while SYN_PTR < len(SYN_MEM): readOp() elif word == 17: #CALL if readWord() == 0x05b2: - if (SYN_MEM[SYN_PTR-9:SYN_PTR-6] == [1, 32769, 0x05fb] # set R2 05fb - and SYN_MEM[SYN_PTR-12:SYN_PTR-10] == [1, 32768] # set R1 XXXX - and SYN_MEM[SYN_PTR-6:SYN_PTR-4] == [9, 32770]): # add R3 XXXX XXXX + if (SYN_MEM[SYN_PTR-9:SYN_PTR-6] == [1, 32769, 0x05fb] # set R1 05fb + and SYN_MEM[SYN_PTR-12:SYN_PTR-10] == [1, 32768] # set R0 XXXX + and SYN_MEM[SYN_PTR-6:SYN_PTR-4] == [9, 32770]): # add R2 XXXX XXXX # Got an encrypted string! - R1 = SYN_MEM[SYN_PTR-10] - R3 = (SYN_MEM[SYN_PTR-4] + SYN_MEM[SYN_PTR-3]) % 32768 - #print('{:04x} {:04x}'.format(R1, R3)) + R0 = SYN_MEM[SYN_PTR-10] + R2 = (SYN_MEM[SYN_PTR-4] + SYN_MEM[SYN_PTR-3]) % 32768 + #print('{:04x} {:04x}'.format(R0, R2)) - strlen = SYN_MEM[R1] + strlen = SYN_MEM[R0] strbuf = '' for i in range(strlen): - encrypted = SYN_MEM[R1 + 1 + i] - decrypted = encrypted ^ R3 + encrypted = SYN_MEM[R0 + 1 + i] + decrypted = encrypted ^ R2 strbuf += escapeChar(chr(decrypted)) - print('{:04x}: "{}"'.format(R1, strbuf)) + print('{:04x}: "{}"'.format(R0, strbuf)) elif word == 18: #RET pass elif word == 19: #OUT diff --git a/tools/disasm.py b/tools/disasm.py index b6e21e8..b02a89a 100755 --- a/tools/disasm.py +++ b/tools/disasm.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # synacor.py - An implementation of the Synacor Challenge -# Copyright © 2016 RunasSudo +# Copyright © 2017 RunasSudo # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -30,9 +30,9 @@ class OpRegister: def __init__(self, register): self.register = register; def get(self): - return 'R{}'.format(self.register + 1); + return 'R{}'.format(self.register); def set(self): - return 'R{}'.format(self.register + 1); + return 'R{}'.format(self.register); def readWord(): byteData = data.read(2) diff --git a/tools/generate_codes.py b/tools/generate_codes.py index 98fd132..f0c4eec 100755 --- a/tools/generate_codes.py +++ b/tools/generate_codes.py @@ -24,27 +24,27 @@ IV_LEN = 3 CODE_LEN = 12 # Emulate 0731 -def generate_code(R1, R2, R3, R4): - R2data = SYN_MEM[R2+1:R2+1+SYN_MEM[R2]] - R4data = SYN_MEM[R4+1:R4+1+SYN_MEM[R4]] +def generate_code(R0, R1, R2, R3): + R1data = SYN_MEM[R1+1:R1+1+SYN_MEM[R1]] + R3data = SYN_MEM[R3+1:R3+1+SYN_MEM[R3]] - assert len(R4data) == IV_LEN + assert len(R3data) == IV_LEN done = False while not done: code = '' for i in range(CODE_LEN): - R6 = i % IV_LEN - R7 = R4data[R6] - R7 = (R7 * 0x1481) % 0x8000 - R7 = (R7 + 0x3039) % 0x8000 - R4data[R6] = R7 - R7 = R1 ^ R7 - R7 = R7 % len(R2data) - if R7 + 1 <= R3: + R5 = i % IV_LEN + R6 = R3data[R5] + R6 = (R6 * 0x1481) % 0x8000 + R6 = (R6 + 0x3039) % 0x8000 + R3data[R5] = R6 + R6 = R0 ^ R6 + R6 = R6 % len(R1data) + if R6 + 1 <= R2: done = True - R7 = R2data[R7] - code += chr(R7) + R6 = R1data[R6] + code += chr(R6) return code @@ -77,33 +77,33 @@ with tarfile.open(sys.argv[1], 'r:gz') as challenge_file: print(re.search(r"Here's a code for the challenge website: (............)", spec_data).group(1)) # Emulate 06bb -for R2 in range(0x17b4, 0x7562): - R1 = SYN_MEM[R2] - R1 ^= pow(R2, 2, 32768) - R1 ^= 0x4154 - SYN_MEM[R2] = R1 +for R1 in range(0x17b4, 0x7562): + R0 = SYN_MEM[R1] + R0 ^= pow(R1, 2, 32768) + R0 ^= 0x4154 + SYN_MEM[R1] = R0 # Basic codes print(bytes(SYN_MEM[0x00f5:0x010c:2]).decode('utf-8')) -R1 = 0x68e3 -R3 = (SYN_MEM[0x0426] + SYN_MEM[0x0427]) % 0x8000 -strlen = SYN_MEM[R1] +R0 = 0x68e3 +R2 = (SYN_MEM[0x0426] + SYN_MEM[0x0427]) % 0x8000 +strlen = SYN_MEM[R0] strbuf = '' for i in range(strlen): - encrypted = SYN_MEM[R1 + 1 + i] - decrypted = encrypted ^ R3 + encrypted = SYN_MEM[R0 + 1 + i] + decrypted = encrypted ^ R2 strbuf += chr(decrypted) print(re.match(r"The self-test completion code is: (............)", strbuf).group(1)) # Generated codes # Calls to 0731 CODE_PARAMS = [ - (0x0058, 0x650a, 0x7fff, 0x6e8b), # R1 from the maze + (0x0058, 0x650a, 0x7fff, 0x6e8b), # R0 from the maze (0x1092, 0x650a, 0x7fff, 0x6eed), - (0x6486, 0x650a, 0x7fff, 0x7239), # R1 is R8 from Ackermann - (0x0b3b, 0x650a, 0x7fff, 0x73df), # R1 from the dots on the coins - (0x7714, 0x653f, 0x0004, 0x74f6), # R1 based on solution to vaults + (0x6486, 0x650a, 0x7fff, 0x7239), # R0 is R7 from Ackermann + (0x0b3b, 0x650a, 0x7fff, 0x73df), # R0 from the dots on the coins + (0x7714, 0x653f, 0x0004, 0x74f6), # R0 based on solution to vaults ] for cp in CODE_PARAMS: From 68e428ecc7fc0428eb600d3ee4c88365cc5d1924 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Sun, 12 Feb 2017 22:38:21 +1030 Subject: [PATCH 02/10] Get started on a shared API --- libsynacor/__init__.py | 19 ++++ libsynacor/binfile.py | 27 ++++++ libsynacor/bytecode.py | 201 +++++++++++++++++++++++++++++++++++++++++ libsynacor/cpu.py | 33 +++++++ synacor.py | 137 ++-------------------------- 5 files changed, 287 insertions(+), 130 deletions(-) create mode 100644 libsynacor/__init__.py create mode 100644 libsynacor/binfile.py create mode 100644 libsynacor/bytecode.py create mode 100644 libsynacor/cpu.py diff --git a/libsynacor/__init__.py b/libsynacor/__init__.py new file mode 100644 index 0000000..9595d2b --- /dev/null +++ b/libsynacor/__init__.py @@ -0,0 +1,19 @@ +# synacor.py - An implementation of the Synacor Challenge +# Copyright © 2017 RunasSudo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from .binfile import * +from .bytecode import * +from .cpu import * diff --git a/libsynacor/binfile.py b/libsynacor/binfile.py new file mode 100644 index 0000000..941f122 --- /dev/null +++ b/libsynacor/binfile.py @@ -0,0 +1,27 @@ +# synacor.py - An implementation of the Synacor Challenge +# Copyright © 2017 RunasSudo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +def memory_from_file(data): + import struct + SYN_MEM = [0] * 32768 + SYN_PTR = 0 + while True: + byteData = data.read(2) + if len(byteData) < 2: + break + SYN_MEM[SYN_PTR] = struct.unpack('. + +class Operand: + @staticmethod + def read_op(word): + if 0 <= word <= 32767: + return OpLiteral(word) + if 32768 <= word <= 32775: + return OpRegister(word - 32768) + raise Exception('Invalid word {}') + +class OpLiteral(Operand): + def __init__(self, value): + self.value = value + def get(self, cpu): + return self.value + def set(self, cpu, value): + raise Exception('Attempted to set literal value {} to {} at {}'.format(self.value, value, cpu.SYN_PTR)) + +class OpRegister(Operand): + def __init__(self, register): + self.register = register + def get(self, cpu): + return cpu.SYN_REG[self.register] + def set(self, cpu, value): + cpu.SYN_REG[self.register] = value + +instructions_by_opcode = {} +instructions_by_name = {} + +def instruction(opcode, name, nargs=0): + def decorator(cls): + cls.opcode = opcode + cls.name = name + cls.nargs = nargs + + instructions_by_opcode[opcode] = cls + instructions_by_name[name] = cls + + return cls + return decorator + +class Instruction: + def __init__(self): + self.args = [] + + @staticmethod + def next_instruction(data, idx): + opcode = Operand.read_op(data[idx]) + idx += 1 + + if not isinstance(opcode, OpLiteral): + raise Exception('Invalid value for opcode {} at {}'.format(data[idx], idx)) + opcode_value = opcode.get(None) + if opcode_value not in instructions_by_opcode: + raise Exception('Invalid opcode {} at {}'.format(opcode_value, idx)) + + instruction = instructions_by_opcode[opcode_value]() + for _ in range(instruction.nargs): + instruction.args.append(Operand.read_op(data[idx])) + idx += 1 + + return instruction, idx + +@instruction(21, 'nop') +class InstructionNop(Instruction): + def run(self, cpu): + pass + +@instruction(0, 'halt') +class InstructionHalt(Instruction): + def run(self, cpu): + import sys + sys.exit(0) + +@instruction(1, 'set', 2) +class InstructionSet(Instruction): + def run(self, cpu): + self.args[0].set(cpu, self.args[1].get(cpu)) + +@instruction(2, 'push', 1) +class InstructionPush(Instruction): + def run(self, cpu): + cpu.SYN_STK.append(self.args[0].get(cpu)) + +@instruction(3, 'pop', 1) +class InstructionPop(Instruction): + def run(self, cpu): + if len(cpu.SYN_STK) == 0: + raise Exception('Attempted to pop from empty stack at {}'.format(cpu.SYN_PTR)) + self.args[0].set(cpu, cpu.SYN_STK.pop()) + +@instruction(4, 'eq', 3) +class InstructionEq(Instruction): + def run(self, cpu): + self.args[0].set(cpu, 1 if self.args[1].get(cpu) == self.args[2].get(cpu) else 0) + +@instruction(5, 'gt', 3) +class InstructionGt(Instruction): + def run(self, cpu): + self.args[0].set(cpu, 1 if self.args[1].get(cpu) > self.args[2].get(cpu) else 0) + +@instruction(6, 'jmp', 1) +class InstructionJmp(Instruction): + def run(self, cpu): + cpu.SYN_PTR = self.args[0].get(cpu) + +@instruction(7, 'jt', 2) +class InstructionJt(Instruction): + def run(self, cpu): + if self.args[0].get(cpu) != 0: + cpu.SYN_PTR = self.args[1].get(cpu) + +@instruction(8, 'jf', 2) +class InstructionJf(Instruction): + def run(self, cpu): + if self.args[0].get(cpu) == 0: + cpu.SYN_PTR = self.args[1].get(cpu) + +@instruction(9, 'add', 3) +class InstructionAdd(Instruction): + def run(self, cpu): + self.args[0].set(cpu, (self.args[1].get(cpu) + self.args[2].get(cpu)) % 32768) + +@instruction(10, 'mult', 3) +class InstructionMult(Instruction): + def run(self, cpu): + self.args[0].set(cpu, (self.args[1].get(cpu) * self.args[2].get(cpu)) % 32768) + +@instruction(11, 'mod', 3) +class InstructionMod(Instruction): + def run(self, cpu): + self.args[0].set(cpu, self.args[1].get(cpu) % self.args[2].get(cpu)) + +@instruction(12, 'and', 3) +class InstructionAnd(Instruction): + def run(self, cpu): + self.args[0].set(cpu, self.args[1].get(cpu) & self.args[2].get(cpu)) + +@instruction(13, 'or', 3) +class InstructionOr(Instruction): + def run(self, cpu): + self.args[0].set(cpu, self.args[1].get(cpu) | self.args[2].get(cpu)) + +@instruction(14, 'not', 2) +class InstructionNot(Instruction): + def run(self, cpu): + self.args[0].set(cpu, ~self.args[1].get(cpu) % 32768) + +@instruction(15, 'rmem', 2) +class InstructionRmem(Instruction): + def run(self, cpu): + self.args[0].set(cpu, cpu.SYN_MEM[self.args[1].get(cpu)]) + +@instruction(16, 'wmem', 2) +class InstructionWmem(Instruction): + def run(self, cpu): + cpu.SYN_MEM[self.args[0].get(cpu)] = self.args[1].get(cpu) + +@instruction(17, 'call', 1) +class InstructionCall(Instruction): + def run(self, cpu): + cpu.SYN_STK.append(cpu.SYN_PTR) + cpu.SYN_PTR = self.args[0].get(cpu) + +@instruction(18, 'ret') +class InstructionRet(Instruction): + def run(self, cpu): + if len(cpu.SYN_STK) == 0: + raise Exception('Attempted to return with empty stack at {}'.format(cpu.SYN_PTR)) + cpu.SYN_PTR = cpu.SYN_STK.pop() + +@instruction(19, 'out', 1) +class InstructionOut(Instruction): + def run(self, cpu): + print(chr(self.args[0].get(cpu)), end='') + +@instruction(20, 'in', 1) +class InstructionIn(Instruction): + def run(self, cpu): + import sys + + while len(cpu.SYN_STDIN_BUF) == 0: + line = sys.stdin.readline() + cpu.SYN_STDIN_BUF = list(line) + + self.args[0].set(cpu, ord(cpu.SYN_STDIN_BUF.pop(0))) diff --git a/libsynacor/cpu.py b/libsynacor/cpu.py new file mode 100644 index 0000000..b23c809 --- /dev/null +++ b/libsynacor/cpu.py @@ -0,0 +1,33 @@ +# synacor.py - An implementation of the Synacor Challenge +# Copyright © 2017 RunasSudo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from libsynacor import * + +class CPU: + def __init__(self): + self.SYN_PTR = 0 + self.SYN_MEM = [0] * 32768 + self.SYN_REG = [0] * 8 + self.SYN_STK = [] + self.SYN_STDIN_BUF = [] + + def swallow_op(self): + op = Operand.read_op(self.SYN_MEM[self.SYN_PTR]) + self.SYN_PTR += 1 + + def step(self): + instruction, self.SYN_PTR = Instruction.next_instruction(self.SYN_MEM, self.SYN_PTR) + instruction.run(self) diff --git a/synacor.py b/synacor.py index 593341d..db96583 100755 --- a/synacor.py +++ b/synacor.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # synacor.py - An implementation of the Synacor Challenge -# Copyright © 2016 RunasSudo +# Copyright © 2017 RunasSudo # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -15,136 +15,13 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -import struct # for bytes<-->word handling -import sys # for args, stdin +from libsynacor import * -SYN_PTR = 0 -SYN_MEM = [0] * 32768 -SYN_REG = [0] * 8 -SYN_STK = [] -SYN_STDIN_BUF = [] +import sys -DBG_FLAG = False -DBG_CSTK = [] +cpu = CPU() +with open(sys.argv[1], 'rb') as data: + cpu.SYN_MEM = memory_from_file(data) -class OpLiteral: - def __init__(self, value): - self.value = value; - def get(self): - return self.value; - def set(self, value): - raise Exception('Attempted to set literal value {} to {} at {}'.format(self.value, value, SYN_PTR)) - -class OpRegister: - def __init__(self, register): - self.register = register; - def get(self): - return SYN_REG[self.register]; - def set(self, value): - SYN_REG[self.register] = value; - -def readOp(word): - if 0 <= word <= 32767: - return OpLiteral(word) - if 32768 <= word <= 32775: - return OpRegister(word - 32768) - raise Exception('Invalid word {} at {}'.format(word, SYN_PTR)) - -def swallowOp(): - global SYN_PTR - op = readOp(SYN_MEM[SYN_PTR]) - SYN_PTR += 1 - return op - -if len(sys.argv) > 1: - dbg_args = sys.argv[1:] - with open(dbg_args[0] + '.py', 'r') as f: - exec(f.read(), globals(), locals()) -else: - # Read code into memory - with open('challenge.bin', 'rb') as data: - while True: - byteData = data.read(2) - if len(byteData) < 2: - break - SYN_MEM[SYN_PTR] = struct.unpack(' swallowOp().get() else 0) - elif instruction == 6: #JMP - SYN_PTR = swallowOp().get() - elif instruction == 7: #JT (jump if not zero) - a, b = swallowOp().get(), swallowOp().get() - if a != 0: - SYN_PTR = b - elif instruction == 8: #JF (jump if zero) - a, b = swallowOp().get(), swallowOp().get() - if a == 0: - SYN_PTR = b - elif instruction == 9: #ADD - swallowOp().set((swallowOp().get() + swallowOp().get()) % 32768) - elif instruction == 10: #MULT - swallowOp().set((swallowOp().get() * swallowOp().get()) % 32768) - elif instruction == 11: #MOD - swallowOp().set(swallowOp().get() % swallowOp().get()) - elif instruction == 12: #AND - swallowOp().set(swallowOp().get() & swallowOp().get()) - elif instruction == 13: #OR - swallowOp().set(swallowOp().get() | swallowOp().get()) - elif instruction == 14: #NOT - swallowOp().set(~swallowOp().get() % 32768) # mathemagic - elif instruction == 15: #RMEM - swallowOp().set(SYN_MEM[swallowOp().get()]) - elif instruction == 16: #WMEM - a, b = swallowOp().get(), swallowOp().get() - SYN_MEM[a] = b # order of operations - elif instruction == 17: #CALL - SYN_STK.append(SYN_PTR + 1) # skip one word for the operand - addr = swallowOp().get() - DBG_CSTK.append(addr) - SYN_PTR = addr - elif instruction == 18: #RET - if len(SYN_STK) == 0: - raise Exception('Attempted to return with empty stack at {}'.format(SYN_PTR)) - SYN_PTR = SYN_STK.pop() - - if len(DBG_CSTK) > 0: - DBG_CSTK.pop() - elif instruction == 19: #OUT - print(chr(swallowOp().get()), end='') - elif instruction == 20: #IN - while len(SYN_STDIN_BUF) == 0: - line = sys.stdin.readline() - if line.startswith('.'): # debug command - dbg_args = line.rstrip()[1:].split() - with open(dbg_args[0] + '.py', 'r') as f: - SYN_PTR -= 1; exec(f.read(), globals(), locals()); SYN_PTR += 1 - else: - SYN_STDIN_BUF = list(line) - - swallowOp().set(ord(SYN_STDIN_BUF.pop(0))) - else: - raise Exception('Unimplemented opcode {} at {}'.format(instruction, SYN_PTR)) + cpu.step() From 86bcc094faf4cfbfb896d5362f615ff8690bac4d Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Sun, 12 Feb 2017 22:43:28 +1030 Subject: [PATCH 03/10] Port debug scripts to new API --- .gitignore | 2 ++ README.md | 8 +++--- dbg_dump.py => dbg/dump.py | 10 +++---- dbg_pdb.py => dbg/fastboot.py | 10 ++++++- dbg_load.py => dbg/load.py | 10 +++---- dbg_speedrun.py => dbg/speedrun.py | 11 -------- dbg_teleporter.py => dbg/teleporter.py | 6 ++-- dbg_fastboot.py | 39 -------------------------- libsynacor/bytecode.py | 8 +++++- notes.md | 6 ++-- synacor.py | 5 ++++ 11 files changed, 43 insertions(+), 72 deletions(-) rename dbg_dump.py => dbg/dump.py (86%) rename dbg_pdb.py => dbg/fastboot.py (81%) rename dbg_load.py => dbg/load.py (84%) rename dbg_speedrun.py => dbg/speedrun.py (87%) rename dbg_teleporter.py => dbg/teleporter.py (89%) delete mode 100644 dbg_fastboot.py diff --git a/.gitignore b/.gitignore index 98bb947..ba27fb4 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ /tools/ackermann.o /tools/venv /tools/*.pdf + +__pycache__ diff --git a/README.md b/README.md index 29d6345..be48290 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # synacor.py -My sort-of-OOP, ~~poorly-documented~~concise response to the Synacor challenge +My OOP, ~~poorly-documented~~ ~~concise~~ working response to the Synacor challenge ## Debug commands @@ -12,15 +12,15 @@ This will execute the file `.py` with `dbg_args[0]` set to `` and ` + ./synacor.py For example, to load the `dumps/init` state to skip the self-test and decryption, run: - ./synacor.py dbg_load dumps/init + ./synacor.py challenge.bin dbg/load dumps/init Dump files are stored in Python [pickle](https://docs.python.org/3/library/pickle.html) format, so if you want to inspect the memory in a hex editor, for example, it will be necessary to extract a raw memory dump: diff --git a/dbg_dump.py b/dbg/dump.py similarity index 86% rename from dbg_dump.py rename to dbg/dump.py index 89acbb2..0b562f7 100644 --- a/dbg_dump.py +++ b/dbg/dump.py @@ -1,5 +1,5 @@ # synacor.py - An implementation of the Synacor Challenge -# Copyright © 2016 RunasSudo +# Copyright © 2016–2017 RunasSudo # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -20,10 +20,10 @@ if len(dbg_args) < 2: print('Usage: .{} '.format(dbg_args[0])) else: model = { - 'SYN_PTR': SYN_PTR, - 'SYN_MEM': SYN_MEM, - 'SYN_REG': SYN_REG, - 'SYN_STK': SYN_STK + 'SYN_PTR': cpu.SYN_PTR, + 'SYN_MEM': cpu.SYN_MEM, + 'SYN_REG': cpu.SYN_REG, + 'SYN_STK': cpu.SYN_STK } with open(dbg_args[1], 'wb') as f: diff --git a/dbg_pdb.py b/dbg/fastboot.py similarity index 81% rename from dbg_pdb.py rename to dbg/fastboot.py index aa9dc03..41c7750 100644 --- a/dbg_pdb.py +++ b/dbg/fastboot.py @@ -14,4 +14,12 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -DBG_FLAG = True +# Emulate 06bb +for R1 in range(0x17b4, 0x7562): + R0 = cpu.SYN_MEM[R1] + R0 ^= pow(R1, 2, 32768) + R0 ^= 0x4154 + cpu.SYN_MEM[R1] = R0 + +# Jump past self-test +cpu.SYN_PTR = 0x0377 diff --git a/dbg_load.py b/dbg/load.py similarity index 84% rename from dbg_load.py rename to dbg/load.py index 0e46501..a2d4c8f 100644 --- a/dbg_load.py +++ b/dbg/load.py @@ -1,5 +1,5 @@ # synacor.py - An implementation of the Synacor Challenge -# Copyright © 2016 RunasSudo +# Copyright © 2016–2017 RunasSudo # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -22,7 +22,7 @@ else: with open(dbg_args[1], 'rb') as f: model = pickle.load(f) - SYN_PTR = model['SYN_PTR'] - SYN_MEM = model['SYN_MEM'] - SYN_REG = model['SYN_REG'] - SYN_STK = model['SYN_STK'] + cpu.SYN_PTR = model['SYN_PTR'] + cpu.SYN_MEM = model['SYN_MEM'] + cpu.SYN_REG = model['SYN_REG'] + cpu.SYN_STK = model['SYN_STK'] diff --git a/dbg_speedrun.py b/dbg/speedrun.py similarity index 87% rename from dbg_speedrun.py rename to dbg/speedrun.py index a1dccf9..6807120 100644 --- a/dbg_speedrun.py +++ b/dbg/speedrun.py @@ -97,16 +97,5 @@ take mirror use mirror """ -# Read code into memory -SYN_PTR = 0 -with open('challenge.bin', 'rb') as data: - while True: - byteData = data.read(2) - if len(byteData) < 2: - break - SYN_MEM[SYN_PTR] = struct.unpack('. # Set R7 to 6486 -SYN_REG[7] = 0x6486 +cpu.SYN_REG[7] = 0x6486 # Patch instructions 1571 to 1579 inclusive with nop's -SYN_MEM[0x1571:0x157a] = [21] * 9 +cpu.SYN_MEM[0x1571:0x157a] = [21] * 9 print('Patched. Ready to run "use teleporter".') diff --git a/dbg_fastboot.py b/dbg_fastboot.py deleted file mode 100644 index 44b686b..0000000 --- a/dbg_fastboot.py +++ /dev/null @@ -1,39 +0,0 @@ -# synacor.py - An implementation of the Synacor Challenge -# Copyright © 2017 RunasSudo -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import pickle - -# Read code into memory -SYN_MEM = [0] * 32768 - -with open('challenge.bin', 'rb') as data: - i = 0 - while True: - byteData = data.read(2) - if len(byteData) < 2: - break - SYN_MEM[i] = struct.unpack(' dumps/init.asm @@ -232,7 +232,7 @@ Running the algorithm, the correct value is revealed to be `0x6486`. Now we simp I've implemented this as a debug function to prepare the teleporter: - > .dbg_teleporter + > .dbg/teleporter Patched. Ready to run "use teleporter". > use teleporter @@ -284,4 +284,4 @@ Given that you've made it this far (You didn't cheat, did you? I did warn you at Now that we've solved the puzzle, the only thing left is to write a [tool-assisted speed-run](https://github.com/RunasSudo/synacor.py/blob/master/dbg_speedrun.py) to completely break any given instance of the challenge in 5 seconds. - time python -u synacor.py dbg_speedrun | head -n 849 + time python -u synacor.py challenge.bin dbg/speedrun | head -n 849 diff --git a/synacor.py b/synacor.py index db96583..1d23249 100755 --- a/synacor.py +++ b/synacor.py @@ -23,5 +23,10 @@ cpu = CPU() with open(sys.argv[1], 'rb') as data: cpu.SYN_MEM = memory_from_file(data) +if len(sys.argv) > 2: + dbg_args = sys.argv[2:] + with open(dbg_args[0] + '.py', 'r') as f: + exec(f.read(), globals(), locals()) + while True: cpu.step() From 2418376c0ac6322f84f3d7f02cdf912b3a2d5c09 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Sun, 12 Feb 2017 23:01:56 +1030 Subject: [PATCH 04/10] Port disassembler to new API --- disasm.py | 89 ++++++++++++++++++++++++ libsynacor/bytecode.py | 12 ++++ tools/disasm.py | 154 ----------------------------------------- 3 files changed, 101 insertions(+), 154 deletions(-) create mode 100755 disasm.py delete mode 100755 tools/disasm.py diff --git a/disasm.py b/disasm.py new file mode 100755 index 0000000..13edaa1 --- /dev/null +++ b/disasm.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +# synacor.py - An implementation of the Synacor Challenge +# Copyright © 2017 RunasSudo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from libsynacor import * + +import sys + +with open(sys.argv[1], 'rb') as data: + SYN_MEM = memory_from_file(data) + +def escape_char(char): + return char.replace('\\', '\\\\').replace('\n', '\\n').replace('"', '\\"') + +MODE_OUT = False +MODE_DAT = False #False, 1 (data), 2 (text) + +SYN_PTR = 0 + +while SYN_PTR < len(SYN_MEM): + word = SYN_MEM[SYN_PTR] + + if MODE_OUT and word != 19: + print('"') + MODE_OUT = False + if MODE_DAT and 0 <= word <= 21: + if MODE_DAT == 1: + print() + if MODE_DAT == 2: + print('"') + MODE_DAT = False + + if word not in instructions_by_opcode: + # Data + if 32 <= word <= 126: + # looks like letters - unfortunately "\n" looks like MULT + if MODE_DAT == 2: + print(escape_char(chr(word)), end='') + else: + if MODE_DAT == 1: + print() + print('{:04x}: data "{}'.format(SYN_PTR, escape_char(chr(word))), end='') + MODE_DAT = 2 + if word == 0x0a: + print('"') + MODE_DAT = False # break on newlines + else: + if MODE_DAT == 1: + print(' {:04x}'.format(word), end='') + else: + if MODE_DAT == 2: + print('"') + print('{:04x}: data {:04x}'.format(SYN_PTR, word), end='') + MODE_DAT = 1 + SYN_PTR += 1 + else: + # Instruction + instruction, SYN_PTR = Instruction.next_instruction(SYN_MEM, SYN_PTR) + # Special cases + if isinstance(instruction, InstructionOut): + if isinstance(instruction.args[0], OpLiteral): + if MODE_OUT: + print(escape_char(chr(instruction.args[0].get(None))), end='') + else: + print('{:04x}: out "{}'.format(SYN_PTR, escape_char(chr(instruction.args[0].get(None)))), end='') + MODE_OUT = True + if instruction.args[0].get(None) == 0x0a: + print('"') + MODE_OUT = False # break on newlines + else: + if MODE_OUT: + print('"') + MODE_OUT = False + print('{:04x}: {}'.format(SYN_PTR, instruction.describe())) + else: + print('{:04x}: {}'.format(SYN_PTR, instruction.describe())) diff --git a/libsynacor/bytecode.py b/libsynacor/bytecode.py index 544d01e..d62497c 100644 --- a/libsynacor/bytecode.py +++ b/libsynacor/bytecode.py @@ -30,6 +30,9 @@ class OpLiteral(Operand): return self.value def set(self, cpu, value): raise Exception('Attempted to set literal value {} to {} at {}'.format(self.value, value, cpu.SYN_PTR)) + + def describe(self): + return '{:04x}'.format(self.value) class OpRegister(Operand): def __init__(self, register): @@ -38,6 +41,9 @@ class OpRegister(Operand): return cpu.SYN_REG[self.register] def set(self, cpu, value): cpu.SYN_REG[self.register] = value + + def describe(self): + return 'R{}'.format(self.register) instructions_by_opcode = {} instructions_by_name = {} @@ -58,6 +64,12 @@ class Instruction: def __init__(self): self.args = [] + def describe(self): + description = '{: <4}'.format(self.name) + for i in range(self.nargs): + description += ' {}'.format(self.args[i].describe()) + return description + @staticmethod def next_instruction(data, idx): opcode = Operand.read_op(data[idx]) diff --git a/tools/disasm.py b/tools/disasm.py deleted file mode 100755 index b02a89a..0000000 --- a/tools/disasm.py +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env python3 -# synacor.py - An implementation of the Synacor Challenge -# Copyright © 2017 RunasSudo -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import struct # for bytes<-->word handling -import sys # for args - -class OpLiteral: - def __init__(self, value): - self.value = value; - def get(self): - return '{:04x}'.format(self.value); - def set(self): - return '{:04x}'.format(self.value); - -class OpRegister: - def __init__(self, register): - self.register = register; - def get(self): - return 'R{}'.format(self.register); - def set(self): - return 'R{}'.format(self.register); - -def readWord(): - byteData = data.read(2) - if len(byteData) < 2: - return None - return struct.unpack(' Date: Mon, 13 Feb 2017 14:43:59 +1030 Subject: [PATCH 05/10] Fix speedrun --- dbg/speedrun.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbg/speedrun.py b/dbg/speedrun.py index 6807120..652c5be 100644 --- a/dbg/speedrun.py +++ b/dbg/speedrun.py @@ -68,7 +68,7 @@ use corroded coin north take teleporter use teleporter -.dbg_teleporter +.dbg/teleporter use teleporter north north From 81039ce304e42777c6648883ad2b1d381092cec1 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Mon, 13 Feb 2017 14:44:36 +1030 Subject: [PATCH 06/10] Implement basic assembler --- asm.py | 151 +++++++++++++++++++++++++++++++++++++++++ disasm.py | 10 ++- libsynacor/bytecode.py | 14 ++++ 3 files changed, 172 insertions(+), 3 deletions(-) create mode 100755 asm.py diff --git a/asm.py b/asm.py new file mode 100755 index 0000000..e969f1a --- /dev/null +++ b/asm.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +# synacor.py - An implementation of the Synacor Challenge +# Copyright © 2017 RunasSudo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from libsynacor import * + +import argparse +import struct + +parser = argparse.ArgumentParser() +parser.add_argument('file', help='.asm file to read') +parser.add_argument('output', help='.bin file to write') +args = parser.parse_args() + +line_no = 0 + +def split_line(line): + tokens = [] + token = '' + idx = 0 + + in_comment = False + in_string = False + in_escape = False + + while idx < len(line): + if in_comment: + pass + elif in_string: + if in_escape: + token += line[idx] + else: + if line[idx] == '\\': + in_escape = True + token += line[idx] + elif line[idx] == '"': + in_string = False + token += line[idx] + else: + token += line[idx] + else: + if line[idx] == ' ': + if token != '': + tokens.append(token) + token = '' + elif line[idx] == '"': + in_string = True + token += line[idx] + elif line[idx] == ';': + in_comment = True + else: + token += line[idx] + idx += 1 + # Final token + if token != '': + tokens.append(token) + + return tokens + +def unescape_char(char): + return char.encode('utf-8').decode('unicode_escape') + +def assemble_next_instruction(source): + line = source.readline() + global line_no; line_no += 1 + if line == '': + return None, [] + + tokens = split_line(line.strip()) + return assemble_instruction(source, tokens) + +def assemble_instruction(source, tokens): + if len(tokens) == 0: + return assemble_next_instruction(source) + if tokens[0].endswith(':'): + # Label + label = tokens[0][:-1] + instructions, inst_labels = assemble_instruction(source, tokens[1:]) + return instructions, inst_labels + [label] + else: + # Instruction + name = tokens[0] + if name not in instructions_by_name: + raise Exception('Unknown instruction {}'.format(name)) + instruction = instructions_by_name[name]() + + # Special cases + if isinstance(instruction, InstructionOut) and tokens[1].startswith('"'): + chars = unescape_char(tokens[1][1:-1]) + instructions = [] + for char in chars: + instruction = InstructionOut() + instruction.args.append(OpLiteral(ord(char))) + instructions.append(instruction) + return instructions, [] + elif isinstance(instruction, InstructionData): + if tokens[1].startswith('"'): + chars = unescape_char(tokens[1][1:-1]) + instruction.args = [ord(char) for char in chars] + return [instruction], [] + else: + instruction.args = [int(x, 16) for x in tokens[1:]] + return [instruction], [] + else: + if len(tokens) != instruction.nargs + 1: + raise Exception('Invalid number of arguments: Expected {}, got {}'.format(instruction.nargs, len(tokens) - 1)) + for i in range(instruction.nargs): + argstr = tokens[i + 1] + if argstr.startswith('R'): + # Register + arg = OpRegister(int(argstr[1:])) + else: + # Hex literal + arg = OpLiteral(int(argstr, 16)) + instruction.args.append(arg) + return [instruction], [] + +# TODO: First pass + +# Second pass +SYN_MEM = [0] * 32768 +SYN_PTR = 0 + +with open(args.file, 'r') as source: + try: + while True: + instructions, labels = assemble_next_instruction(source) + if instructions is None: + break + for instruction in instructions: + code = instruction.assemble() + SYN_MEM[SYN_PTR:SYN_PTR+len(code)] = code + SYN_PTR += len(code) + except Exception as ex: + raise Exception('Error at line {}'.format(line_no)) from ex + +with open(args.output, 'wb') as f: + f.write(struct.pack('<32768H', *SYN_MEM)) diff --git a/disasm.py b/disasm.py index 13edaa1..b6457cb 100755 --- a/disasm.py +++ b/disasm.py @@ -17,13 +17,17 @@ from libsynacor import * -import sys +import argparse -with open(sys.argv[1], 'rb') as data: +parser = argparse.ArgumentParser() +parser.add_argument('file', help='.bin file containing the initial memory dump') +args = parser.parse_args() + +with open(args.file, 'rb') as data: SYN_MEM = memory_from_file(data) def escape_char(char): - return char.replace('\\', '\\\\').replace('\n', '\\n').replace('"', '\\"') + return char.encode('unicode_escape').decode('utf-8').replace('"', '\\"') MODE_OUT = False MODE_DAT = False #False, 1 (data), 2 (text) diff --git a/libsynacor/bytecode.py b/libsynacor/bytecode.py index d62497c..c155788 100644 --- a/libsynacor/bytecode.py +++ b/libsynacor/bytecode.py @@ -33,6 +33,8 @@ class OpLiteral(Operand): def describe(self): return '{:04x}'.format(self.value) + def assemble(self): + return self.value class OpRegister(Operand): def __init__(self, register): @@ -44,6 +46,8 @@ class OpRegister(Operand): def describe(self): return 'R{}'.format(self.register) + def assemble(self): + return self.register + 32768 instructions_by_opcode = {} instructions_by_name = {} @@ -70,6 +74,9 @@ class Instruction: description += ' {}'.format(self.args[i].describe()) return description + def assemble(self): + return [self.opcode] + [self.args[i].assemble() for i in range(self.nargs)] + @staticmethod def next_instruction(data, idx): opcode = Operand.read_op(data[idx]) @@ -217,3 +224,10 @@ class InstructionIn(Instruction): cpu.SYN_STDIN_BUF = list(line) self.args[0].set(cpu, ord(cpu.SYN_STDIN_BUF.pop(0))) + +# Not actually an instruction, but convenient to think of it as one for the purposes of assembling +# self.args is an array of literal values, rather than Operands +class InstructionData(Instruction): + def assemble(self): + return self.args +instructions_by_name['data'] = InstructionData From d8f472a31fd9abcd7c582dc6d3b2c8bf9d929faa Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Mon, 13 Feb 2017 19:06:53 +1030 Subject: [PATCH 07/10] Add label processing to disassembler --- disasm.py | 91 +++++++++++++++++++++++++++++++++- disasm_hints/annotations.txt | 6 +++ disasm_hints/jmpcall.txt | 95 ++++++++++++++++++++++++++++++++++++ 3 files changed, 191 insertions(+), 1 deletion(-) create mode 100644 disasm_hints/annotations.txt create mode 100644 disasm_hints/jmpcall.txt diff --git a/disasm.py b/disasm.py index b6457cb..86084e5 100755 --- a/disasm.py +++ b/disasm.py @@ -21,11 +21,61 @@ import argparse parser = argparse.ArgumentParser() parser.add_argument('file', help='.bin file containing the initial memory dump') +parser.add_argument('--hints', help='File(s) outlining additional jmp/call targets, label names, comments, etc', action='append') args = parser.parse_args() with open(args.file, 'rb') as data: SYN_MEM = memory_from_file(data) +# Find things to label +labels, comments_before, comments_inline = {}, {}, {} +SYN_PTR = 0 +while SYN_PTR < len(SYN_MEM): + word = SYN_MEM[SYN_PTR] + if word in instructions_by_opcode: + instruction, SYN_PTR = Instruction.next_instruction(SYN_MEM, SYN_PTR) + if isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionJt) or isinstance(instruction, InstructionJf): + if isinstance(instruction, InstructionJmp): + op = instruction.args[0] + else: + op = instruction.args[1] + if isinstance(op, OpLiteral): + loc = op.get(None) + labels['label_{:04x}'.format(loc)] = loc + elif isinstance(instruction, InstructionCall): + if isinstance(instruction.args[0], OpLiteral): + loc = instruction.args[0].get(None) + labels['sub_{:04x}'.format(loc)] = loc + else: + SYN_PTR += 1 +# Read hints +if args.hints: + for hintfile in args.hints: + with open(hintfile, 'r') as data: + while True: + line = data.readline() + if line == '': + break + if line.startswith('jmp '): + loc = int(line.split()[1], 16) + labels['label_{:04x}'.format(loc)] = loc + elif line.startswith('call '): + loc = int(line.split()[1], 16) + labels['sub_{:04x}'.format(loc)] = loc + elif line.startswith('ren '): + old_label = line.split()[1] + new_label = line.split()[2] + labels[new_label] = labels[old_label] + del labels[old_label] + elif line.startswith('cmb '): + loc = int(line.split()[1], 16) + comment = line[line.index(' ', line.index(' ') + 1) + 1:].strip() + if loc not in comments_before: + comments_before[loc] = [] + comments_before[loc].append(comment) + else: + raise Exception('Invalid line in hint file: {}'.format(line)) + def escape_char(char): return char.encode('unicode_escape').decode('utf-8').replace('"', '\\"') @@ -35,6 +85,30 @@ MODE_DAT = False #False, 1 (data), 2 (text) SYN_PTR = 0 while SYN_PTR < len(SYN_MEM): + if SYN_PTR in comments_before: + if MODE_OUT: + print('"') + MODE_OUT = False + if MODE_DAT == 1: + print() + MODE_DAT = False + if MODE_DAT == 2: + print('"') + MODE_DAT = False + for comment in comments_before[SYN_PTR]: + print('; {}'.format(comment)) + if any(v == SYN_PTR for k, v in labels.items()): + if MODE_OUT: + print('"') + MODE_OUT = False + if MODE_DAT == 1: + print() + MODE_DAT = False + if MODE_DAT == 2: + print('"') + MODE_DAT = False + print('${}:'.format(next(k for k, v in labels.items() if v == SYN_PTR))) + word = SYN_MEM[SYN_PTR] if MODE_OUT and word != 19: @@ -72,7 +146,7 @@ while SYN_PTR < len(SYN_MEM): SYN_PTR += 1 else: # Instruction - instruction, SYN_PTR = Instruction.next_instruction(SYN_MEM, SYN_PTR) + instruction, next_SYN_PTR = Instruction.next_instruction(SYN_MEM, SYN_PTR) # Special cases if isinstance(instruction, InstructionOut): if isinstance(instruction.args[0], OpLiteral): @@ -89,5 +163,20 @@ while SYN_PTR < len(SYN_MEM): print('"') MODE_OUT = False print('{:04x}: {}'.format(SYN_PTR, instruction.describe())) + elif isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionJt) or isinstance(instruction, InstructionJf) or isinstance(instruction, InstructionCall): + if isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionCall): + op = instruction.args[0] + else: + op = instruction.args[1] + if isinstance(op, OpLiteral): + loc = op.get(None) + if any(v == loc for k, v in labels.items()): + label = next(k for k, v in labels.items() if v == loc) + print('{:04x}: {: <4} ${}'.format(SYN_PTR, instruction.name, label)) + else: + print('{:04x}: {}'.format(SYN_PTR, instruction.describe())) + else: + print('{:04x}: {}'.format(SYN_PTR, instruction.describe())) else: print('{:04x}: {}'.format(SYN_PTR, instruction.describe())) + SYN_PTR = next_SYN_PTR diff --git a/disasm_hints/annotations.txt b/disasm_hints/annotations.txt new file mode 100644 index 0000000..a482af9 --- /dev/null +++ b/disasm_hints/annotations.txt @@ -0,0 +1,6 @@ +jmp 0000 +ren label_0000 start +jmp 0110 +ren label_0110 self_test +cmb 0140 Test jmp +ren label_015b self_test_jmp1 diff --git a/disasm_hints/jmpcall.txt b/disasm_hints/jmpcall.txt new file mode 100644 index 0000000..72d2f23 --- /dev/null +++ b/disasm_hints/jmpcall.txt @@ -0,0 +1,95 @@ +call 0505 +call 0507 +call 05b2 +call 05ee +call 05f8 +call 05fb +call 0607 +call 0623 +call 0634 +call 0645 +call 0653 +call 0670 +call 0683 +call 06bb +call 06e7 +call 0731 +call 07d1 +call 084d +call 0865 +call 086e +call 0882 +call 08c8 +call 08e9 +call 0b94 +call 0cad +call 0d48 +call 0df0 +call 0e48 +call 0e8f +call 0e9e +call 0ec0 +call 0eca +call 0ede +call 0f35 +call 0f98 +call 0fa9 +call 0fcd +call 0fde +call 1000 +call 1011 +call 1022 +call 1033 +call 1044 +call 107a +call 10b7 +call 1135 +call 11a3 +call 11b5 +call 1203 +call 1252 +call 1270 +call 12bf +call 1315 +call 1371 +call 14f0 +call 1501 +call 1512 +call 1523 +call 1534 +call 1545 +call 1659 +call 16b6 +call 16bf +call 16d6 +call 16f4 +call 1705 +call 1721 +call 174c +call 1766 +jmp 015b +jmp 0166 +jmp 01e4 +jmp 02c4 +jmp 02db +jmp 034d +jmp 03e8 +jmp 061e +jmp 06b2 +jmp 06fb +jmp 07e3 +jmp 07ff +jmp 0833 +jmp 08bc +jmp 08cc +jmp 08f8 +jmp 0aae +jmp 0b86 +jmp 0c79 +jmp 0cfe +jmp 0d99 +jmp 0e43 +jmp 112e +jmp 1312 +jmp 1652 +jmp 1747 From 2982d12588873a49cbaf75bd149463f29bbbaaa7 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Mon, 13 Feb 2017 19:19:19 +1030 Subject: [PATCH 08/10] Implement labels in assembler --- asm.py | 28 +++++++++++++++++++++++++--- disasm.py | 5 ++++- libsynacor/bytecode.py | 23 ++++++++++++++++++----- 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/asm.py b/asm.py index e969f1a..e4377e4 100755 --- a/asm.py +++ b/asm.py @@ -122,13 +122,35 @@ def assemble_instruction(source, tokens): if argstr.startswith('R'): # Register arg = OpRegister(int(argstr[1:])) + elif argstr.startswith('$'): + # Label + arg = OpLabel(argstr[1:]) else: # Hex literal arg = OpLiteral(int(argstr, 16)) instruction.args.append(arg) return [instruction], [] -# TODO: First pass +# First pass +labels = {} +SYN_MEM = [0] * 32768 +SYN_PTR = 0 + +with open(args.file, 'r') as source: + try: + while True: + instructions, inst_labels = assemble_next_instruction(source) + for label in inst_labels: + if label.startswith('$'): + labels[label[1:]] = SYN_PTR + if instructions is None: + break + for instruction in instructions: + code = instruction.assemble(None) + SYN_MEM[SYN_PTR:SYN_PTR+len(code)] = code + SYN_PTR += len(code) + except Exception as ex: + raise Exception('Error at line {}'.format(line_no)) from ex # Second pass SYN_MEM = [0] * 32768 @@ -137,11 +159,11 @@ SYN_PTR = 0 with open(args.file, 'r') as source: try: while True: - instructions, labels = assemble_next_instruction(source) + instructions, inst_labels = assemble_next_instruction(source) if instructions is None: break for instruction in instructions: - code = instruction.assemble() + code = instruction.assemble(labels) SYN_MEM[SYN_PTR:SYN_PTR+len(code)] = code SYN_PTR += len(code) except Exception as ex: diff --git a/disasm.py b/disasm.py index 86084e5..f89f349 100755 --- a/disasm.py +++ b/disasm.py @@ -172,7 +172,10 @@ while SYN_PTR < len(SYN_MEM): loc = op.get(None) if any(v == loc for k, v in labels.items()): label = next(k for k, v in labels.items() if v == loc) - print('{:04x}: {: <4} ${}'.format(SYN_PTR, instruction.name, label)) + if isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionCall): + print('{:04x}: {: <4} ${}'.format(SYN_PTR, instruction.name, label)) + else: + print('{:04x}: {: <4} {} ${}'.format(SYN_PTR, instruction.name, instruction.args[0].describe(), label)) else: print('{:04x}: {}'.format(SYN_PTR, instruction.describe())) else: diff --git a/libsynacor/bytecode.py b/libsynacor/bytecode.py index c155788..ea5224c 100644 --- a/libsynacor/bytecode.py +++ b/libsynacor/bytecode.py @@ -33,7 +33,7 @@ class OpLiteral(Operand): def describe(self): return '{:04x}'.format(self.value) - def assemble(self): + def assemble(self, labels): return self.value class OpRegister(Operand): @@ -46,9 +46,22 @@ class OpRegister(Operand): def describe(self): return 'R{}'.format(self.register) - def assemble(self): + def assemble(self, labels): return self.register + 32768 +# Used only in assembling process +class OpLabel(Operand): + def __init__(self, label): + self.label = label + + def assemble(self, labels): + if labels is None: + # First pass + return 0xffff + if self.label not in labels: + raise Exception('Unknown label {}'.format(self.label)) + return OpLiteral(labels[self.label]).assemble(labels) + instructions_by_opcode = {} instructions_by_name = {} @@ -74,8 +87,8 @@ class Instruction: description += ' {}'.format(self.args[i].describe()) return description - def assemble(self): - return [self.opcode] + [self.args[i].assemble() for i in range(self.nargs)] + def assemble(self, labels): + return [self.opcode] + [self.args[i].assemble(labels) for i in range(self.nargs)] @staticmethod def next_instruction(data, idx): @@ -228,6 +241,6 @@ class InstructionIn(Instruction): # Not actually an instruction, but convenient to think of it as one for the purposes of assembling # self.args is an array of literal values, rather than Operands class InstructionData(Instruction): - def assemble(self): + def assemble(self, labels): return self.args instructions_by_name['data'] = InstructionData From f260c3e4b5317bb11f35564a8a4c8204382e6c2c Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Mon, 13 Feb 2017 20:55:08 +1030 Subject: [PATCH 09/10] Annotate self test --- asm.py | 106 ---------------------------- disasm.py | 53 +++++++++----- disasm_hints/annotations.txt | 96 ++++++++++++++++++++++++-- libsynacor/__init__.py | 1 + libsynacor/assembly.py | 129 +++++++++++++++++++++++++++++++++++ libsynacor/bytecode.py | 6 +- 6 files changed, 261 insertions(+), 130 deletions(-) create mode 100644 libsynacor/assembly.py diff --git a/asm.py b/asm.py index e4377e4..58e17df 100755 --- a/asm.py +++ b/asm.py @@ -25,112 +25,6 @@ parser.add_argument('file', help='.asm file to read') parser.add_argument('output', help='.bin file to write') args = parser.parse_args() -line_no = 0 - -def split_line(line): - tokens = [] - token = '' - idx = 0 - - in_comment = False - in_string = False - in_escape = False - - while idx < len(line): - if in_comment: - pass - elif in_string: - if in_escape: - token += line[idx] - else: - if line[idx] == '\\': - in_escape = True - token += line[idx] - elif line[idx] == '"': - in_string = False - token += line[idx] - else: - token += line[idx] - else: - if line[idx] == ' ': - if token != '': - tokens.append(token) - token = '' - elif line[idx] == '"': - in_string = True - token += line[idx] - elif line[idx] == ';': - in_comment = True - else: - token += line[idx] - idx += 1 - # Final token - if token != '': - tokens.append(token) - - return tokens - -def unescape_char(char): - return char.encode('utf-8').decode('unicode_escape') - -def assemble_next_instruction(source): - line = source.readline() - global line_no; line_no += 1 - if line == '': - return None, [] - - tokens = split_line(line.strip()) - return assemble_instruction(source, tokens) - -def assemble_instruction(source, tokens): - if len(tokens) == 0: - return assemble_next_instruction(source) - if tokens[0].endswith(':'): - # Label - label = tokens[0][:-1] - instructions, inst_labels = assemble_instruction(source, tokens[1:]) - return instructions, inst_labels + [label] - else: - # Instruction - name = tokens[0] - if name not in instructions_by_name: - raise Exception('Unknown instruction {}'.format(name)) - instruction = instructions_by_name[name]() - - # Special cases - if isinstance(instruction, InstructionOut) and tokens[1].startswith('"'): - chars = unescape_char(tokens[1][1:-1]) - instructions = [] - for char in chars: - instruction = InstructionOut() - instruction.args.append(OpLiteral(ord(char))) - instructions.append(instruction) - return instructions, [] - elif isinstance(instruction, InstructionData): - if tokens[1].startswith('"'): - chars = unescape_char(tokens[1][1:-1]) - instruction.args = [ord(char) for char in chars] - return [instruction], [] - else: - instruction.args = [int(x, 16) for x in tokens[1:]] - return [instruction], [] - else: - if len(tokens) != instruction.nargs + 1: - raise Exception('Invalid number of arguments: Expected {}, got {}'.format(instruction.nargs, len(tokens) - 1)) - for i in range(instruction.nargs): - argstr = tokens[i + 1] - if argstr.startswith('R'): - # Register - arg = OpRegister(int(argstr[1:])) - elif argstr.startswith('$'): - # Label - arg = OpLabel(argstr[1:]) - else: - # Hex literal - arg = OpLiteral(int(argstr, 16)) - instruction.args.append(arg) - return [instruction], [] - # First pass labels = {} SYN_MEM = [0] * 32768 diff --git a/disasm.py b/disasm.py index f89f349..c3d1ee8 100755 --- a/disasm.py +++ b/disasm.py @@ -28,7 +28,7 @@ with open(args.file, 'rb') as data: SYN_MEM = memory_from_file(data) # Find things to label -labels, comments_before, comments_inline = {}, {}, {} +labels, comments_before, comments_inline, replacements = {}, {}, {}, {} SYN_PTR = 0 while SYN_PTR < len(SYN_MEM): word = SYN_MEM[SYN_PTR] @@ -62,6 +62,9 @@ if args.hints: elif line.startswith('call '): loc = int(line.split()[1], 16) labels['sub_{:04x}'.format(loc)] = loc + elif line.startswith('lbl '): + loc = int(line.split()[1], 16) + labels[line.split()[2]] = loc elif line.startswith('ren '): old_label = line.split()[1] new_label = line.split()[2] @@ -73,18 +76,25 @@ if args.hints: if loc not in comments_before: comments_before[loc] = [] comments_before[loc].append(comment) + elif line.startswith('cmi '): + loc = int(line.split()[1], 16) + comment = line[line.index(' ', line.index(' ') + 1) + 1:].strip() + comments_inline[loc] = comment + elif line.startswith('rep '): + loc = int(line.split()[1], 16) + code = line[line.index(' ', line.index(' ') + 1) + 1:].strip() + instruction = assemble_line(None, code)[0][0] + replacements[loc] = instruction else: raise Exception('Invalid line in hint file: {}'.format(line)) -def escape_char(char): - return char.encode('unicode_escape').decode('utf-8').replace('"', '\\"') - MODE_OUT = False MODE_DAT = False #False, 1 (data), 2 (text) SYN_PTR = 0 while SYN_PTR < len(SYN_MEM): + # Handle comments if SYN_PTR in comments_before: if MODE_OUT: print('"') @@ -97,6 +107,12 @@ while SYN_PTR < len(SYN_MEM): MODE_DAT = False for comment in comments_before[SYN_PTR]: print('; {}'.format(comment)) + if SYN_PTR in comments_inline: + comment_inline = ' ; {}'.format(comments_inline[SYN_PTR]) + else: + comment_inline = '' + + # Handle labels if any(v == SYN_PTR for k, v in labels.items()): if MODE_OUT: print('"') @@ -109,6 +125,13 @@ while SYN_PTR < len(SYN_MEM): MODE_DAT = False print('${}:'.format(next(k for k, v in labels.items() if v == SYN_PTR))) + # Handle replacements + if SYN_PTR in replacements: + instruction = replacements[SYN_PTR] + print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) + SYN_PTR += len(instruction.assemble(None)) + continue + word = SYN_MEM[SYN_PTR] if MODE_OUT and word != 19: @@ -162,24 +185,18 @@ while SYN_PTR < len(SYN_MEM): if MODE_OUT: print('"') MODE_OUT = False - print('{:04x}: {}'.format(SYN_PTR, instruction.describe())) + print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) elif isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionJt) or isinstance(instruction, InstructionJf) or isinstance(instruction, InstructionCall): if isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionCall): - op = instruction.args[0] + argidx = 0 else: - op = instruction.args[1] - if isinstance(op, OpLiteral): - loc = op.get(None) + argidx = 1 + if isinstance(instruction.args[argidx], OpLiteral): + loc = instruction.args[argidx].get(None) if any(v == loc for k, v in labels.items()): label = next(k for k, v in labels.items() if v == loc) - if isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionCall): - print('{:04x}: {: <4} ${}'.format(SYN_PTR, instruction.name, label)) - else: - print('{:04x}: {: <4} {} ${}'.format(SYN_PTR, instruction.name, instruction.args[0].describe(), label)) - else: - print('{:04x}: {}'.format(SYN_PTR, instruction.describe())) - else: - print('{:04x}: {}'.format(SYN_PTR, instruction.describe())) + instruction.args[argidx] = OpLabel(label) + print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) else: - print('{:04x}: {}'.format(SYN_PTR, instruction.describe())) + print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) SYN_PTR = next_SYN_PTR diff --git a/disasm_hints/annotations.txt b/disasm_hints/annotations.txt index a482af9..37b3543 100644 --- a/disasm_hints/annotations.txt +++ b/disasm_hints/annotations.txt @@ -1,6 +1,94 @@ -jmp 0000 -ren label_0000 start -jmp 0110 -ren label_0110 self_test cmb 0140 Test jmp ren label_015b self_test_jmp1 +cmi 0142 jmp lands here if fails +cmi 0162 jmp from 0160 lands here if -2 +cmi 0164 jmp from 0160 lands here if -1 +cmi 0166 jmp from 0160 lands here if correct +cmi 0168 jmp from 0160 lands here if +1 +cmi 016a jmp from 0160 lands here if +2 +ren label_0166 self_test_jmp2 +ren label_0170 self_test_jmp_diagnose-2 +ren label_018d self_test_jmp_diagnose-1 +ren label_01a8 self_test_jmp_diagnose+1 +ren label_01c5 self_test_jmp_diagnose+2 +cmi 016e jmp from 0162 lands here +cmi 018c jmp from 0164 lands here +cmi 01a9 jmp from 0168 lands here +cmi 01c7 jmp from 016a lands here +cmb 01e4 Test jt/jf +cmb 01e4 ?? Does not test behaviour for values other than 0 and 1 +ren label_01e4 self_test_jtjf +ren label_0432 self_test_jtjf_fail +cmi 01e4 Jump to fail if 0 is true +cmi 01e7 Jump to fail if 1 is false +ren label_01ef self_test_jtjf2 +ren label_01f4 self_test_regzero +cmi 01ed Fail if 1 is not true +cmi 01f2 Fail if 0 is not false +cmb 01f4 Test that all registers are zero +cmb 01f4 ?? Because this uses jt/jf, errors may not be detected if involving values other than 0 and 1 +ren label_0445 self_test_regzero_fail +cmb 020c Test set +ren label_045e self_test_set_fail +cmb 0218 Test add +cmb 0218 ?? This only tests if 1 + 1 != 0, and will fail to detect almost all other errors +cmi 021c Dodgy! +ren label_0234 self_test_add +cmb 0234 Test add +cmb 0234 ?? This reuses the result from the add test, so will erroneously report an eq failure (instead of an add failure) if 1 + 1 != 2 +cmb 0234 It would probably have been a better idea to test eq first, then use that to verify add +cmi 0238 Dodgy! +ren label_024e self_test_pushpop +cmb 024e Test push/pop by exchanging R0 and R1 +cmb 024e ?? Because R1 is reused, the test will erroneously report a push/pop failure (instead of an eq failure) if eq returns any other truthy value in the previous test +cmi 025d Dodgy! +ren label_0486 self_test_pushpop_fail +cmb 0264 Test gt +ren label_0473 self_test_gt_fail +cmb 0279 Test and +ren label_0499 self_test_and_fail +cmb 0284 Test or +cmb 02ac Test not +ren label_02ac self_test_not +ren label_04b8 self_test_not_fail +cmb 02c0 Test call +ren sub_0505 self_test_call_subroutine1 +lbl 02c2 self_test_call_returnloc1 +ren label_0509 self_test_call_fail +ren label_02c4 self_test_call_check_stack1 +rep 02c6 eq R1 R0 $self_test_call_check_stack1 +cmi 02c6 This test is strange. If R0 == 02c2 as tested below, R0 != 02c4 is guaranteed +rep 02cd eq R1 R0 $self_test_call_returnloc1 +cmb 02d4 Test call register value +ren sub_0507 self_test_call_subroutine2 +lbl 02d9 self_test_call_returnloc2 +rep 02d4 set R0 $self_test_call_subroutine2 +ren label_02db self_test_call_check_stack2 +rep 02dd eq R1 R0 $self_test_call_check_stack2 +rep 02e4 eq R1 R0 $self_test_call_returnloc2 +cmb 02eb Test add overflow +ren label_0520 self_test_overflow_fail +cmi 0304 Just to be safe! +cmb 030b Test mult +cmi 030f Test for HHGTTG (non-)compatibility +ren label_0565 self_test_hhgttg_fail +ren label_0586 self_test_mult_fail +cmb 0328 Test mod +ren label_059d self_test_mod_fail +ren label_034d self_test_rwmem +lbl 034b self_test_rwmem_data +cmb 034b Test rmem/wmem +ren label_04d7 self_test_rmem_fail +ren label_04ee self_test_wmem_fail +rep 034d rmem R0 $self_test_rwmem_data +rep 0357 add R2 $self_test_rwmem_data 0001 +rep 0365 set R0 $self_test_rwmem_data +ren sub_06bb decrypt_data +cmi 0375 Sneaky! +lbl 17b4 encrypted_data +rep 0377 rmem R0 $encrypted_data +rep 0381 add R2 $encrypted_data 0001 +ren label_03ad self_test_wmem_cmd_fail +rep 03ab out $self_test_complete +cmi 03a9 Becomes noop; jt 0013 $self_test_complete +lbl 03d2 self_test_complete diff --git a/libsynacor/__init__.py b/libsynacor/__init__.py index 9595d2b..08d899e 100644 --- a/libsynacor/__init__.py +++ b/libsynacor/__init__.py @@ -16,4 +16,5 @@ from .binfile import * from .bytecode import * +from .assembly import * from .cpu import * diff --git a/libsynacor/assembly.py b/libsynacor/assembly.py new file mode 100644 index 0000000..d58651e --- /dev/null +++ b/libsynacor/assembly.py @@ -0,0 +1,129 @@ +# synacor.py - An implementation of the Synacor Challenge +# Copyright © 2017 RunasSudo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from libsynacor import * + +def escape_char(char): + return char.encode('unicode_escape').decode('utf-8').replace('"', '\\"') + +def unescape_char(char): + return char.encode('utf-8').decode('unicode_escape') + +def split_line(line): + tokens = [] + token = '' + idx = 0 + + in_comment = False + in_string = False + in_escape = False + + while idx < len(line): + if in_comment: + pass + elif in_string: + if in_escape: + token += line[idx] + else: + if line[idx] == '\\': + in_escape = True + token += line[idx] + elif line[idx] == '"': + in_string = False + token += line[idx] + else: + token += line[idx] + else: + if line[idx] == ' ': + if token != '': + tokens.append(token) + token = '' + elif line[idx] == '"': + in_string = True + token += line[idx] + elif line[idx] == ';': + in_comment = True + else: + token += line[idx] + idx += 1 + # Final token + if token != '': + tokens.append(token) + + return tokens + +line_no = 0 + +def assemble_next_instruction(source): + line = source.readline() + global line_no; line_no += 1 + if line == '': + return None, [] + + return assemble_line(source, line) + +def assemble_line(source, line): + tokens = split_line(line.strip()) + return assemble_instruction(source, tokens) + +def assemble_instruction(source, tokens): + if len(tokens) == 0: + return assemble_next_instruction(source) + if tokens[0].endswith(':'): + # Label + label = tokens[0][:-1] + instructions, inst_labels = assemble_instruction(source, tokens[1:]) + return instructions, inst_labels + [label] + else: + # Instruction + name = tokens[0] + if name not in instructions_by_name: + raise Exception('Unknown instruction {}'.format(name)) + instruction = instructions_by_name[name]() + + # Special cases + if isinstance(instruction, InstructionOut) and tokens[1].startswith('"'): + chars = unescape_char(tokens[1][1:-1]) + instructions = [] + for char in chars: + instruction = InstructionOut() + instruction.args.append(OpLiteral(ord(char))) + instructions.append(instruction) + return instructions, [] + elif isinstance(instruction, InstructionData): + if tokens[1].startswith('"'): + chars = unescape_char(tokens[1][1:-1]) + instruction.args = [ord(char) for char in chars] + return [instruction], [] + else: + instruction.args = [int(x, 16) for x in tokens[1:]] + return [instruction], [] + else: + if len(tokens) != instruction.nargs + 1: + raise Exception('Invalid number of arguments: Expected {}, got {}'.format(instruction.nargs, len(tokens) - 1)) + for i in range(instruction.nargs): + argstr = tokens[i + 1] + if argstr.startswith('R'): + # Register + arg = OpRegister(int(argstr[1:])) + elif argstr.startswith('$'): + # Label + arg = OpLabel(argstr[1:]) + else: + # Hex literal + arg = OpLiteral(int(argstr, 16)) + instruction.args.append(arg) + return [instruction], [] diff --git a/libsynacor/bytecode.py b/libsynacor/bytecode.py index ea5224c..479f687 100644 --- a/libsynacor/bytecode.py +++ b/libsynacor/bytecode.py @@ -49,11 +49,13 @@ class OpRegister(Operand): def assemble(self, labels): return self.register + 32768 -# Used only in assembling process +# Used only in dis/assembling process class OpLabel(Operand): def __init__(self, label): self.label = label + def describe(self): + return '${}'.format(self.label) def assemble(self, labels): if labels is None: # First pass @@ -85,7 +87,7 @@ class Instruction: description = '{: <4}'.format(self.name) for i in range(self.nargs): description += ' {}'.format(self.args[i].describe()) - return description + return description.strip() def assemble(self, labels): return [self.opcode] + [self.args[i].assemble(labels) for i in range(self.nargs)] From 7aaaa45d4eb34636910eba80b7b81868709174c3 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Mon, 13 Feb 2017 22:00:14 +1030 Subject: [PATCH 10/10] Annotate some more of the code --- disasm.py | 147 ++++++++++++++++++++--------------- disasm_hints/annotations.txt | 50 +++++++++--- 2 files changed, 123 insertions(+), 74 deletions(-) diff --git a/disasm.py b/disasm.py index c3d1ee8..cd21734 100755 --- a/disasm.py +++ b/disasm.py @@ -22,15 +22,29 @@ import argparse parser = argparse.ArgumentParser() parser.add_argument('file', help='.bin file containing the initial memory dump') parser.add_argument('--hints', help='File(s) outlining additional jmp/call targets, label names, comments, etc', action='append') +parser.add_argument('--smart', help='Given a raw Synacor challenge file, disassemble in a Synacor-aware manner', action='store_true') +parser.add_argument('--aggressive-labels', help='Replace values with corresponding labels irrespective of where they appear', action='store_true') args = parser.parse_args() with open(args.file, 'rb') as data: SYN_MEM = memory_from_file(data) + disassemble_end = len(SYN_MEM) + +labels, comments_before, comments_inline, replacements = {}, {}, {}, {} + +# Do smart things if requested +if args.smart: + disassemble_end = 0x17b4 + # Emulate 06bb to decrypt data + for R1 in range(disassemble_end, 0x7562): + R0 = SYN_MEM[R1] + R0 ^= pow(R1, 2, 32768) + R0 ^= 0x4154 + SYN_MEM[R1] = R0 # Find things to label -labels, comments_before, comments_inline, replacements = {}, {}, {}, {} SYN_PTR = 0 -while SYN_PTR < len(SYN_MEM): +while SYN_PTR < disassemble_end: word = SYN_MEM[SYN_PTR] if word in instructions_by_opcode: instruction, SYN_PTR = Instruction.next_instruction(SYN_MEM, SYN_PTR) @@ -93,18 +107,48 @@ MODE_DAT = False #False, 1 (data), 2 (text) SYN_PTR = 0 +def set_mode_out(mode): + global MODE_OUT + if MODE_OUT == mode: + pass + elif mode == False: + # Switching off + print('"') + else: + # Switching on + print('{:04x}: out "'.format(SYN_PTR), end='') + MODE_OUT = mode +def set_mode_dat(mode): + global MODE_DAT + if MODE_DAT == mode: + pass + elif mode == False: + # Switching off + if MODE_DAT == 2: + print('"', end='') + print() + elif MODE_DAT == 1: + # Switching from data to text + print() + print('{:04x}: data "'.format(SYN_PTR), end='') + elif MODE_DAT == 2: + # Switching from text to data + print('"') + print('{:04x}: data'.format(SYN_PTR), end='') + else: + # Switching to a new mode + print('{:04x}: data'.format(SYN_PTR), end='') + if mode == 2: + print('"', end='') + MODE_DAT = mode +def clear_modes(): + set_mode_out(False) + set_mode_dat(False) + while SYN_PTR < len(SYN_MEM): # Handle comments if SYN_PTR in comments_before: - if MODE_OUT: - print('"') - MODE_OUT = False - if MODE_DAT == 1: - print() - MODE_DAT = False - if MODE_DAT == 2: - print('"') - MODE_DAT = False + clear_modes() for comment in comments_before[SYN_PTR]: print('; {}'.format(comment)) if SYN_PTR in comments_inline: @@ -114,15 +158,7 @@ while SYN_PTR < len(SYN_MEM): # Handle labels if any(v == SYN_PTR for k, v in labels.items()): - if MODE_OUT: - print('"') - MODE_OUT = False - if MODE_DAT == 1: - print() - MODE_DAT = False - if MODE_DAT == 2: - print('"') - MODE_DAT = False + clear_modes() print('${}:'.format(next(k for k, v in labels.items() if v == SYN_PTR))) # Handle replacements @@ -134,38 +170,17 @@ while SYN_PTR < len(SYN_MEM): word = SYN_MEM[SYN_PTR] - if MODE_OUT and word != 19: - print('"') - MODE_OUT = False - if MODE_DAT and 0 <= word <= 21: - if MODE_DAT == 1: - print() - if MODE_DAT == 2: - print('"') - MODE_DAT = False - - if word not in instructions_by_opcode: + if SYN_PTR >= disassemble_end or word not in instructions_by_opcode: # Data if 32 <= word <= 126: - # looks like letters - unfortunately "\n" looks like MULT - if MODE_DAT == 2: - print(escape_char(chr(word)), end='') - else: - if MODE_DAT == 1: - print() - print('{:04x}: data "{}'.format(SYN_PTR, escape_char(chr(word))), end='') - MODE_DAT = 2 + # Looks like letters - unfortunately "\n" looks like MULT + set_mode_dat(2) + print(escape_char(chr(word)), end='') if word == 0x0a: - print('"') - MODE_DAT = False # break on newlines + clear_modes() # Break on newlines else: - if MODE_DAT == 1: - print(' {:04x}'.format(word), end='') - else: - if MODE_DAT == 2: - print('"') - print('{:04x}: data {:04x}'.format(SYN_PTR, word), end='') - MODE_DAT = 1 + set_mode_dat(1) + print(' {:04x}'.format(word), end='') SYN_PTR += 1 else: # Instruction @@ -173,30 +188,36 @@ while SYN_PTR < len(SYN_MEM): # Special cases if isinstance(instruction, InstructionOut): if isinstance(instruction.args[0], OpLiteral): - if MODE_OUT: - print(escape_char(chr(instruction.args[0].get(None))), end='') - else: - print('{:04x}: out "{}'.format(SYN_PTR, escape_char(chr(instruction.args[0].get(None)))), end='') - MODE_OUT = True + set_mode_out(True) + print(escape_char(chr(instruction.args[0].get(None))), end='') if instruction.args[0].get(None) == 0x0a: - print('"') - MODE_OUT = False # break on newlines + clear_modes() # Break on newlines else: - if MODE_OUT: - print('"') - MODE_OUT = False + clear_modes() print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) elif isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionJt) or isinstance(instruction, InstructionJf) or isinstance(instruction, InstructionCall): + clear_modes() if isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionCall): argidx = 0 else: argidx = 1 - if isinstance(instruction.args[argidx], OpLiteral): - loc = instruction.args[argidx].get(None) - if any(v == loc for k, v in labels.items()): - label = next(k for k, v in labels.items() if v == loc) - instruction.args[argidx] = OpLabel(label) + # Aggressively replace labels if requested + for argnum in range(instruction.nargs) if args.aggressive_labels else [argidx]: + if isinstance(instruction.args[argnum], OpLiteral): + loc = instruction.args[argnum].get(None) + if any(v == loc for k, v in labels.items()): + label = next(k for k, v in labels.items() if v == loc) + instruction.args[argnum] = OpLabel(label) print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) else: + # Aggressively replace labels if requested + if args.aggressive_labels: + for argnum in range(instruction.nargs): + if isinstance(instruction.args[argnum], OpLiteral): + loc = instruction.args[argnum].get(None) + if any(v == loc for k, v in labels.items()): + label = next(k for k, v in labels.items() if v == loc) + instruction.args[argnum] = OpLabel(label) + clear_modes() print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline)) SYN_PTR = next_SYN_PTR diff --git a/disasm_hints/annotations.txt b/disasm_hints/annotations.txt index 37b3543..2ba96aa 100644 --- a/disasm_hints/annotations.txt +++ b/disasm_hints/annotations.txt @@ -56,16 +56,11 @@ ren sub_0505 self_test_call_subroutine1 lbl 02c2 self_test_call_returnloc1 ren label_0509 self_test_call_fail ren label_02c4 self_test_call_check_stack1 -rep 02c6 eq R1 R0 $self_test_call_check_stack1 cmi 02c6 This test is strange. If R0 == 02c2 as tested below, R0 != 02c4 is guaranteed -rep 02cd eq R1 R0 $self_test_call_returnloc1 cmb 02d4 Test call register value ren sub_0507 self_test_call_subroutine2 lbl 02d9 self_test_call_returnloc2 -rep 02d4 set R0 $self_test_call_subroutine2 ren label_02db self_test_call_check_stack2 -rep 02dd eq R1 R0 $self_test_call_check_stack2 -rep 02e4 eq R1 R0 $self_test_call_returnloc2 cmb 02eb Test add overflow ren label_0520 self_test_overflow_fail cmi 0304 Just to be safe! @@ -80,15 +75,48 @@ lbl 034b self_test_rwmem_data cmb 034b Test rmem/wmem ren label_04d7 self_test_rmem_fail ren label_04ee self_test_wmem_fail -rep 034d rmem R0 $self_test_rwmem_data -rep 0357 add R2 $self_test_rwmem_data 0001 -rep 0365 set R0 $self_test_rwmem_data ren sub_06bb decrypt_data -cmi 0375 Sneaky! +cmb 0375 Sneaky! +cmb 0375 call $decrypt_data +rep 0375 nop +rep 0376 nop lbl 17b4 encrypted_data -rep 0377 rmem R0 $encrypted_data -rep 0381 add R2 $encrypted_data 0001 ren label_03ad self_test_wmem_cmd_fail rep 03ab out $self_test_complete cmi 03a9 Becomes noop; jt 0013 $self_test_complete lbl 03d2 self_test_complete +lbl 17c0 str_self_test_result +cmi 03d6 The "F" +lbl 17e4 str_complete +ren label_03e8 self_test_loop_copy_complete +cmb 03e8 Copy the "complete" string over the "FAILED!!" substring of $str_self_test_result +ren label_03ff self_test_concat_all_pass +cmb 03ff Extend $str_self_test_result to include $str_self_test_all_pass +lbl 17d3 str_self_test_all_pass +cmi 0410 ASCII comma +cmi 0428 Print self-test completion string +cmi 0430 Jump to game loop +ren label_0aae after_self_test +ren sub_05b2 loop_string +cmb 05b2 % Loops over a string and calls a callback for each character +cmb 05b2 % @param R0 Pointer to the string to loop over +cmb 05b2 % @param R1 Pointer to the callback to call with the character in R0 +ren label_05c8 loop_string_loop_chars +ren label_05e3 loop_string_loop_chars_done +ren sub_05ee print_string +cmb 05ee % Prints a string to output +cmb 05ee % @param R0 Pointer to the string to print +ren sub_05f8 print_char +cmb 05f8 % Prints a single character to output +cmb 05f8 % Most useful as a callback to $loop_string +cmb 05f8 % @param R0 The character to print +ren sub_05fb print_char_xored +cmb 05fb % Prints a single character to output after XORing with a fixed value +cmb 05fb % Most useful as a callback to $loop_string +cmb 05fb % @param R0 The input character +cmb 05fb % @param R2 The other XOR operand +ren sub_084d xor +cmb 084d % Calculates the exclusive OR (XOR) of two values +cmb 084d % @param R0 The first XOR operand +cmb 084d % @param R1 The second XOR operand +cmb 084d % @return R0 The value R0 XOR R1