From f260c3e4b5317bb11f35564a8a4c8204382e6c2c Mon Sep 17 00:00:00 2001
From: RunasSudo <runassudo@yingtongli.me>
Date: Mon, 13 Feb 2017 20:55:08 +1030
Subject: [PATCH] Annotate self test

---
 asm.py                       | 106 ----------------------------
 disasm.py                    |  53 +++++++++-----
 disasm_hints/annotations.txt |  96 ++++++++++++++++++++++++--
 libsynacor/__init__.py       |   1 +
 libsynacor/assembly.py       | 129 +++++++++++++++++++++++++++++++++++
 libsynacor/bytecode.py       |   6 +-
 6 files changed, 261 insertions(+), 130 deletions(-)
 create mode 100644 libsynacor/assembly.py

diff --git a/asm.py b/asm.py
index e4377e4..58e17df 100755
--- a/asm.py
+++ b/asm.py
@@ -25,112 +25,6 @@ parser.add_argument('file', help='.asm file to read')
 parser.add_argument('output', help='.bin file to write')
 args = parser.parse_args()
 
-line_no = 0
-
-def split_line(line):
-	tokens = []
-	token = ''
-	idx = 0
-	
-	in_comment = False
-	in_string = False
-	in_escape = False
-	
-	while idx < len(line):
-		if in_comment:
-			pass
-		elif in_string:
-			if in_escape:
-				token += line[idx]
-			else:
-				if line[idx] == '\\':
-					in_escape = True
-					token += line[idx]
-				elif line[idx] == '"':
-					in_string = False
-					token += line[idx]
-				else:
-					token += line[idx]
-		else:
-			if line[idx] == ' ':
-				if token != '':
-					tokens.append(token)
-				token = ''
-			elif line[idx] == '"':
-				in_string = True
-				token += line[idx]
-			elif line[idx] == ';':
-				in_comment = True
-			else:
-				token += line[idx]
-		idx += 1
-	# Final token
-	if token != '':
-		tokens.append(token)
-	
-	return tokens
-
-def unescape_char(char):
-	return char.encode('utf-8').decode('unicode_escape')
-
-def assemble_next_instruction(source):
-	line = source.readline()
-	global line_no; line_no += 1
-	if line == '':
-		return None, []
-	
-	tokens = split_line(line.strip())
-	return assemble_instruction(source, tokens)
-
-def assemble_instruction(source, tokens):
-	if len(tokens) == 0:
-		return assemble_next_instruction(source)
-	if tokens[0].endswith(':'):
-		# Label
-		label = tokens[0][:-1]
-		instructions, inst_labels = assemble_instruction(source, tokens[1:])
-		return instructions, inst_labels + [label]
-	else:
-		# Instruction
-		name = tokens[0]
-		if name not in instructions_by_name:
-			raise Exception('Unknown instruction {}'.format(name))
-		instruction = instructions_by_name[name]()
-		
-		# Special cases
-		if isinstance(instruction, InstructionOut) and tokens[1].startswith('"'):
-			chars = unescape_char(tokens[1][1:-1])
-			instructions = []
-			for char in chars:
-				instruction = InstructionOut()
-				instruction.args.append(OpLiteral(ord(char)))
-				instructions.append(instruction)
-			return instructions, []
-		elif isinstance(instruction, InstructionData):
-			if tokens[1].startswith('"'):
-				chars = unescape_char(tokens[1][1:-1])
-				instruction.args = [ord(char) for char in chars]
-				return [instruction], []
-			else:
-				instruction.args = [int(x, 16) for x in tokens[1:]]
-				return [instruction], []
-		else:
-			if len(tokens) != instruction.nargs + 1:
-				raise Exception('Invalid number of arguments: Expected {}, got {}'.format(instruction.nargs, len(tokens) - 1))
-			for i in range(instruction.nargs):
-				argstr = tokens[i + 1]
-				if argstr.startswith('R'):
-					# Register
-					arg = OpRegister(int(argstr[1:]))
-				elif argstr.startswith('$'):
-					# Label
-					arg = OpLabel(argstr[1:])
-				else:
-					# Hex literal
-					arg = OpLiteral(int(argstr, 16))
-				instruction.args.append(arg)
-			return [instruction], []
-
 # First pass
 labels = {}
 SYN_MEM = [0] * 32768
diff --git a/disasm.py b/disasm.py
index f89f349..c3d1ee8 100755
--- a/disasm.py
+++ b/disasm.py
@@ -28,7 +28,7 @@ with open(args.file, 'rb') as data:
 	SYN_MEM = memory_from_file(data)
 
 # Find things to label
-labels, comments_before, comments_inline = {}, {}, {}
+labels, comments_before, comments_inline, replacements = {}, {}, {}, {}
 SYN_PTR = 0
 while SYN_PTR < len(SYN_MEM):
 	word = SYN_MEM[SYN_PTR]
@@ -62,6 +62,9 @@ if args.hints:
 				elif line.startswith('call '):
 					loc = int(line.split()[1], 16)
 					labels['sub_{:04x}'.format(loc)] = loc
+				elif line.startswith('lbl '):
+					loc = int(line.split()[1], 16)
+					labels[line.split()[2]] = loc
 				elif line.startswith('ren '):
 					old_label = line.split()[1]
 					new_label = line.split()[2]
@@ -73,18 +76,25 @@ if args.hints:
 					if loc not in comments_before:
 						comments_before[loc] = []
 					comments_before[loc].append(comment)
+				elif line.startswith('cmi '):
+					loc = int(line.split()[1], 16)
+					comment = line[line.index(' ', line.index(' ') + 1) + 1:].strip()
+					comments_inline[loc] = comment
+				elif line.startswith('rep '):
+					loc = int(line.split()[1], 16)
+					code = line[line.index(' ', line.index(' ') + 1) + 1:].strip()
+					instruction = assemble_line(None, code)[0][0]
+					replacements[loc] = instruction
 				else:
 					raise Exception('Invalid line in hint file: {}'.format(line))
 
-def escape_char(char):
-	return char.encode('unicode_escape').decode('utf-8').replace('"', '\\"')
-
 MODE_OUT = False
 MODE_DAT = False #False, 1 (data), 2 (text)
 
 SYN_PTR = 0
 
 while SYN_PTR < len(SYN_MEM):
+	# Handle comments
 	if SYN_PTR in comments_before:
 		if MODE_OUT:
 			print('"')
@@ -97,6 +107,12 @@ while SYN_PTR < len(SYN_MEM):
 			MODE_DAT = False
 		for comment in comments_before[SYN_PTR]:
 			print('; {}'.format(comment))
+	if SYN_PTR in comments_inline:
+		comment_inline = ' ; {}'.format(comments_inline[SYN_PTR])
+	else:
+		comment_inline = ''
+	
+	# Handle labels
 	if any(v == SYN_PTR for k, v in labels.items()):
 		if MODE_OUT:
 			print('"')
@@ -109,6 +125,13 @@ while SYN_PTR < len(SYN_MEM):
 			MODE_DAT = False
 		print('${}:'.format(next(k for k, v in labels.items() if v == SYN_PTR)))
 	
+	# Handle replacements
+	if SYN_PTR in replacements:
+		instruction = replacements[SYN_PTR]
+		print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline))
+		SYN_PTR += len(instruction.assemble(None))
+		continue
+	
 	word = SYN_MEM[SYN_PTR]
 	
 	if MODE_OUT and word != 19:
@@ -162,24 +185,18 @@ while SYN_PTR < len(SYN_MEM):
 				if MODE_OUT:
 					print('"')
 					MODE_OUT = False
-				print('{:04x}: {}'.format(SYN_PTR, instruction.describe()))
+				print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline))
 		elif isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionJt) or isinstance(instruction, InstructionJf) or isinstance(instruction, InstructionCall):
 			if isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionCall):
-				op = instruction.args[0]
+				argidx = 0
 			else:
-				op = instruction.args[1]
-			if isinstance(op, OpLiteral):
-				loc = op.get(None)
+				argidx = 1
+			if isinstance(instruction.args[argidx], OpLiteral):
+				loc = instruction.args[argidx].get(None)
 				if any(v == loc for k, v in labels.items()):
 					label = next(k for k, v in labels.items() if v == loc)
-					if isinstance(instruction, InstructionJmp) or isinstance(instruction, InstructionCall):
-						print('{:04x}: {: <4} ${}'.format(SYN_PTR, instruction.name, label))
-					else:
-						print('{:04x}: {: <4} {} ${}'.format(SYN_PTR, instruction.name, instruction.args[0].describe(), label))
-				else:
-					print('{:04x}: {}'.format(SYN_PTR, instruction.describe()))
-			else:
-				print('{:04x}: {}'.format(SYN_PTR, instruction.describe()))
+					instruction.args[argidx] = OpLabel(label)
+			print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline))
 		else:
-			print('{:04x}: {}'.format(SYN_PTR, instruction.describe()))
+			print('{:04x}: {}{}'.format(SYN_PTR, instruction.describe(), comment_inline))
 		SYN_PTR = next_SYN_PTR
diff --git a/disasm_hints/annotations.txt b/disasm_hints/annotations.txt
index a482af9..37b3543 100644
--- a/disasm_hints/annotations.txt
+++ b/disasm_hints/annotations.txt
@@ -1,6 +1,94 @@
-jmp 0000
-ren label_0000 start
-jmp 0110
-ren label_0110 self_test
 cmb 0140 Test jmp
 ren label_015b self_test_jmp1
+cmi 0142 jmp lands here if fails
+cmi 0162 jmp from 0160 lands here if -2
+cmi 0164 jmp from 0160 lands here if -1
+cmi 0166 jmp from 0160 lands here if correct
+cmi 0168 jmp from 0160 lands here if +1
+cmi 016a jmp from 0160 lands here if +2
+ren label_0166 self_test_jmp2
+ren label_0170 self_test_jmp_diagnose-2
+ren label_018d self_test_jmp_diagnose-1
+ren label_01a8 self_test_jmp_diagnose+1
+ren label_01c5 self_test_jmp_diagnose+2
+cmi 016e jmp from 0162 lands here
+cmi 018c jmp from 0164 lands here
+cmi 01a9 jmp from 0168 lands here
+cmi 01c7 jmp from 016a lands here
+cmb 01e4 Test jt/jf
+cmb 01e4 ?? Does not test behaviour for values other than 0 and 1
+ren label_01e4 self_test_jtjf
+ren label_0432 self_test_jtjf_fail
+cmi 01e4 Jump to fail if 0 is true
+cmi 01e7 Jump to fail if 1 is false
+ren label_01ef self_test_jtjf2
+ren label_01f4 self_test_regzero
+cmi 01ed Fail if 1 is not true
+cmi 01f2 Fail if 0 is not false
+cmb 01f4 Test that all registers are zero
+cmb 01f4 ?? Because this uses jt/jf, errors may not be detected if involving values other than 0 and 1
+ren label_0445 self_test_regzero_fail
+cmb 020c Test set
+ren label_045e self_test_set_fail
+cmb 0218 Test add
+cmb 0218 ?? This only tests if 1 + 1 != 0, and will fail to detect almost all other errors
+cmi 021c Dodgy!
+ren label_0234 self_test_add
+cmb 0234 Test eq
+cmb 0234 ?? This reuses the result from the add test, so will erroneously report an eq failure (instead of an add failure) if 1 + 1 != 2
+cmb 0234 It would probably have been a better idea to test eq first, then use that to verify add
+cmi 0238 Dodgy!
+ren label_024e self_test_pushpop
+cmb 024e Test push/pop by exchanging R0 and R1
+cmb 024e ?? Because R1 is reused, the test will erroneously report a push/pop failure (instead of an eq failure) if eq returns any other truthy value in the previous test
+cmi 025d Dodgy!
+ren label_0486 self_test_pushpop_fail
+cmb 0264 Test gt
+ren label_0473 self_test_gt_fail
+cmb 0279 Test and
+ren label_0499 self_test_and_fail
+cmb 0284 Test or
+cmb 02ac Test not
+ren label_02ac self_test_not
+ren label_04b8 self_test_not_fail
+cmb 02c0 Test call
+ren sub_0505 self_test_call_subroutine1
+lbl 02c2 self_test_call_returnloc1
+ren label_0509 self_test_call_fail
+ren label_02c4 self_test_call_check_stack1
+rep 02c6 eq   R1 R0 $self_test_call_check_stack1
+cmi 02c6 This test is strange. If R0 == 02c2 as tested below, R0 != 02c4 is guaranteed
+rep 02cd eq   R1 R0 $self_test_call_returnloc1
+cmb 02d4 Test call register value
+ren sub_0507 self_test_call_subroutine2
+lbl 02d9 self_test_call_returnloc2
+rep 02d4 set  R0 $self_test_call_subroutine2
+ren label_02db self_test_call_check_stack2
+rep 02dd eq   R1 R0 $self_test_call_check_stack2
+rep 02e4 eq   R1 R0 $self_test_call_returnloc2
+cmb 02eb Test add overflow
+ren label_0520 self_test_overflow_fail
+cmi 0304 Just to be safe!
+cmb 030b Test mult
+cmi 030f Test for HHGTTG (non-)compatibility
+ren label_0565 self_test_hhgttg_fail
+ren label_0586 self_test_mult_fail
+cmb 0328 Test mod
+ren label_059d self_test_mod_fail
+ren label_034d self_test_rwmem
+lbl 034b self_test_rwmem_data
+cmb 034b Test rmem/wmem
+ren label_04d7 self_test_rmem_fail
+ren label_04ee self_test_wmem_fail
+rep 034d rmem R0 $self_test_rwmem_data
+rep 0357 add  R2 $self_test_rwmem_data 0001
+rep 0365 set  R0 $self_test_rwmem_data
+ren sub_06bb decrypt_data
+cmi 0375 Sneaky!
+lbl 17b4 encrypted_data
+rep 0377 rmem R0 $encrypted_data
+rep 0381 add  R2 $encrypted_data 0001
+ren label_03ad self_test_wmem_cmd_fail
+rep 03ab out $self_test_complete
+cmi 03a9 Becomes noop; jt 0013 $self_test_complete
+lbl 03d2 self_test_complete
diff --git a/libsynacor/__init__.py b/libsynacor/__init__.py
index 9595d2b..08d899e 100644
--- a/libsynacor/__init__.py
+++ b/libsynacor/__init__.py
@@ -16,4 +16,5 @@
 
 from .binfile import *
 from .bytecode import *
+from .assembly import *
 from .cpu import *
diff --git a/libsynacor/assembly.py b/libsynacor/assembly.py
new file mode 100644
index 0000000..d58651e
--- /dev/null
+++ b/libsynacor/assembly.py
@@ -0,0 +1,129 @@
+#    synacor.py - An implementation of the Synacor Challenge
+#    Copyright © 2017  RunasSudo
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU Affero General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU Affero General Public License for more details.
+#
+#    You should have received a copy of the GNU Affero General Public License
+#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+from libsynacor import *
+
+def escape_char(char): # Escape text so it can be written inside a double-quoted assembly string
+	return char.encode('unicode_escape').decode('utf-8').replace('"', '\\"') # backslash-escape via the unicode_escape codec, then turn '"' into \"
+
+def unescape_char(char): # Expand backslash escapes in text taken from a quoted string (inverse direction of escape_char)
+	return char.encode('utf-8').decode('unicode_escape') # NOTE(review): unicode_escape decodes bytes as Latin-1, so non-ASCII input is presumably mangled - confirm
+
+def split_line(line):
+	"""Split one line of assembly source into a list of token strings.
+	
+	Tokens are separated by spaces. A double-quoted string is kept as a single
+	token (quotes and backslash escapes included, spaces preserved), and
+	everything from an unquoted ';' to the end of the line is discarded.
+	"""
+	tokens = []
+	token = ''
+	
+	in_string = False
+	in_escape = False
+	
+	for char in line:
+		if in_string:
+			# Inside a string every character, even ' ' and ';', is literal
+			token += char
+			if in_escape:
+				# The escaped character ends the escape; without this reset an
+				# escaped string could never be closed by its final quote
+				in_escape = False
+			elif char == '\\':
+				in_escape = True
+			elif char == '"':
+				in_string = False
+		elif char == ';':
+			# Comment: ignore the rest of the line
+			break
+		elif char == ' ':
+			if token != '':
+				tokens.append(token)
+			token = ''
+		else:
+			if char == '"':
+				in_string = True
+			token += char
+	
+	# Final token
+	if token != '':
+		tokens.append(token)
+	
+	return tokens
+
+line_no = 0
+
+def assemble_next_instruction(source): # Read the next line from source and assemble it
+	line = source.readline()
+	global line_no; line_no += 1 # module-level count of lines read so far
+	if line == '': # nothing left to read: report "no instruction" as (None, [])
+		return None, []
+	
+	return assemble_line(source, line)
+
+def assemble_line(source, line): # Assemble one line of text; returns whatever assemble_instruction returns
+	tokens = split_line(line.strip())
+	return assemble_instruction(source, tokens) # source is only read from when the line holds no instruction
+
+def assemble_instruction(source, tokens):
+	"""Assemble a tokenised line; returns (instructions, label names).
+	Blank lines are skipped by reading on from source; at EOF instructions is None.
+	Raises Exception on an unknown instruction or a wrong argument count."""
+	if len(tokens) == 0:
+		return assemble_next_instruction(source)
+	if tokens[0].endswith(':'):
+		# Label
+		label = tokens[0][:-1]
+		instructions, inst_labels = assemble_instruction(source, tokens[1:])
+		return instructions, inst_labels + [label]
+	else:
+		# Instruction
+		name = tokens[0]
+		if name not in instructions_by_name:
+			raise Exception('Unknown instruction {}'.format(name))
+		instruction = instructions_by_name[name]()
+		
+		# Special cases (length-checked so an operand-less out/data cannot index past the tokens)
+		if isinstance(instruction, InstructionOut) and len(tokens) > 1 and tokens[1].startswith('"'):
+			instructions = []
+			for char in unescape_char(tokens[1][1:-1]):
+				instruction = InstructionOut()
+				instruction.args.append(OpLiteral(ord(char)))
+				instructions.append(instruction)
+			return instructions, []
+		elif isinstance(instruction, InstructionData):
+			if len(tokens) > 1 and tokens[1].startswith('"'):
+				instruction.args = [ord(char) for char in unescape_char(tokens[1][1:-1])]
+			else:
+				instruction.args = [int(x, 16) for x in tokens[1:]]
+			return [instruction], []
+		else:
+			if len(tokens) != instruction.nargs + 1:
+				raise Exception('Invalid number of arguments: Expected {}, got {}'.format(instruction.nargs, len(tokens) - 1))
+			for i in range(instruction.nargs):
+				argstr = tokens[i + 1]
+				if argstr.startswith('R'):
+					# Register
+					arg = OpRegister(int(argstr[1:]))
+				elif argstr.startswith('$'):
+					# Label
+					arg = OpLabel(argstr[1:])
+				else:
+					# Hex literal
+					arg = OpLiteral(int(argstr, 16))
+				instruction.args.append(arg)
+			return [instruction], []
diff --git a/libsynacor/bytecode.py b/libsynacor/bytecode.py
index ea5224c..479f687 100644
--- a/libsynacor/bytecode.py
+++ b/libsynacor/bytecode.py
@@ -49,11 +49,13 @@ class OpRegister(Operand):
 	def assemble(self, labels):
 		return self.register + 32768
 
-# Used only in assembling process
+# Used only in dis/assembling process
 class OpLabel(Operand):
 	def __init__(self, label):
 		self.label = label
 	
+	def describe(self):
+		return '${}'.format(self.label)
 	def assemble(self, labels):
 		if labels is None:
 			# First pass
@@ -85,7 +87,7 @@ class Instruction:
 		description = '{: <4}'.format(self.name)
 		for i in range(self.nargs):
 			description += ' {}'.format(self.args[i].describe())
-		return description
+		return description.strip()
 	
 	def assemble(self, labels):
 		return [self.opcode] + [self.args[i].assemble(labels) for i in range(self.nargs)]