"""Parse fixed-width instruction listings into structured records.

Each listing (``sh2.txt`` / ``sh4.txt``, located one directory above this
module) is a plain-text table with one instruction per row.  A row is cut
into named columns by character offset, the 16-character code pattern is
decoded into fixed bits plus operand bit-fields, and the operand text is
scanned for the single-letter variables it references.
"""
from collections import defaultdict
from dataclasses import dataclass, replace
from pprint import pprint
import os
import inspect


@dataclass(frozen=True)
class CodeOperand:
    """A contiguous run of operand bits inside an instruction code pattern."""

    # Single-letter placeholder that marks the field in the pattern.
    operand: str
    # Bit index of the field's least significant bit (bit 0 is the last
    # character of the pattern string).
    lsb: int
    # Number of bits in the field.
    length: int


@dataclass(frozen=True)
class Code:
    """Decoded form of an instruction code pattern such as ``0011nnnnmmmm1100``."""

    # Value of the fixed ('0'/'1') bits; operand positions contribute 0.
    code_bits: int
    # Starts as 0xffff with the bit cleared at every operand position.
    mask_bits: int
    # Placeholder letter -> its bit-field.  ``parse_code`` builds a dict and
    # ``unparse_instruction_code`` iterates ``.values()``.
    operands: dict[str, CodeOperand]


@dataclass(frozen=True)
class Instruction:
    """One row of an instruction listing."""

    # Text of the "instruction" column.
    instruction: str
    # Text of the "operands" column.
    operands: str
    # Placeholder letters referenced by ``operands``, in order of appearance.
    variables: tuple[str, ...]
    # Decoded "code" column.
    code: Code
    # Text of the "operation" column; continuation rows are appended to it,
    # separated by single spaces.
    operation: str


def unparse_instruction_code(ins):
    """Rebuild the 16-character code pattern of *ins*.

    The fixed bits are rendered as a 16-digit binary string and every
    operand bit position is overwritten with the operand's letter.
    """
    code = list(f'{ins.code.code_bits:016b}')
    for operand in ins.code.operands.values():
        for i in range(operand.lsb, operand.lsb + operand.length):
            # Bit i counts from the right; the string index counts from the left.
            code[15 - i] = operand.operand
    return ''.join(code)


_operands = {
    "Rn",
    "Rm",
    "R0",
    "GBR",
    "SR",
    "VBR",
    "MACH",
    "MACL",
}


def _b16_str(n):
    """Yield the low 16 bits of *n* as '0'/'1' characters, most significant first."""
    for i in reversed(range(16)):
        yield chr(ord('0') + ((n >> i) & 1))


def b16_str(n):
    """Return the low 16 bits of *n* as a 16-character binary string."""
    # Masking first keeps the result at exactly 16 digits for any int,
    # including negative values and values wider than 16 bits.
    return format(n & 0xffff, '016b')


_operand_bits = {
    'i',
    'n',
    'm',
    'd',
}


def assert_contiguous(bits):
    """Assert that *bits* has no duplicates and that neighbours differ by 1.

    Returns *bits* unchanged so the call can be used inline.
    """
    assert len(bits) == len(set(bits))
    for i, bit in enumerate(bits[:-1]):
        assert abs(bit - bits[i+1]) == 1
    return bits


def parse_code(code):
    """Decode a code pattern string into a :class:`Code`.

    Characters are read right to left, so the last character is bit 0.
    '0' and '1' are fixed bits; any other character must be one of the
    letters in ``_operand_bits`` and marks an operand bit.

    Raises:
        AssertionError: on an unknown character, or when the positions of
            one operand letter are not contiguous.
    """
    positions = defaultdict(list)
    code_bits = 0
    mask_bits = 0xffff
    for i, digit in enumerate(reversed(code)):
        if digit in {'0', '1'}:
            code_bits |= int(digit) << i
        else:
            assert digit in _operand_bits, (digit, code)
            positions[digit].append(i)
            mask_bits &= ~(1 << i)

    operands = {
        operand: CodeOperand(
            operand=operand,
            lsb=min(assert_contiguous(bits)),
            length=len(bits),
        )
        for operand, bits in positions.items()
    }

    return Code(
        code_bits=code_bits,
        mask_bits=mask_bits,
        operands=operands,
    )


# Operand tokens that stand for an encoded field, mapped to the letter used
# for that field in the code pattern.
_VARIABLE_LETTERS = {
    'Rm': 'm',
    'Rn': 'n',
    'disp': 'd',
    'imm': 'i',
    'label': 'd',
    # SH4
    "Rm_BANK": 'm',
    "Rn_BANK": 'n',
    # SH4 floating point
    "FRm": 'm',
    "FRn": 'n',
    "DRm": 'm',
    "DRn": 'n',
    "XDm": 'm',
    "XDn": 'n',
    "FVm": 'm',
    "FVn": 'n',
}

# Operand tokens that are recognised but contribute no variable.
_NON_VARIABLES = {
    "GBR",
    "VBR",
    "MACH",
    "MACL",
    "SR",
    "PR",
    "PC",
    "R0",
    # SH4
    "SSR",
    "SPC",
    "DBR",
    "SGR",
    # SH4 floating point
    "FPUL",
    "FR0",
    "FPSCR",
    "XMTRX",
}


def get_variable(token):
    """Yield the variable letter for *token*, or nothing for a fixed name.

    Raises:
        AssertionError: (on iteration) when *token* is in neither table.
    """
    if token in _VARIABLE_LETTERS:
        yield _VARIABLE_LETTERS[token]
    elif token in _NON_VARIABLES:
        return
    else:
        # Raised explicitly: an ``assert False`` would vanish under ``-O``
        # and unknown tokens would then be silently ignored.
        raise AssertionError(token)


def parse_tokens(operands):
    """Yield the name tokens contained in an operand string.

    ``(``, ``#``, ``@`` and ``-`` restart the current token just after
    themselves; ``,``, ``)`` and ``+`` end it.  Empty tokens are skipped.
    For example ``"@(disp,GBR)"`` yields ``"disp"`` then ``"GBR"``.

    Raises:
        AssertionError: when the string ends with a comma (raised after
            the preceding tokens have been yielded).
    """
    if not operands:
        return

    span = 0  # index where the token currently being read starts
    for i, c in enumerate(operands):
        if c in {'(', '#', '@', '-'}:
            span = i + 1
        elif c in {',', ')', '+'}:
            if span != i:
                yield operands[span:i]
            span = i + 1

    assert operands[-1] != ','
    if span != len(operands):
        yield operands[span:]


def parse_variables(operands):
    """Yield the variable letter of every token in *operands*, in order."""
    for token in parse_tokens(operands):
        yield from get_variable(token)


def parse_instruction(*, instruction, operands, code, operation, **kwargs):
    """Build the field tuple for one listing row.

    Extra columns are accepted through ``**kwargs`` and ignored.  The
    result is ordered like the fields of :class:`Instruction`.

    Raises:
        AssertionError: from code or operand parsing; when operand parsing
            fails the instruction name is printed before re-raising.
    """
    code = parse_code(code)
    try:
        variables = tuple(parse_variables(operands))
    except Exception:
        # Identify the offending row, then let the error propagate.
        print(instruction)
        raise
    return (
        instruction,
        operands,
        variables,
        code,
        operation,
    )


def untabulate_instructions(filename, columns):
    """Read a fixed-width listing and return its rows as ``Instruction`` objects.

    Args:
        filename: path of the listing to read.
        columns: mapping of column name to ``(start, end)`` character
            offsets, as produced by :func:`column_bounds`.

    Blank lines are skipped.  A line that starts with a space is a
    continuation: its text is appended to the ``operation`` of the
    previous instruction.

    Raises:
        AssertionError: when a line contains a tab, or when a continuation
            line has text outside the "operation" column.
    """
    with open(filename, 'r') as f:
        buf = f.read()

    instructions = []
    for line in buf.split('\n'):
        if not line.strip():
            continue
        assert '\t' not in line, line

        if line[0] == ' ':
            # Continuation row: all of its text must sit in the operation column.
            start, end = columns["operation"]
            assert len(line.strip()) == len(line[start:end].strip())
            previous = instructions[-1]
            instructions[-1] = replace(
                previous,
                operation=" ".join([previous.operation, line.strip()]),
            )
            continue

        fields = {
            name: line[start:end].strip()
            for name, (start, end) in columns.items()
        }
        instructions.append(Instruction(*parse_instruction(**fields)))

    return instructions


def column_bounds(columns):
    """Turn ``(name, start)`` pairs into ``(name, (start, end))`` pairs.

    Each column ends where the next one starts; the last column gets
    ``end=None`` so it runs to the end of the line.
    """
    last = len(columns) - 1
    for ix, (name, start) in enumerate(columns):
        end = None if ix == last else columns[ix + 1][1]
        yield name, (start, end)


# Directory containing this source file; the listings live one level above it.
directory = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))


def untabulate_instructions_sh2():
    """Parse ``../sh2.txt`` using that listing's column layout."""
    columns = dict(column_bounds([
        ("instruction", 0),
        ("operands", 8),
        ("code", 23),
        ("operation", 42),
        ("state", 88),
        ("t_bit", 102),
    ]))
    return untabulate_instructions(os.path.join(directory, "..", "sh2.txt"), columns)


def untabulate_instructions_sh4():
    """Parse ``../sh4.txt`` using that listing's column layout."""
    columns = dict(column_bounds([
        ("instruction", 0),
        ("operands", 8),
        ("operation", 24),
        ("code", 80),
        ("privileged", 104),
        ("t_bit", 116),
    ]))
    return untabulate_instructions(os.path.join(directory, "..", "sh4.txt"), columns)