From d90311596437ce14d35526bfe38f01707280a6cd Mon Sep 17 00:00:00 2001 From: Zack Buhman Date: Thu, 16 Oct 2025 12:53:28 -0500 Subject: [PATCH] assembler: feature parity with the disassembler --- regs/assembler/__main__.py | 29 ++++++++++++++ regs/assembler/emitter.py | 82 ++++++++++++++++++++++++++++++++++++++ regs/assembler/lexer.py | 2 +- regs/assembler/parser.py | 44 +++++++++++--------- regs/parse_pvs.py | 9 +++-- regs/parse_pvs_bits.py | 1 + regs/pvs_src_bits.py | 3 ++ 7 files changed, 147 insertions(+), 23 deletions(-) create mode 100644 regs/assembler/__main__.py create mode 100644 regs/assembler/emitter.py diff --git a/regs/assembler/__main__.py b/regs/assembler/__main__.py new file mode 100644 index 0000000..9825c29 --- /dev/null +++ b/regs/assembler/__main__.py @@ -0,0 +1,29 @@ +from assembler.lexer import Lexer +from assembler.parser import Parser +from assembler.emitter import emit_instruction + +sample = b""" +temp[0].xyzw = VE_ADD const[1].xyzw const[1].0000 const[1].0000 +temp[1].xyzw = VE_ADD const[1].xyzw const[1].0000 const[1].0000 +temp[0].x = VE_MAD const[0].x___ temp[1].x___ temp[0].y___ +temp[0].x = VE_FRC temp[0].x___ temp[0].0000 temp[0].0000 +temp[0].x = VE_MAD temp[0].x___ const[1].z___ const[1].w___ +temp[0].y = ME_COS temp[0].xxxx temp[0].0000 temp[0].0000 +temp[0].x = ME_SIN temp[0].xxxx temp[0].0000 temp[0].0000 +temp[0].yz = VE_MUL input[0]._xy_ temp[0]._yy_ temp[0].0000 +out[0].xz = VE_MAD input[0].-y-_-0-_ temp[0].x_0_ temp[0].y_0_ +out[0].yw = VE_MAD input[0]._x_0 temp[0]._x_0 temp[0]._z_1 +""" + +if __name__ == "__main__": + + #buf = b"out[0].xz = VE_MAD input[0].-y-_-0-_ temp[0].x_0_ temp[0].y_0_" + buf = sample + lexer = Lexer(buf) + tokens = list(lexer.lex_tokens()) + parser = Parser(tokens) + for ins in parser.instructions(): + print("\n".join( + f"{value:08x}" + for value in emit_instruction(ins) + )) diff --git a/regs/assembler/emitter.py b/regs/assembler/emitter.py new file mode 100644 index 0000000..a81d84c --- /dev/null +++ b/regs/assembler/emitter.py @@ -0,0 +1,82 @@ +from assembler.keywords import ME, VE, KW +from assembler.parser import Instruction, DestinationOp, Source +import pvs_dst +import pvs_src +import pvs_dst_bits +import pvs_src_bits + +def we_x(s): + return int(0 in s) + +def we_y(s): + return int(1 in s) + +def we_z(s): + return int(2 in s) + +def we_w(s): + return int(3 in s) + +def dst_reg_type(kw): + if kw == KW.temporary: + return pvs_dst_bits.PVS_DST_REG_gen["TEMPORARY"] + elif kw == KW.a0: + return pvs_dst_bits.PVS_DST_REG_gen["A0"] + elif kw == KW.out: + return pvs_dst_bits.PVS_DST_REG_gen["OUT"] + elif kw == KW.out_repl_x: + return pvs_dst_bits.PVS_DST_REG_gen["OUT_REPL_X"] + elif kw == KW.alt_temporary: + return pvs_dst_bits.PVS_DST_REG_gen["ALT_TEMPORARY"] + elif kw == KW.input: + return pvs_dst_bits.PVS_DST_REG_gen["INPUT"] + else: + assert not "Invalid PVS_DST_REG", kw + +def emit_destination_op(dst_op: DestinationOp): + assert type(dst_op.opcode) in {ME, VE} + math_inst = int(type(dst_op.opcode) is ME) + value = ( + pvs_dst.OPCODE_gen(dst_op.opcode.value) + | pvs_dst.MATH_INST_gen(math_inst) + | pvs_dst.REG_TYPE_gen(dst_reg_type(dst_op.type)) + | pvs_dst.OFFSET_gen(dst_op.offset) + | pvs_dst.WE_X_gen(we_x(dst_op.write_enable)) + | pvs_dst.WE_Y_gen(we_y(dst_op.write_enable)) + | pvs_dst.WE_Z_gen(we_z(dst_op.write_enable)) + | pvs_dst.WE_W_gen(we_w(dst_op.write_enable)) + ) + yield value + +def src_reg_type(kw): + if kw == KW.temporary: + return pvs_src_bits.PVS_SRC_REG_TYPE_gen["PVS_SRC_REG_TEMPORARY"] + elif kw == KW.input: + return pvs_src_bits.PVS_SRC_REG_TYPE_gen["PVS_SRC_REG_INPUT"] + elif kw == KW.constant: + return pvs_src_bits.PVS_SRC_REG_TYPE_gen["PVS_SRC_REG_CONSTANT"] + elif kw == KW.alt_temporary: + return pvs_src_bits.PVS_SRC_REG_TYPE_gen["PVS_SRC_REG_ALT_TEMPORARY"] + else: + assert not "Invalid PVS_SRC_REG", kw + +def emit_source(src: Source): + value = ( + pvs_src.REG_TYPE_gen(src_reg_type(src.type)) + | pvs_src.OFFSET_gen(src.offset) + | pvs_src.SWIZZLE_X_gen(src.swizzle.select[0]) + | pvs_src.SWIZZLE_Y_gen(src.swizzle.select[1]) + | pvs_src.SWIZZLE_Z_gen(src.swizzle.select[2]) + | pvs_src.SWIZZLE_W_gen(src.swizzle.select[3]) + | pvs_src.MODIFIER_X_gen(int(src.swizzle.modifier[0])) + | pvs_src.MODIFIER_Y_gen(int(src.swizzle.modifier[1])) + | pvs_src.MODIFIER_Z_gen(int(src.swizzle.modifier[2])) + | pvs_src.MODIFIER_W_gen(int(src.swizzle.modifier[3])) + ) + yield value + +def emit_instruction(ins: Instruction): + yield from emit_destination_op(ins.destination_op) + yield from emit_source(ins.source0) + yield from emit_source(ins.source1) + yield from emit_source(ins.source2) diff --git a/regs/assembler/lexer.py b/regs/assembler/lexer.py index b321bc0..c3523d1 100644 --- a/regs/assembler/lexer.py +++ b/regs/assembler/lexer.py @@ -3,7 +3,7 @@ from enum import Enum, auto from itertools import chain from typing import Union -import keywords +from assembler import keywords DEBUG = True diff --git a/regs/assembler/parser.py b/regs/assembler/parser.py index 8e1ab3b..cf6805a 100644 --- a/regs/assembler/parser.py +++ b/regs/assembler/parser.py @@ -1,15 +1,16 @@ -import lexer -from lexer import TT -from keywords import KW, ME, VE from itertools import pairwise from dataclasses import dataclass from typing import Union +from assembler import lexer +from assembler.lexer import TT +from assembler.keywords import KW, ME, VE + """ temp[0].xyzw = VE_ADD const[1].xyzw const[1].0000 const[1].0000 temp[1].xyzw = VE_ADD const[1].xyzw const[1].0000 const[1].0000 temp[0].x = VE_MAD const[0].x___ temp[1].x___ temp[0].y___ -temp[0].x = VE_FRAC temp[0].x___ temp[0].0000 temp[0].0000 +temp[0].x = VE_FRC temp[0].x___ temp[0].0000 temp[0].0000 temp[0].x = VE_MAD temp[0].x___ const[1].z___ const[1].w___ temp[0].y = ME_COS temp[0].xxxx temp[0].0000 temp[0].0000 temp[0].x = ME_SIN temp[0].xxxx temp[0].0000 temp[0].0000 @@ -54,15 +55,21 @@ def identifier_to_number(token): raise ParseError("expected number", token) return int(bytes(token.lexeme), 10) +def we_ord(c): + if c == ord("w"): + return 3 + else: + return c - ord("x") + def parse_dest_write_enable(token): we_chars = set(b"xyzw") assert token.type is TT.identifier we = bytes(token.lexeme).lower() if not all(c in we_chars for c in we): raise ParseError("expected destination write enable", token) - if not all(a < b for a, b in pairwise(we)) or len(set(we)) != len(we): + if not all(we_ord(a) < we_ord(b) for a, b in pairwise(we)) or len(set(we)) != len(we): raise ParseError("misleading non-sequential write enable", token) - return set(c - ord('x') for c in we) + return set(we_ord(c) for c in we) def parse_source_swizzle(token): select_mapping = { @@ -109,7 +116,9 @@ class Parser: self.tokens = tokens def peek(self): - return self.tokens[self.current_ix] + token = self.tokens[self.current_ix] + #print(token) + return token def at_end_p(self): return self.peek().type == TT.eof @@ -119,8 +128,8 @@ class Parser: self.current_ix += 1 return token - def match(self, token_type, message): - token = self.advance() + def match(self, token_type): + token = self.peek() return token.type == token_type def consume(self, token_type, message): @@ -135,15 +144,6 @@ class Parser: raise ParseError(message, token) return token - - """ - def consume_keyword(self, keyword, message): - token = self.consume(TT.keyword, message) - assert token.keyword is not None - if token.keyword != keyword: - raise ParseError(message, token) - """ - def destination_type(self): token = self.consume(TT.keyword, "expected destination type") destination_keywords = {KW.temporary, KW.a0, KW.out, KW.out_repl_x, KW.alt_temporary, KW.input} @@ -194,6 +194,8 @@ class Parser: return Source(source_type, offset, source_swizzle) def instruction(self): + while self.match(TT.eol): + self.advance() destination_op = self.destination_op() source0 = self.source() source1 = self.source() @@ -201,8 +203,12 @@ class Parser: self.consume_either(TT.eol, TT.eof, "expected newline or EOF") return Instruction(destination_op, source0, source1, source2) + def instructions(self): + while not self.match(TT.eof): + yield self.instruction() + if __name__ == "__main__": - from lexer import Lexer + from assembler.lexer import Lexer buf = b"out[0].xz = VE_MAD input[0].-y-_-0-_ temp[0].x_0_ temp[0].y_0_" lexer = Lexer(buf) tokens = list(lexer.lex_tokens()) diff --git a/regs/parse_pvs.py b/regs/parse_pvs.py index ebe4bda..9582f17 100644 --- a/regs/parse_pvs.py +++ b/regs/parse_pvs.py @@ -67,12 +67,15 @@ def low_from_bits(bits): return bits def generate_python(prefix, fields): - #out(0, f"class {prefix}:") fields = list(fields) for field_name, bits, description in fields: - #out(1, f"@staticmethod") out(0, f"def {field_name}(n):") - out(1, f"return (n >> {low_from_bits(bits)}) & {mask_from_bits(bits)}") + out(1, f"return (n >> {low_from_bits(bits)}) & {hex(mask_from_bits(bits))}") + out(0, "") + + out(0, f"def {field_name}_gen(n):") + out(1, f"assert ({hex(mask_from_bits(bits))} & n) == n, (n, {hex(mask_from_bits(bits))})") + out(1, f"return n << {low_from_bits(bits)}") out(0, "") out(0, "table = [") diff --git a/regs/parse_pvs_bits.py b/regs/parse_pvs_bits.py index 92ac93a..89e9efc 100644 --- a/regs/parse_pvs_bits.py +++ b/regs/parse_pvs_bits.py @@ -27,4 +27,5 @@ while ix < len(lines): print(f' {value.strip()}: "{key.strip()}",') ix += 1 print("}") + print(f"{name}_gen = dict((v, k) for k, v in {name}.items())") print() diff --git a/regs/pvs_src_bits.py b/regs/pvs_src_bits.py index 75e59a0..5bfe5af 100644 --- a/regs/pvs_src_bits.py +++ b/regs/pvs_src_bits.py @@ -4,6 +4,7 @@ PVS_SRC_REG_TYPE = { 2: "PVS_SRC_REG_CONSTANT", 3: "PVS_SRC_REG_ALT_TEMPORARY", } +PVS_SRC_REG_TYPE_gen = dict((v, k) for k, v in PVS_SRC_REG_TYPE.items()) PVS_SRC_SWIZZLE_SEL = { 0: "PVS_SRC_SELECT_X", @@ -13,10 +14,12 @@ PVS_SRC_SWIZZLE_SEL = { 4: "PVS_SRC_SELECT_FORCE_0", 5: "PVS_SRC_SELECT_FORCE_1", } +PVS_SRC_SWIZZLE_SEL_gen = dict((v, k) for k, v in PVS_SRC_SWIZZLE_SEL.items()) PVS_SRC_ADDR_MODE = { 0: "Absolute addressing", 1: "Relative addressing using A0 register", 2: "Relative addressing using I0 register", } +PVS_SRC_ADDR_MODE_gen = dict((v, k) for k, v in PVS_SRC_ADDR_MODE.items())