From d2e81516a1f4b8192438dc14586d4789dd242c4a Mon Sep 17 00:00:00 2001 From: Zack Buhman Date: Sat, 18 Oct 2025 21:54:41 -0500 Subject: [PATCH] drm/texture_cube_clear_zwrite_vertex_shader: fixed --- drm/clear_nop.vs.asm | 1 + drm/cube_rotate.vs.asm | 23 ++++----- drm/cube_rotate.vs.inc | 23 ++++----- drm/texture_cube_clear_zwrite_vertex_shader.c | 50 +++++++++++++------ regs/assembler/__main__.py | 2 + regs/assembler/emitter.py | 7 ++- regs/assembler/keywords.py | 11 ++++ regs/assembler/parser.py | 4 +- regs/assembler/validator.py | 25 ++++++++++ ...vs_opcode_and_destination_operand_bits.txt | 3 ++ regs/pvs_disassemble.py | 4 +- 11 files changed, 106 insertions(+), 47 deletions(-) create mode 100644 drm/clear_nop.vs.asm create mode 100644 regs/assembler/validator.py diff --git a/drm/clear_nop.vs.asm b/drm/clear_nop.vs.asm new file mode 100644 index 0000000..3140b2a --- /dev/null +++ b/drm/clear_nop.vs.asm @@ -0,0 +1 @@ +out[0].xyzw = VE_ADD input[0].xyzw input[0].0000 input[0].0000 diff --git a/drm/cube_rotate.vs.asm b/drm/cube_rotate.vs.asm index 50629c8..8fed9ab 100644 --- a/drm/cube_rotate.vs.asm +++ b/drm/cube_rotate.vs.asm @@ -12,20 +12,15 @@ temp[0].xy = VE_ADD const[1].xy__ const[1].00__ ; calculation. ; ; This 3-instruction sequence linearly remaps the range [-∞,+∞] to [-π,+π] -temp[0].x = VE_MAD temp[0].x___ const[0].x___ const[0].y___ -temp[0].x = VE_FRC temp[0].x___ -temp[0].x = VE_MAD temp[0].x___ const[0].z___ const[0].w___ - -; the same thing, but for temp[0].y -temp[0].y = VE_MAD temp[0].y___ const[0].x___ const[0].y___ -temp[0].y = VE_FRC temp[0].y___ -temp[0].y = VE_MAD temp[0].y___ const[0].z___ const[0].w___ +temp[0].xy = VE_MAD temp[0].xy__ const[0].xx__ const[0].yy__ +temp[0].xy = VE_FRC temp[0].xy__ +temp[0].xy = VE_MAD temp[0].xy__ const[0].zz__ const[0].ww__ ; sin and cos temp[3].x = ME_SIN temp[0].___x temp[3].y = ME_COS temp[0].___x -temp[3].z = ME_SIN temp[0].___y -temp[3].w = ME_COS temp[0].___y +temp[4].x = ME_SIN temp[0].___y +temp[4].y = ME_COS temp[0].___y ; temp[3] now contains: ; temp[3] = {sin(theta1), cos(theta1), sin(theta2), cos(theta2)} @@ -49,22 +44,22 @@ temp[1].xyz = VE_MAD input[0].xyy_ temp[3].1yx_ temp[1].0yz_ ; x_ = (-z1 * st2) ; z_ = ( z1 * ct2) -temp[2].xz = VE_MUL temp[1].-z_z_ temp[3].z_w_ +temp[2].xz = VE_MUL temp[1].-z_z_ temp[4].x_y_ ; x2 = (x1 * ct2 + nz1st2) ; y2 = (y1 * 1 + 0) ; z2 = (x1 * st2 + z1ct2) -temp[2].xyz = VE_MAD temp[1].xyx_ temp[3].w1z_ temp[2].x0z_ +temp[2].xyz = VE_MAD temp[1].xyx_ temp[4].y1x_ temp[2].x0z_ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; scale ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -temp[2].xyz = VE_MAD temp[1].xyz_ const[1].zzz_ const[1].00w_ +temp[3].xyz = VE_MAD temp[2].xyz_ const[1].zzz_ const[1].00w_ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; output ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -out[0].xyzw = VE_MUL temp[2].xyzz temp[2].11-z1 +out[0].xyzw = VE_MUL temp[3].xyzz temp[3].11-z1 out[1].xyzw = VE_ADD input[1].xyzw input[1].0000 diff --git a/drm/cube_rotate.vs.inc b/drm/cube_rotate.vs.inc index a540730..7d20d6f 100644 --- a/drm/cube_rotate.vs.inc +++ b/drm/cube_rotate.vs.inc @@ -1,18 +1,15 @@ 0x00300003, 0x01f90022, 0x01fc8022, 0x01ffe022, -0x00100004, 0x01ff0000, 0x01ff0002, 0x01ff2002, -0x00100006, 0x01ff0000, 0x01ffe000, 0x01ffe000, -0x00100004, 0x01ff0000, 0x01ff4002, 0x01ff6002, -0x00200004, 0x01ff2000, 0x01ff0002, 0x01ff2002, -0x00200006, 0x01ff2000, 0x01ffe000, 0x01ffe000, -0x00200004, 0x01ff2000, 0x01ff4002, 0x01ff6002, +0x00300004, 0x01f90000, 0x01f90002, 0x01f92002, +0x00300006, 0x01f90000, 0x01ffe000, 0x01ffe000, +0x00300004, 0x01f90000, 0x01fa4002, 0x01fb6002, 0x00106050, 0x003fe000, 0x01ffe000, 0x01ffe000, 0x00206051, 0x003fe000, 0x01ffe000, 0x01ffe000, -0x00406050, 0x007fe000, 0x01ffe000, 0x01ffe000, -0x00806051, 0x007fe000, 0x01ffe000, 0x01ffe000, +0x00108050, 0x007fe000, 0x01ffe000, 0x01ffe000, +0x00208051, 0x007fe000, 0x01ffe000, 0x01ffe000, 0x00602002, 0x05d2e001, 0x01c8e060, 0x01ffe060, -0x00702004, 0x01c90001, 0x01c1a060, 0x01d18020, -0x00504002, 0x03d74020, 0x01df4060, 0x01ffe060, -0x00704004, 0x01c10020, 0x01d56060, 0x01d40040, -0x00704004, 0x01d10020, 0x01d24022, 0x09dc8022, -0x00f00202, 0x01510040, 0x0955a040, 0x01ffe040, +0x00702080, 0x01c90001, 0x01c1a060, 0x01d18020, +0x00504002, 0x03d74020, 0x01cf0080, 0x01ffe080, +0x00704080, 0x01c10020, 0x01c52080, 0x01d40040, +0x00706004, 0x01d10040, 0x01d24022, 0x01dc8022, +0x00f00202, 0x00910060, 0x0955a060, 0x01ffe060, 0x00f02203, 0x00d10021, 0x01248021, 0x01ffe021, diff --git a/drm/texture_cube_clear_zwrite_vertex_shader.c b/drm/texture_cube_clear_zwrite_vertex_shader.c index f065ce6..896abda 100644 --- a/drm/texture_cube_clear_zwrite_vertex_shader.c +++ b/drm/texture_cube_clear_zwrite_vertex_shader.c @@ -159,9 +159,10 @@ static const int fragment_shader_instructions = (fragment_shader_length / 6) - 1 static const uint32_t vertex_shader[] = { //#include "../shader_examples/mesa/texture_cube.vs.txt" #include "cube_rotate.vs.inc" + #include "clear_nop.vs.inc" }; static const int vertex_shader_length = (sizeof (vertex_shader)) / (sizeof (vertex_shader[0])); -static const int vertex_shader_instructions = vertex_shader_length / 4; +static const int vertex_shader_instructions = (vertex_shader_length / 4) - 1; union u32_f32 { uint32_t u32; @@ -228,6 +229,19 @@ int _3d_clear(int ix) T0V(TX_ENABLE, 0x00000000); + ////////////////////////////////////////////////////////////////////////////// + // VAP_PVS + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_PVS_CODE_CNTL_0 + , VAP_PVS_CODE_CNTL_0__PVS_FIRST_INST(vertex_shader_instructions) + | VAP_PVS_CODE_CNTL_0__PVS_XYZW_VALID_INST(vertex_shader_instructions) + | VAP_PVS_CODE_CNTL_0__PVS_LAST_INST(vertex_shader_instructions) + ); + T0V(VAP_PVS_CODE_CNTL_1 + , VAP_PVS_CODE_CNTL_1__PVS_LAST_VTX_SRC_INST(vertex_shader_instructions) + ); + ////////////////////////////////////////////////////////////////////////////// // VAP ////////////////////////////////////////////////////////////////////////////// @@ -241,7 +255,7 @@ int _3d_clear(int ix) | VAP_VTE_CNTL__VTX_Z_FMT(1) ); - T0V(VAP_CNTL_STATUS, VAP_CNTL_STATUS__PVS_BYPASS(1)); + T0V(VAP_CNTL_STATUS, VAP_CNTL_STATUS__PVS_BYPASS(0)); T0V(VAP_PROG_STREAM_CNTL_0 , VAP_PROG_STREAM_CNTL__DATA_TYPE_0__FLOAT_2 @@ -252,7 +266,7 @@ int _3d_clear(int ix) T0V(VAP_PROG_STREAM_CNTL_EXT_0 , VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0__SELECT_Y - | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_FP_ONE + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_FP_ZERO | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0__SELECT_FP_ONE | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111) ); @@ -321,8 +335,7 @@ int _3d_cube(int ix, float theta) | ZB_CNTL__ZWRITEENABLE__ENABLE // 1 ); T0V(ZB_ZSTENCILCNTL - //, ZB_ZSTENCILCNTL__ZFUNC(5) // greater than - , ZB_ZSTENCILCNTL__ZFUNC__ALWAYS + , ZB_ZSTENCILCNTL__ZFUNC(5) // greater than ); T0V(ZB_FORMAT @@ -408,7 +421,7 @@ int _3d_cube(int ix, float theta) ib[ix++].u32 = 2 * 4; // index into relocs array ////////////////////////////////////////////////////////////////////////////// - // VAP + // VAP_PVS ////////////////////////////////////////////////////////////////////////////// T0V(VAP_PVS_CONST_CNTL @@ -421,7 +434,8 @@ int _3d_cube(int ix, float theta) ); float theta1 = theta; - float theta2 = 3.14 * theta; + //float theta2 = 3.14f * theta; + float theta2 = theta; float consts[] = { I_PI_2, 0.5f, PI_2, -PI, theta1, theta2, 0.2f, 0.5f, @@ -431,6 +445,19 @@ int _3d_cube(int ix, float theta) for (int i = 0; i < consts_length; i++) ib[ix++].f32 = consts[i]; + T0V(VAP_PVS_CODE_CNTL_0 + , VAP_PVS_CODE_CNTL_0__PVS_FIRST_INST(0) + | VAP_PVS_CODE_CNTL_0__PVS_XYZW_VALID_INST((vertex_shader_instructions - 1)) + | VAP_PVS_CODE_CNTL_0__PVS_LAST_INST((vertex_shader_instructions - 1)) + ); + T0V(VAP_PVS_CODE_CNTL_1 + , VAP_PVS_CODE_CNTL_1__PVS_LAST_VTX_SRC_INST((vertex_shader_instructions - 1)) + ); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + T0V(VAP_CLIP_CNTL , VAP_CLIP_CNTL__PS_UCP_MODE(3) ); @@ -825,15 +852,6 @@ int indirect_buffer(float theta) assert(vertex_shader_length % 4 == 0); printf("vs instructions %d\n", vertex_shader_instructions); - T0V(VAP_PVS_CODE_CNTL_0 - , VAP_PVS_CODE_CNTL_0__PVS_FIRST_INST(0) - | VAP_PVS_CODE_CNTL_0__PVS_XYZW_VALID_INST((vertex_shader_instructions - 1)) - | VAP_PVS_CODE_CNTL_0__PVS_LAST_INST((vertex_shader_instructions - 1)) - ); - T0V(VAP_PVS_CODE_CNTL_1 - , VAP_PVS_CODE_CNTL_1__PVS_LAST_VTX_SRC_INST((vertex_shader_instructions - 1)) - ); - T0V(VAP_PVS_VECTOR_INDX_REG , VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(0) ); diff --git a/regs/assembler/__main__.py b/regs/assembler/__main__.py index 4ed3963..9dd3337 100644 --- a/regs/assembler/__main__.py +++ b/regs/assembler/__main__.py @@ -3,6 +3,7 @@ import sys from assembler.lexer import Lexer, LexerError from assembler.parser import Parser, ParserError from assembler.emitter import emit_instruction +from assembler.validator import validate_instruction sample = b""" temp[0].xyzw = VE_ADD const[1].xyzw const[1].0000 const[1].0000 @@ -22,6 +23,7 @@ def frontend_inner(buf): tokens = list(lexer.lex_tokens()) parser = Parser(tokens) for ins, start_end in parser.instructions(): + ins = validate_instruction(ins) yield list(emit_instruction(ins)), start_end def print_error(filename, buf, e): diff --git a/regs/assembler/emitter.py b/regs/assembler/emitter.py index 0e57c49..53bbb62 100644 --- a/regs/assembler/emitter.py +++ b/regs/assembler/emitter.py @@ -1,4 +1,4 @@ -from assembler.keywords import ME, VE, KW +from assembler.keywords import ME, VE, MVE, KW from assembler.parser import Instruction, DestinationOp, Source import pvs_dst import pvs_src @@ -34,8 +34,10 @@ def dst_reg_type(kw): assert not "Invalid PVS_DST_REG", kw def emit_destination_op(dst_op: DestinationOp): - assert type(dst_op.opcode) in {ME, VE} + assert type(dst_op.opcode) in {ME, VE, MVE} math_inst = int(type(dst_op.opcode) is ME) + if dst_op.macro: + assert dst_op.opcode.value in {0, 1} value = ( pvs_dst.OPCODE_gen(dst_op.opcode.value) | pvs_dst.MATH_INST_gen(math_inst) @@ -45,6 +47,7 @@ def emit_destination_op(dst_op: DestinationOp): | pvs_dst.WE_Y_gen(we_y(dst_op.write_enable)) | pvs_dst.WE_Z_gen(we_z(dst_op.write_enable)) | pvs_dst.WE_W_gen(we_w(dst_op.write_enable)) + | pvs_dst.MACRO_INST_gen(int(dst_op.macro)) ) yield value diff --git a/regs/assembler/keywords.py b/regs/assembler/keywords.py index 273357c..f0e6a05 100644 --- a/regs/assembler/keywords.py +++ b/regs/assembler/keywords.py @@ -2,6 +2,12 @@ from dataclasses import dataclass from typing import Optional from enum import Enum, auto +@dataclass +class MVE: + name: str + synonym: Optional[str] + value: int + @dataclass class VE: name: str @@ -14,6 +20,11 @@ class ME: synonym: Optional[str] value: int +macro_vector_operations = [ + MVE(b"MACRO_OP_2CLK_MADD" , None , 0), + MVE(b"MACRO_OP_2CLK_M2X_ADD" , None , 1), +] + vector_operations = [ # name synonym value VE(b"VECTOR_NO_OP" , b"VE_NOP" , 0), diff --git a/regs/assembler/parser.py b/regs/assembler/parser.py index c08a90c..40a6c13 100644 --- a/regs/assembler/parser.py +++ b/regs/assembler/parser.py @@ -25,6 +25,7 @@ class DestinationOp: offset: int write_enable: set[int] opcode: Union[VE, ME] + macro: bool @dataclass class SourceSwizzle: @@ -172,7 +173,8 @@ class Parser: write_enable = parse_dest_write_enable(write_enable_token) self.consume(TT.equal, "expected equals") opcode = self.opcode() - return DestinationOp(destination_type, offset_value, write_enable, opcode) + macro = False + return DestinationOp(destination_type, offset_value, write_enable, opcode, macro) def source_type(self): token = self.consume(TT.keyword, "expected source type") diff --git a/regs/assembler/validator.py b/regs/assembler/validator.py new file mode 100644 index 0000000..971ba2c --- /dev/null +++ b/regs/assembler/validator.py @@ -0,0 +1,25 @@ +from assembler.keywords import ME, VE, macro_vector_operations + +class ValidatorError(Exception): + pass + +def validate_instruction(ins): + addresses = len(set( + source.offset + for source in [ins.source0, ins.source1, ins.source2] + if source is not None + )) + if addresses > 2: + if type(ins.destination_op.opcode) is not VE: + raise ValidatorError("too many addresses for non-VE instruction", ins) + if ins.destination_op.opcode.name not in {b"VE_MULTIPLYX2_ADD", b"VE_MULTIPLY_ADD"}: + raise ValidatorError("too many addresses for VE non-multiply-add instruction", ins) + assert ins.destination_op.macro == False, ins + ins.destination_op.macro = True + if ins.destination_op.opcode.name == b"VE_MULTIPLY_ADD": + ins.destination_op.opcode = macro_vector_operations[0] + elif ins.destination_op.opcode.name == b"VE_MULTIPLYX2_ADD": + ins.destination_op.opcode = macro_vector_operations[1] + else: + assert False + return ins diff --git a/regs/pvs_bits/pvs_opcode_and_destination_operand_bits.txt b/regs/pvs_bits/pvs_opcode_and_destination_operand_bits.txt index 57eae2a..8e3ba9e 100644 --- a/regs/pvs_bits/pvs_opcode_and_destination_operand_bits.txt +++ b/regs/pvs_bits/pvs_opcode_and_destination_operand_bits.txt @@ -1,3 +1,6 @@ +MACRO_OPCODE: +MACRO_OP_2CLK_MADD = 0 +MACRO_OP_2CLK_M2X_ADD = 1 VECTOR_OPCODE: VECTOR_NO_OP = 0 VE_DOT_PRODUCT = 1 diff --git a/regs/pvs_disassemble.py b/regs/pvs_disassemble.py index 9d18c87..c90dea6 100644 --- a/regs/pvs_disassemble.py +++ b/regs/pvs_disassemble.py @@ -88,7 +88,6 @@ def parse_dst_op(dst_op): addr_sel = pvs_dst.ADDR_SEL(dst_op) assert addr_mode == 0 - assert macro_inst == 0 assert pred_enable == 0 assert pred_sense == 0 assert dual_math_op == 0 @@ -102,7 +101,10 @@ def parse_dst_op(dst_op): parts.append(f"{reg_str}[{offset}].{we_swizzle}") if math_inst: + assert not macro_inst parts.append(op_substitutions(pvs_dst_bits.MATH_OPCODE[opcode])) + elif macro_inst: + parts.append(op_substitutions(pvs_dst_bits.MACRO_OPCODE[opcode])) else: parts.append(op_substitutions(pvs_dst_bits.VECTOR_OPCODE[opcode]))