drm/texture_cube_clear_zwrite_vertex_shader: fixed

This commit is contained in:
Zack Buhman 2025-10-18 21:54:41 -05:00
parent 0743f780af
commit d2e81516a1
11 changed files with 106 additions and 47 deletions

1
drm/clear_nop.vs.asm Normal file
View File

@ -0,0 +1 @@
; Pass-through vertex shader: out[0] = input[0] + 0.
; The `0000` swizzle selects the constant zero for every component, so the
; add copies input[0] to out[0] unchanged.
; NOTE(review): the third operand is presumably ignored by VE_ADD -- confirm.
out[0].xyzw = VE_ADD input[0].xyzw input[0].0000 input[0].0000

View File

@ -12,20 +12,15 @@ temp[0].xy = VE_ADD const[1].xy__ const[1].00__
; calculation. ; calculation.
; ;
; This 3-instruction sequence linearly remaps the range [-∞,+∞] to [-π,+π] ; This 3-instruction sequence linearly remaps the range [-∞,+∞] to [-π,+π]
temp[0].x = VE_MAD temp[0].x___ const[0].x___ const[0].y___ temp[0].xy = VE_MAD temp[0].xy__ const[0].xx__ const[0].yy__
temp[0].x = VE_FRC temp[0].x___ temp[0].xy = VE_FRC temp[0].xy__
temp[0].x = VE_MAD temp[0].x___ const[0].z___ const[0].w___ temp[0].xy = VE_MAD temp[0].xy__ const[0].zz__ const[0].ww__
; the same thing, but for temp[0].y
temp[0].y = VE_MAD temp[0].y___ const[0].x___ const[0].y___
temp[0].y = VE_FRC temp[0].y___
temp[0].y = VE_MAD temp[0].y___ const[0].z___ const[0].w___
; sin and cos ; sin and cos
temp[3].x = ME_SIN temp[0].___x temp[3].x = ME_SIN temp[0].___x
temp[3].y = ME_COS temp[0].___x temp[3].y = ME_COS temp[0].___x
temp[3].z = ME_SIN temp[0].___y temp[4].x = ME_SIN temp[0].___y
temp[3].w = ME_COS temp[0].___y temp[4].y = ME_COS temp[0].___y
; temp[3] now contains: ; temp[3] and temp[4] now contain:
; temp[3] = {sin(theta1), cos(theta1), sin(theta2), cos(theta2)} ; temp[3].xy = {sin(theta1), cos(theta1)}, temp[4].xy = {sin(theta2), cos(theta2)}
@ -49,22 +44,22 @@ temp[1].xyz = VE_MAD input[0].xyy_ temp[3].1yx_ temp[1].0yz_
; x_ = (-z1 * st2) ; x_ = (-z1 * st2)
; z_ = ( z1 * ct2) ; z_ = ( z1 * ct2)
temp[2].xz = VE_MUL temp[1].-z_z_ temp[3].z_w_ temp[2].xz = VE_MUL temp[1].-z_z_ temp[4].x_y_
; x2 = (x1 * ct2 + nz1st2) ; x2 = (x1 * ct2 + nz1st2)
; y2 = (y1 * 1 + 0) ; y2 = (y1 * 1 + 0)
; z2 = (x1 * st2 + z1ct2) ; z2 = (x1 * st2 + z1ct2)
temp[2].xyz = VE_MAD temp[1].xyx_ temp[3].w1z_ temp[2].x0z_ temp[2].xyz = VE_MAD temp[1].xyx_ temp[4].y1x_ temp[2].x0z_
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; scale ; scale
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
temp[2].xyz = VE_MAD temp[1].xyz_ const[1].zzz_ const[1].00w_ temp[3].xyz = VE_MAD temp[2].xyz_ const[1].zzz_ const[1].00w_
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; output ; output
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
out[0].xyzw = VE_MUL temp[2].xyzz temp[2].11-z1 out[0].xyzw = VE_MUL temp[3].xyzz temp[3].11-z1
out[1].xyzw = VE_ADD input[1].xyzw input[1].0000 out[1].xyzw = VE_ADD input[1].xyzw input[1].0000

View File

@ -1,18 +1,15 @@
0x00300003, 0x01f90022, 0x01fc8022, 0x01ffe022, 0x00300003, 0x01f90022, 0x01fc8022, 0x01ffe022,
0x00100004, 0x01ff0000, 0x01ff0002, 0x01ff2002, 0x00300004, 0x01f90000, 0x01f90002, 0x01f92002,
0x00100006, 0x01ff0000, 0x01ffe000, 0x01ffe000, 0x00300006, 0x01f90000, 0x01ffe000, 0x01ffe000,
0x00100004, 0x01ff0000, 0x01ff4002, 0x01ff6002, 0x00300004, 0x01f90000, 0x01fa4002, 0x01fb6002,
0x00200004, 0x01ff2000, 0x01ff0002, 0x01ff2002,
0x00200006, 0x01ff2000, 0x01ffe000, 0x01ffe000,
0x00200004, 0x01ff2000, 0x01ff4002, 0x01ff6002,
0x00106050, 0x003fe000, 0x01ffe000, 0x01ffe000, 0x00106050, 0x003fe000, 0x01ffe000, 0x01ffe000,
0x00206051, 0x003fe000, 0x01ffe000, 0x01ffe000, 0x00206051, 0x003fe000, 0x01ffe000, 0x01ffe000,
0x00406050, 0x007fe000, 0x01ffe000, 0x01ffe000, 0x00108050, 0x007fe000, 0x01ffe000, 0x01ffe000,
0x00806051, 0x007fe000, 0x01ffe000, 0x01ffe000, 0x00208051, 0x007fe000, 0x01ffe000, 0x01ffe000,
0x00602002, 0x05d2e001, 0x01c8e060, 0x01ffe060, 0x00602002, 0x05d2e001, 0x01c8e060, 0x01ffe060,
0x00702004, 0x01c90001, 0x01c1a060, 0x01d18020, 0x00702080, 0x01c90001, 0x01c1a060, 0x01d18020,
0x00504002, 0x03d74020, 0x01df4060, 0x01ffe060, 0x00504002, 0x03d74020, 0x01cf0080, 0x01ffe080,
0x00704004, 0x01c10020, 0x01d56060, 0x01d40040, 0x00704080, 0x01c10020, 0x01c52080, 0x01d40040,
0x00704004, 0x01d10020, 0x01d24022, 0x09dc8022, 0x00706004, 0x01d10040, 0x01d24022, 0x01dc8022,
0x00f00202, 0x01510040, 0x0955a040, 0x01ffe040, 0x00f00202, 0x00910060, 0x0955a060, 0x01ffe060,
0x00f02203, 0x00d10021, 0x01248021, 0x01ffe021, 0x00f02203, 0x00d10021, 0x01248021, 0x01ffe021,

View File

@ -159,9 +159,10 @@ static const int fragment_shader_instructions = (fragment_shader_length / 6) - 1
static const uint32_t vertex_shader[] = { static const uint32_t vertex_shader[] = {
//#include "../shader_examples/mesa/texture_cube.vs.txt" //#include "../shader_examples/mesa/texture_cube.vs.txt"
#include "cube_rotate.vs.inc" #include "cube_rotate.vs.inc"
#include "clear_nop.vs.inc"
}; };
static const int vertex_shader_length = (sizeof (vertex_shader)) / (sizeof (vertex_shader[0])); static const int vertex_shader_length = (sizeof (vertex_shader)) / (sizeof (vertex_shader[0]));
static const int vertex_shader_instructions = vertex_shader_length / 4; static const int vertex_shader_instructions = (vertex_shader_length / 4) - 1;
union u32_f32 { union u32_f32 {
uint32_t u32; uint32_t u32;
@ -228,6 +229,19 @@ int _3d_clear(int ix)
T0V(TX_ENABLE, 0x00000000); T0V(TX_ENABLE, 0x00000000);
//////////////////////////////////////////////////////////////////////////////
// VAP_PVS
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_PVS_CODE_CNTL_0
, VAP_PVS_CODE_CNTL_0__PVS_FIRST_INST(vertex_shader_instructions)
| VAP_PVS_CODE_CNTL_0__PVS_XYZW_VALID_INST(vertex_shader_instructions)
| VAP_PVS_CODE_CNTL_0__PVS_LAST_INST(vertex_shader_instructions)
);
T0V(VAP_PVS_CODE_CNTL_1
, VAP_PVS_CODE_CNTL_1__PVS_LAST_VTX_SRC_INST(vertex_shader_instructions)
);
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// VAP // VAP
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
@ -241,7 +255,7 @@ int _3d_clear(int ix)
| VAP_VTE_CNTL__VTX_Z_FMT(1) | VAP_VTE_CNTL__VTX_Z_FMT(1)
); );
T0V(VAP_CNTL_STATUS, VAP_CNTL_STATUS__PVS_BYPASS(1)); T0V(VAP_CNTL_STATUS, VAP_CNTL_STATUS__PVS_BYPASS(0));
T0V(VAP_PROG_STREAM_CNTL_0 T0V(VAP_PROG_STREAM_CNTL_0
, VAP_PROG_STREAM_CNTL__DATA_TYPE_0__FLOAT_2 , VAP_PROG_STREAM_CNTL__DATA_TYPE_0__FLOAT_2
@ -252,7 +266,7 @@ int _3d_clear(int ix)
T0V(VAP_PROG_STREAM_CNTL_EXT_0 T0V(VAP_PROG_STREAM_CNTL_EXT_0
, VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X , VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0__SELECT_Y | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0__SELECT_Y
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_FP_ONE | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_FP_ZERO
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0__SELECT_FP_ONE | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0__SELECT_FP_ONE
| VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111) | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111)
); );
@ -321,8 +335,7 @@ int _3d_cube(int ix, float theta)
| ZB_CNTL__ZWRITEENABLE__ENABLE // 1 | ZB_CNTL__ZWRITEENABLE__ENABLE // 1
); );
T0V(ZB_ZSTENCILCNTL T0V(ZB_ZSTENCILCNTL
//, ZB_ZSTENCILCNTL__ZFUNC(5) // greater than , ZB_ZSTENCILCNTL__ZFUNC(5) // greater than
, ZB_ZSTENCILCNTL__ZFUNC__ALWAYS
); );
T0V(ZB_FORMAT T0V(ZB_FORMAT
@ -408,7 +421,7 @@ int _3d_cube(int ix, float theta)
ib[ix++].u32 = 2 * 4; // index into relocs array ib[ix++].u32 = 2 * 4; // index into relocs array
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// VAP // VAP_PVS
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
T0V(VAP_PVS_CONST_CNTL T0V(VAP_PVS_CONST_CNTL
@ -421,7 +434,8 @@ int _3d_cube(int ix, float theta)
); );
float theta1 = theta; float theta1 = theta;
float theta2 = 3.14 * theta; //float theta2 = 3.14f * theta;
float theta2 = theta;
float consts[] = { float consts[] = {
I_PI_2, 0.5f, PI_2, -PI, I_PI_2, 0.5f, PI_2, -PI,
theta1, theta2, 0.2f, 0.5f, theta1, theta2, 0.2f, 0.5f,
@ -431,6 +445,19 @@ int _3d_cube(int ix, float theta)
for (int i = 0; i < consts_length; i++) for (int i = 0; i < consts_length; i++)
ib[ix++].f32 = consts[i]; ib[ix++].f32 = consts[i];
T0V(VAP_PVS_CODE_CNTL_0
, VAP_PVS_CODE_CNTL_0__PVS_FIRST_INST(0)
| VAP_PVS_CODE_CNTL_0__PVS_XYZW_VALID_INST((vertex_shader_instructions - 1))
| VAP_PVS_CODE_CNTL_0__PVS_LAST_INST((vertex_shader_instructions - 1))
);
T0V(VAP_PVS_CODE_CNTL_1
, VAP_PVS_CODE_CNTL_1__PVS_LAST_VTX_SRC_INST((vertex_shader_instructions - 1))
);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3) , VAP_CLIP_CNTL__PS_UCP_MODE(3)
); );
@ -825,15 +852,6 @@ int indirect_buffer(float theta)
assert(vertex_shader_length % 4 == 0); assert(vertex_shader_length % 4 == 0);
printf("vs instructions %d\n", vertex_shader_instructions); printf("vs instructions %d\n", vertex_shader_instructions);
T0V(VAP_PVS_CODE_CNTL_0
, VAP_PVS_CODE_CNTL_0__PVS_FIRST_INST(0)
| VAP_PVS_CODE_CNTL_0__PVS_XYZW_VALID_INST((vertex_shader_instructions - 1))
| VAP_PVS_CODE_CNTL_0__PVS_LAST_INST((vertex_shader_instructions - 1))
);
T0V(VAP_PVS_CODE_CNTL_1
, VAP_PVS_CODE_CNTL_1__PVS_LAST_VTX_SRC_INST((vertex_shader_instructions - 1))
);
T0V(VAP_PVS_VECTOR_INDX_REG T0V(VAP_PVS_VECTOR_INDX_REG
, VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(0) , VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(0)
); );

View File

@ -3,6 +3,7 @@ import sys
from assembler.lexer import Lexer, LexerError from assembler.lexer import Lexer, LexerError
from assembler.parser import Parser, ParserError from assembler.parser import Parser, ParserError
from assembler.emitter import emit_instruction from assembler.emitter import emit_instruction
from assembler.validator import validate_instruction
sample = b""" sample = b"""
temp[0].xyzw = VE_ADD const[1].xyzw const[1].0000 const[1].0000 temp[0].xyzw = VE_ADD const[1].xyzw const[1].0000 const[1].0000
@ -22,6 +23,7 @@ def frontend_inner(buf):
tokens = list(lexer.lex_tokens()) tokens = list(lexer.lex_tokens())
parser = Parser(tokens) parser = Parser(tokens)
for ins, start_end in parser.instructions(): for ins, start_end in parser.instructions():
ins = validate_instruction(ins)
yield list(emit_instruction(ins)), start_end yield list(emit_instruction(ins)), start_end
def print_error(filename, buf, e): def print_error(filename, buf, e):

View File

@ -1,4 +1,4 @@
from assembler.keywords import ME, VE, KW from assembler.keywords import ME, VE, MVE, KW
from assembler.parser import Instruction, DestinationOp, Source from assembler.parser import Instruction, DestinationOp, Source
import pvs_dst import pvs_dst
import pvs_src import pvs_src
@ -34,8 +34,10 @@ def dst_reg_type(kw):
assert not "Invalid PVS_DST_REG", kw assert not "Invalid PVS_DST_REG", kw
def emit_destination_op(dst_op: DestinationOp): def emit_destination_op(dst_op: DestinationOp):
assert type(dst_op.opcode) in {ME, VE} assert type(dst_op.opcode) in {ME, VE, MVE}
math_inst = int(type(dst_op.opcode) is ME) math_inst = int(type(dst_op.opcode) is ME)
if dst_op.macro:
assert dst_op.opcode.value in {0, 1}
value = ( value = (
pvs_dst.OPCODE_gen(dst_op.opcode.value) pvs_dst.OPCODE_gen(dst_op.opcode.value)
| pvs_dst.MATH_INST_gen(math_inst) | pvs_dst.MATH_INST_gen(math_inst)
@ -45,6 +47,7 @@ def emit_destination_op(dst_op: DestinationOp):
| pvs_dst.WE_Y_gen(we_y(dst_op.write_enable)) | pvs_dst.WE_Y_gen(we_y(dst_op.write_enable))
| pvs_dst.WE_Z_gen(we_z(dst_op.write_enable)) | pvs_dst.WE_Z_gen(we_z(dst_op.write_enable))
| pvs_dst.WE_W_gen(we_w(dst_op.write_enable)) | pvs_dst.WE_W_gen(we_w(dst_op.write_enable))
| pvs_dst.MACRO_INST_gen(int(dst_op.macro))
) )
yield value yield value

View File

@ -2,6 +2,12 @@ from dataclasses import dataclass
from typing import Optional from typing import Optional
from enum import Enum, auto from enum import Enum, auto
@dataclass
class MVE:
    """Keyword record for a macro vector opcode.

    Instances are listed in ``macro_vector_operations``.
    """
    # Canonical opcode name. Annotated ``str``, but the table in this module
    # passes bytes literals (e.g. b"MACRO_OP_2CLK_MADD").
    name: str
    # Optional alternate spelling; the entries in this module pass None.
    synonym: Optional[str]
    # Numeric opcode value.
    value: int
@dataclass @dataclass
class VE: class VE:
name: str name: str
@ -14,6 +20,11 @@ class ME:
synonym: Optional[str] synonym: Optional[str]
value: int value: int
# Macro vector opcodes. Each entry's list position equals its `value`
# (0 and 1); the validator selects entries by list index, so keep the
# order stable when adding entries.
macro_vector_operations = [
    MVE(b"MACRO_OP_2CLK_MADD" , None , 0),
    MVE(b"MACRO_OP_2CLK_M2X_ADD" , None , 1),
]
vector_operations = [ vector_operations = [
# name synonym value # name synonym value
VE(b"VECTOR_NO_OP" , b"VE_NOP" , 0), VE(b"VECTOR_NO_OP" , b"VE_NOP" , 0),

View File

@ -25,6 +25,7 @@ class DestinationOp:
offset: int offset: int
write_enable: set[int] write_enable: set[int]
opcode: Union[VE, ME] opcode: Union[VE, ME]
macro: bool
@dataclass @dataclass
class SourceSwizzle: class SourceSwizzle:
@ -172,7 +173,8 @@ class Parser:
write_enable = parse_dest_write_enable(write_enable_token) write_enable = parse_dest_write_enable(write_enable_token)
self.consume(TT.equal, "expected equals") self.consume(TT.equal, "expected equals")
opcode = self.opcode() opcode = self.opcode()
return DestinationOp(destination_type, offset_value, write_enable, opcode) macro = False
return DestinationOp(destination_type, offset_value, write_enable, opcode, macro)
def source_type(self): def source_type(self):
token = self.consume(TT.keyword, "expected source type") token = self.consume(TT.keyword, "expected source type")

View File

@ -0,0 +1,25 @@
from assembler.keywords import ME, VE, macro_vector_operations
class ValidatorError(Exception):
    """Raised by validate_instruction when an instruction reads too many
    distinct source offsets and cannot be rewritten as a macro operation."""
    pass
def validate_instruction(ins):
    """Check the source-address count of `ins`, promoting to a macro op if needed.

    The distinct `offset` values of the non-None sources (source0..source2)
    are counted. With two or fewer, `ins` is returned untouched. With more
    than two, the instruction must be a VE multiply-add; it is then rewritten
    in place: `destination_op.macro` is set to True and
    `destination_op.opcode` is replaced by the matching entry of
    `macro_vector_operations`.

    Returns:
        The same `ins` object (possibly mutated).

    Raises:
        ValidatorError: more than two distinct offsets on an instruction
            whose opcode is not a VE, or is a VE other than
            VE_MULTIPLY_ADD / VE_MULTIPLYX2_ADD.

    NOTE(review): only `offset` is compared, so sources in different register
    files that share an offset count as one address -- confirm this matches
    the hardware restriction.
    """
    dst_op = ins.destination_op
    distinct_offsets = {
        src.offset
        for src in (ins.source0, ins.source1, ins.source2)
        if src is not None
    }
    if len(distinct_offsets) <= 2:
        return ins

    if type(dst_op.opcode) is not VE:
        raise ValidatorError("too many addresses for non-VE instruction", ins)

    # Position of the macro replacement inside macro_vector_operations.
    macro_index_by_name = {
        b"VE_MULTIPLY_ADD": 0,
        b"VE_MULTIPLYX2_ADD": 1,
    }
    macro_index = macro_index_by_name.get(dst_op.opcode.name)
    if macro_index is None:
        raise ValidatorError("too many addresses for VE non-multiply-add instruction", ins)

    # An instruction must not already be flagged as a macro at this point.
    assert dst_op.macro == False, ins
    dst_op.macro = True
    dst_op.opcode = macro_vector_operations[macro_index]
    return ins

View File

@ -1,3 +1,6 @@
MACRO_OPCODE:
MACRO_OP_2CLK_MADD = 0
MACRO_OP_2CLK_M2X_ADD = 1
VECTOR_OPCODE: VECTOR_OPCODE:
VECTOR_NO_OP = 0 VECTOR_NO_OP = 0
VE_DOT_PRODUCT = 1 VE_DOT_PRODUCT = 1

View File

@ -88,7 +88,6 @@ def parse_dst_op(dst_op):
addr_sel = pvs_dst.ADDR_SEL(dst_op) addr_sel = pvs_dst.ADDR_SEL(dst_op)
assert addr_mode == 0 assert addr_mode == 0
assert macro_inst == 0
assert pred_enable == 0 assert pred_enable == 0
assert pred_sense == 0 assert pred_sense == 0
assert dual_math_op == 0 assert dual_math_op == 0
@ -102,7 +101,10 @@ def parse_dst_op(dst_op):
parts.append(f"{reg_str}[{offset}].{we_swizzle}") parts.append(f"{reg_str}[{offset}].{we_swizzle}")
if math_inst: if math_inst:
assert not macro_inst
parts.append(op_substitutions(pvs_dst_bits.MATH_OPCODE[opcode])) parts.append(op_substitutions(pvs_dst_bits.MATH_OPCODE[opcode]))
elif macro_inst:
parts.append(op_substitutions(pvs_dst_bits.MACRO_OPCODE[opcode]))
else: else:
parts.append(op_substitutions(pvs_dst_bits.VECTOR_OPCODE[opcode])) parts.append(op_substitutions(pvs_dst_bits.VECTOR_OPCODE[opcode]))