From 2b90395b2de816a3e0ce730808f04452a6697add Mon Sep 17 00:00:00 2001 From: Zack Buhman Date: Fri, 31 Oct 2025 20:15:11 -0500 Subject: [PATCH] matrix_cubesphere_specular: rewrite specular fragment shader --- drm/matrix_cubesphere_specular.cpp | 4 +- drm/matrix_cubesphere_specular.fs.asm | 149 +++++++++++++------------- drm/matrix_cubesphere_specular.vs.asm | 12 ++- regs/assembler/vs/validator.py | 2 +- 4 files changed, 86 insertions(+), 81 deletions(-) diff --git a/drm/matrix_cubesphere_specular.cpp b/drm/matrix_cubesphere_specular.cpp index ab7bb7f..edaa360 100644 --- a/drm/matrix_cubesphere_specular.cpp +++ b/drm/matrix_cubesphere_specular.cpp @@ -365,8 +365,8 @@ int _3d_cube_inner(int ix, mat4x4 trans, mat4x4 world_trans, vec4 light_pos, vec vec2 t = model->texture[obj->triangle[i][j].texture]; vec3 n = model->normal[obj->triangle[i][j].normal]; - fprintf(stderr, "% 2.03f, % 2.03f, % 2.03f, % 2.03f, % 2.03f, % 2.03f,\n", - p.x, p.y, p.z, n.x, n.y, n.z); + //fprintf(stderr, "% 2.03f, % 2.03f, % 2.03f, % 2.03f, % 2.03f, % 2.03f,\n", + //p.x, p.y, p.z, n.x, n.y, n.z); ib[ix++].f32 = p.x; ib[ix++].f32 = p.y; diff --git a/drm/matrix_cubesphere_specular.fs.asm b/drm/matrix_cubesphere_specular.fs.asm index ebebd59..609cefd 100644 --- a/drm/matrix_cubesphere_specular.fs.asm +++ b/drm/matrix_cubesphere_specular.fs.asm @@ -9,94 +9,97 @@ TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE temp[4].rgba = LD tex[0].rgba temp[4].rgaa ; +-- -- normal = normalize(normal) --- normal = (1.0 / sqrt(dot(normal, normal))) * normal -src0.rgb = temp[1] : +-- +src0.rgb = temp[1] : -- normal DP3 src0.rgb src0.rgb , temp[1].a = DP ; src0.a = temp[1] : - temp[1].a = RSQ |src0.a| ; -src0.a = temp[1], src0.rgb = temp[1] : + temp[1].a = RSQ |src0.a| ; +src0.a = temp[1] , +src0.rgb = temp[1] : -- normal temp[1].rgb = MAD src0.rgb src0.aaa src0.000 ; --- light_dir = normalize((f_light_pos - f_world_pos)) -src1.rgb = temp[2] , -- f_light_pos -src0.rgb = temp[0] , -- f_world_pos -srcp.rgb = neg : -- (f_light_pos - f_world_pos) - DP3 srcp.rgb srcp.rgb , +-- +-- light_dir = light_pos - world_pos +-- +src0.rgb = temp[2] , -- light pos +src1.rgb = temp[0] : -- world pos + temp[2].rgb = MAD src0.111 src0.rgb -src1.rgb ; + +-- +-- light_dir = normalize(light_dir) +-- +src0.rgb = temp[2] : -- light_dir + DP3 src0.rgb src0.rgb , temp[2].a = DP ; src0.a = temp[2] : - temp[2].a = RSQ |src0.a| ; -src0.a = temp[2], src0.rgb = temp[2] : + temp[2].a = RSQ |src0.a| ; +src0.a = temp[2] , +src0.rgb = temp[2] : -- light_dir temp[2].rgb = MAD src0.rgb src0.aaa src0.000 ; --- diff = dot(normal, light_dir) -src0.rgb = temp[2] , -src1.rgb = temp[1] : - DP3 src0.rgb src1.rgb , - temp[5].a = DP ; +-- +-- view_dir = view_pos - world_pos +-- +src0.rgb = temp[3] , -- view pos +src1.rgb = temp[0] : -- world pos + temp[3].rgb = MAD src0.111 src0.rgb -src1.rgb ; --- diff = max(diff, 0) +-- +-- view_dir = normalize(view_dir) +-- +src0.rgb = temp[3] : -- view dir + DP3 src0.rgb src0.rgb , + temp[3].a = DP ; +src0.a = temp[3] : + temp[3].a = RSQ |src0.a| ; +src0.a = temp[3] , +src0.rgb = temp[3] : -- view dir + temp[3].rgb = MAD src0.rgb src0.aaa src0.000 ; + +-- +-- reflect_dir = reflect(light_dir, normal) +-- +-- dotLN = dot(-light_dir, normal) +src0.rgb = temp[2] , -- light dir +src1.rgb = temp[1] : -- normal + DP3 -src0.rgb src1.rgb , + temp[5].a = DP ; +-- dotLN = 2.0 * dotLN +src0.a = temp[5] , -- dotLN +src1.a = float(64) : -- 2.0 + temp[5].a = MAD src0.a src1.a src0.0 ; +-- dotLN = -dotLN * normal + -light_dir +src0.a = temp[5] , -- dotLN +src1.rgb = temp[1] , -- normal +src2.rgb = temp[2] : -- light dir + temp[5].rgb = MAD -src0.aaa src1.rgb -src2.rgb ; + +-- +-- spec = max(dot(view_dir, reflect_dir), 0.0) +-- +src0.rgb = temp[3] , +src1.rgb = temp[5] : + DP3 src0.rgb src1.rgb , + temp[5].a = DP ; src0.a = temp[5] : temp[5].a = MAX src0.a src0.0 ; --- intensity = diff + 0.125 -src0.a = temp[5] , -src1.a = float(32) : -- 0.125 - temp[5].a = MAD src0.a src0.1 src1.a ; - -- --- specular --- --- temp[3] -- view pos (world space) --- view_dir = normalize(f_view_pos - f_world_pos) -src1.rgb = temp[3] , -- f_light_pos -src0.rgb = temp[0] , -- f_world_pos -srcp.rgb = neg : -- (f_light_pos - f_world_pos) - DP3 srcp.rgb srcp.rgb , - temp[3].a = DP ; -src0.a = temp[3] : - temp[3].a = RSQ |src0.a| ; -src0.a = temp[3], src0.rgb = temp[3] : - temp[3].rgb = MAD src0.rgb src0.aaa src0.000 ; - --- reflect(I, N) --- I - 2.0 * dot(N, I) * N --- reflect_dir = reflect(-light_dir, norm) --- reflect_dir = reflect(-temp[2], temp[1]) --- I - 2.0 * dot(N, I) * N --- - (2.0 * dot(N, I)) * temp[1] + -temp[2] -src0.rgb = temp[1] , -- N=normal -src1.rgb = temp[2] : -- I=light_dir dot(N, -I) - temp[6].r = DP3 src0.rgb -src1.rgb ; -src0.rgb = temp[6] , -src1.rgb = float(64) : -- 2.0 - temp[6].r = MAD src0.r00 src1.r00 src0.000 ; -src0.rgb = temp[6] , -src1.rgb = temp[1] , -- N -src2.rgb = temp[2] : -- I - temp[6].rgb = MAD -src0.rrr src1.rgb -src2.rgb ; - --- spec = max(dot(view_dir, reflect_dir), 0.0) -src0.rgb = temp[6] , -- reflect_dir -src1.rgb = temp[3] : -- view_dir - temp[6].r = DP3 src0.rgb src1.rgb ; -src0.rgb = temp[6] : - temp[6].a = MAX src0.r src0.0 ; - -- spec = pow(spec, 32) -src0.a = temp[6] : - temp[6].a = LN2 src0.a ; -src0.a = temp[6] , -src1.a = float(72) : -- 32 - temp[6].a = MAD src0.a src1.a src1.0 ; -src0.a = temp[6] : - temp[6].a = EX2 src0.a ; +-- +src0.a = temp[5] : -- spec + temp[5].a = LN2 src0.a ; +src0.a = temp[5] , -- spec +src1.a = float(96) : -- 32 + temp[5].a = MAD src0.a src1.a src1.0 ; +src0.a = temp[5] : -- spec + temp[5].a = EX2 src0.a ; OUT TEX_SEM_WAIT -src0.a = temp[4], -src0.rgb = temp[4] , -src1.a = temp[6] , -src1.rgb = temp[6] : - out[0].a = MAX src0.a src0.a , - out[0].rgb = MAD src0.111 src1.aaa src1.000 ; +src1.a = temp[5] , +src1.rgb = temp[5] : + out[0].a = MAX src1.1 src0.1 , + out[0].rgb = MAD src1.111 src1.aaa src1.000 ; diff --git a/drm/matrix_cubesphere_specular.vs.asm b/drm/matrix_cubesphere_specular.vs.asm index 86a4d47..d5aba49 100644 --- a/drm/matrix_cubesphere_specular.vs.asm +++ b/drm/matrix_cubesphere_specular.vs.asm @@ -37,12 +37,14 @@ temp[3].z = VE_DOT const[6].xyz0 input[2].xyz0 ; -- position (clip space) out[0].xyzw = VE_ADD temp[1].xyzw const[0].0000 ; -- position (world space) -out[1].xyzw = VE_ADD temp[2].xyzw const[0].0000 ; +out[1].xyzw = VE_ADD temp[2].xyzw const[0].0000 ; -- 0 world pos -- normal -out[2].xyzw = VE_ADD temp[3].xyz0 const[0].0000 ; +out[2].xyzw = VE_ADD temp[3].xyz0 const[0].0000 ; -- 1 normal -- light pos (world space) -out[3].xyzw = VE_ADD const[8].xyzw const[8].0000 ; +out[3].xyzw = VE_ADD const[8].xyzw const[8].0000 ; -- 2 light pos / light dir -- view pos (world space) -out[4].xyzw = VE_ADD const[9].xyzw const[9].0000 ; +out[4].xyzw = VE_ADD const[9].xyzw const[9].0000 ; -- 3 view pos / view dir -- texture -out[5].xyzw = VE_ADD input[1].xy00 const[0].0000 ; +out[5].xyzw = VE_ADD input[1].xy00 const[0].0000 ; -- 4 texture + + -- 5 reflect dir diff --git a/regs/assembler/vs/validator.py b/regs/assembler/vs/validator.py index dd84fa7..c714379 100644 --- a/regs/assembler/vs/validator.py +++ b/regs/assembler/vs/validator.py @@ -382,7 +382,7 @@ def validate_instruction(ins): if len(opcodes) == 2: return validate_dual_math_instruction(ins.operations, opcodes) else: - assert len(opcodes) == 1 + assert len(opcodes) == 1, (opcodes, ins) return validate_instruction_inner(ins.operations[0], opcodes[0]) if __name__ == "__main__":