diff --git a/drm/matrix_cubesphere.cpp b/drm/matrix_cubesphere.cpp index 4980b2f..31d6c62 100644 --- a/drm/matrix_cubesphere.cpp +++ b/drm/matrix_cubesphere.cpp @@ -279,7 +279,7 @@ mat4x4 perspective(float low1, float high1, return m2 * m1; } -int _3d_cube_inner(int ix, mat4x4 trans, mat4x4 world_trans, mat3x3 normal_trans, vec4 light_pos) +int _3d_cube_inner(int ix, mat4x4 trans, mat4x4 world_trans, vec4 light_pos) { T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000); @@ -312,12 +312,6 @@ int _3d_cube_inner(int ix, mat4x4 trans, mat4x4 world_trans, mat3x3 normal_trans world_trans[3][0], world_trans[3][1], world_trans[3][2], world_trans[3][3], // 8 - normal_trans[0][0], normal_trans[0][1], normal_trans[0][2], 0, - normal_trans[1][0], normal_trans[1][1], normal_trans[1][2], 0, - normal_trans[2][0], normal_trans[2][1], normal_trans[2][2], 0, - 0, 0, 0, 0, - - // 12 light_pos.x, light_pos.y, light_pos.z, light_pos.w, }; const int consts_length = (sizeof (consts)) / (sizeof (consts[0])); @@ -373,9 +367,9 @@ int _3d_cube_inner(int ix, mat4x4 trans, mat4x4 world_trans, mat3x3 normal_trans ib[ix++].f32 = p.z; ib[ix++].f32 = t.x; ib[ix++].f32 = t.y; - ib[ix++].f32 = n.x;//n.x;//n.x; - ib[ix++].f32 = n.y;//0;//n.y;//n.y; - ib[ix++].f32 = n.z;//n.z; + ib[ix++].f32 = n.x; + ib[ix++].f32 = n.y; + ib[ix++].f32 = n.z; } } @@ -445,7 +439,7 @@ int _3d_cube(int ix, float theta) | RS_IP__TEX_PTR_Q(11) | RS_IP__OFFSET_EN(0) ); - T0V(RS_IP_2 + T0V(RS_IP_3 , RS_IP__TEX_PTR_S(12) | RS_IP__TEX_PTR_T(13) | RS_IP__TEX_PTR_R(14) @@ -616,41 +610,40 @@ int _3d_cube(int ix, float theta) 0.5f, 2.0f); vec4 light_pos = vec4(0, 0, 0, 1.0f); + // light if (1) { mat4x4 t = translate(vec3(0, 0, 3)); - mat4x4 t1 = translate(vec3(1, 1, 1)); + mat4x4 t1 = translate(vec3(1, 0, 0)); mat4x4 s = scale(0.1f); mat4x4 rz = rotate_y(theta * 2.f); mat4x4 world_trans = rz * t1 * s; - mat3x3 normal_trans = transpose(inverse(submatrix(world_trans, 3, 3))); + //mat3x3 normal_trans = transpose(inverse(submatrix(world_trans, 3, 3))); mat4x4 trans = aspect * p * t * world_trans; light_pos = world_trans * light_pos; - ix = _3d_cube_inner(ix, trans, world_trans, normal_trans, light_pos); + ix = _3d_cube_inner(ix, trans, world_trans, light_pos); } + // object if (1) { mat4x4 t = translate(vec3(0, 0, 3)); - mat4x4 rx = rotate_x(0 * theta1 * 0.5f); + mat4x4 rx = rotate_x(1 * theta1 * 0.5f); mat4x4 ry = rotate_y(0 * theta2 * 0.8f + 1.4f); - mat4x4 s = scale(0.7f); + mat4x4 s = scale(0.9f); mat4x4 world_trans = rx * ry * s; - mat3x3 normal_trans = transpose(inverse(submatrix(world_trans, 3, 3))); + //mat3x3 normal_trans = transpose(inverse(submatrix(world_trans, 3, 3))); mat4x4 trans = aspect * p * t * world_trans; - printf("light_pos % 2.03f % 2.03f % 2.03f % 2.03f\n", - light_pos.x, light_pos.y, light_pos.z, light_pos.w); - - ix = _3d_cube_inner(ix, trans, world_trans, normal_trans, light_pos); + ix = _3d_cube_inner(ix, trans, world_trans, light_pos); } return ix; diff --git a/drm/matrix_cubesphere.fs.asm b/drm/matrix_cubesphere.fs.asm index 7991f1a..7ed97b7 100644 --- a/drm/matrix_cubesphere.fs.asm +++ b/drm/matrix_cubesphere.fs.asm @@ -1,55 +1,50 @@ --- temp[0] -- texture +-- temp[0] -- position (world space) -- temp[1] -- normal --- temp[2] -- (world space) fragment position --- temp[3] -- (world space) light position +-- temp[2] -- light pos (world space) +-- temp[3] -- texture -- PIXSIZE 4 TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE - temp[0].rgba = LD tex[0].rgba temp[0].rgaa ; + temp[3].rgba = LD tex[0].rgba temp[3].rgaa ; --- normalize: --- v * 1.0f / sqrt(dot(v, v)) - --- norm = normalize(Normal) +-- normal = normalize(normal) +-- normal = (1.0 / sqrt(dot(normal, normal))) * normal src0.rgb = temp[1] : - DP3 src0.rgb src0.rgb , - temp[1].a = DP ; -src0.a = temp[1] : - temp[1].a = RSQ src0.a ; -src0.rgb = temp[1] , + DP3 src0.rgb src0.rgb , + temp[1].a = DP ; src0.a = temp[1] : + temp[1].a = RSQ |src0.a| ; +src0.a = temp[1], src0.rgb = temp[1] : temp[1].rgb = MAD src0.rgb src0.aaa src0.000 ; --- temp[2] -- (world space) fragment position --- temp[3] -- (world space) light position --- lightDir = normalize(lightPos - fragPos) --- srcp.rgb = (src1.rgb - src0.rgb) -src1.rgb = temp[3] , -src0.rgb = temp[2] , -srcp.rgb = neg : +-- light_dir = normalize((f_light_pos - f_world_pos)) +src1.rgb = temp[2] , -- f_light_pos +src0.rgb = temp[0] , -- f_world_pos +srcp.rgb = neg : -- (f_light_pos - f_world_pos) DP3 srcp.rgb srcp.rgb , - temp[3].a = DP ; -src0.a = temp[3] : - temp[3].a = RSQ src0.a ; -src0.rgb = temp[3] , -src0.a = temp[3] : - temp[3].rgb = MAD src0.rgb src0.aaa src0.000 ; + temp[2].a = DP ; +src0.a = temp[2] : + temp[2].a = RSQ |src0.a| ; +src0.a = temp[2], src0.rgb = temp[2] : + temp[2].rgb = MAD src0.rgb src0.aaa src0.000 ; --- diff = dot(norm, lightDir) --- diff = dot(temp[1].rgb, temp[3].rgb) -src0.rgb = temp[1] , -src1.rgb = temp[3] : - temp[4].r = DP3 src0.rgb src1.rgb ; +-- dot(normal, light_dir) +src0.rgb = temp[2] , +src1.rgb = temp[1] : + DP3 src0.rgb src1.rgb , + temp[4].a = DP ; -src0.rgb = temp[4] : - temp[4].r = MAX src0.r00 src0.000 ; +src0.a = temp[4] : + temp[4].a = MAX src0.a src0.0 ; + +src0.a = temp[4] , +src1.a = float(32) : + temp[4].a = MAD src0.a src0.1 src1.a ; OUT TEX_SEM_WAIT -src0.a = temp[0], src0.rgb = temp[0] , -src1.rgb = temp[4] , -src2.rgb = temp[1] : - out[0].a = MAD src0.a src1.1 src1.0 , - out[0].rgb = MAD src0.rgb src1.rrr src1.000 ; - --out[0].rgb = MAD src2.rgb src2.100 src1.000 ; - --out[0].rgb = MAD src2.r00 src1.rrr src1.000 ; +src0.a = temp[3], +src0.rgb = temp[3] , +src1.a = temp[4] : + out[0].a = MAX src0.a src0.a , + out[0].rgb = MAD src0.rgb src1.aaa src2.000 ; diff --git a/drm/matrix_cubesphere.vs.asm b/drm/matrix_cubesphere.vs.asm index b03cd86..45d6fd4 100644 --- a/drm/matrix_cubesphere.vs.asm +++ b/drm/matrix_cubesphere.vs.asm @@ -28,13 +28,18 @@ temp[2].y = VE_DOT const[5].xyzw input[0].xyzw ; temp[2].z = VE_DOT const[6].xyzw input[0].xyzw ; temp[2].w = VE_DOT const[7].xyzw input[0].xyzw ; -temp[3].x = VE_DOT const[8].xyz0 input[2].xyz0 ; -temp[3].y = VE_DOT const[9].xyz0 input[2].xyz0 ; -temp[3].z = VE_DOT const[10].xyz0 input[2].xyz0 ; ---temp[3].xyzw = VE_MAX input[2].xyz0 input[2].xyz0 ; +-- normal world space +temp[3].x = VE_DOT const[4].xyz0 input[2].xyz0 ; +temp[3].y = VE_DOT const[5].xyz0 input[2].xyz0 ; +temp[3].z = VE_DOT const[6].xyz0 input[2].xyz0 ; -out[0].xyzw = VE_MAX temp[1].xyzw temp[1].xyzw ; -- position clip space -out[1].xyzw = VE_MAX input[1].xyzw input[1].xyzw ; -- texture -out[2].xyzw = VE_MAX temp[3].xyz0 temp[3].xyz0 ; -- normal -out[3].xyzw = VE_MAX temp[2].xyzw temp[2].xyzw ; -- position world space -out[4].xyzw = VE_MAX const[12].xyzw const[12].xyzw ; -- light position world space +-- position (clip space) +out[0].xyzw = VE_ADD temp[1].xyzw const[0].0000 ; +-- position (world space) +out[1].xyzw = VE_ADD temp[2].xyzw const[0].0000 ; +-- normal +out[2].xyzw = VE_ADD temp[3].xyz0 const[0].0000 ; +-- light pos (world space) +out[3].xyzw = VE_ADD const[8].xyzw const[8].0000 ; +-- texture +out[4].xyzw = VE_ADD input[1].xy00 const[0].0000 ; diff --git a/drm/texture_cube_warping.vs.asm b/drm/texture_cube_warping.vs.asm index f949ef0..db90c82 100644 --- a/drm/texture_cube_warping.vs.asm +++ b/drm/texture_cube_warping.vs.asm @@ -1,3 +1,3 @@ temp[0].x = VE_MUL input[0].z___ input[0].1___ ; out[1].xy = VE_ADD input[1].xy__ input[1].0000 ; -out[0].xyzw = VE_ADD temp[0].-0-0-x-0 input[0].xy01 ; +out[0].xyzw = VE_ADD temp[0].-0-0-x-x input[0].xy00 ; diff --git a/regs/assembler/vs/validator.py b/regs/assembler/vs/validator.py index 949a02d..dd84fa7 100644 --- a/regs/assembler/vs/validator.py +++ b/regs/assembler/vs/validator.py @@ -232,13 +232,13 @@ def validate_source_address_counts(sources_ast, sources, opcode): input_count = len(addresses_by_type(sources, SourceType.input)) if input_count > 1: - source_ix = source_with_type_reversed(sources, SourceType.input) + source_ix = source_ix_with_type_reversed(sources, SourceType.input) raise ValidatorError(f"too many input addresses in operation(s); expected 1, have {input_count}", sources_ast[source_ix].offset_identifier) alt_temporary_count = len(addresses_by_type(sources, SourceType.alt_temporary)) if alt_temporary_count > 1: - source_ix = source_with_type_reversed(sources, SourceType.alt_temporary) + source_ix = source_ix_with_type_reversed(sources, SourceType.alt_temporary) raise ValidatorError(f"too many alt temporary addresses in operation(s); expected 1, have {alt_temporary_count}", sources_ast[source_ix].offset_identifier)