Compare commits

...

2 Commits

6 changed files with 219 additions and 33 deletions

View File

@ -1,7 +1,7 @@
OPT = -O0
CFLAGS += -g
CFLAGS += -Wall -Werror -Wfatal-errors -Wno-error=unused-variable -Wno-narrowing
CFLAGS += -Wall -Werror -Wfatal-errors -Wno-error=unused-variable -Wno-error=unused-but-set-variable -Wno-narrowing
LDFLAGS += $(shell pkg-config --cflags --libs libdrm) -lm

View File

@ -77,14 +77,14 @@ static void * read_file(const char * filename)
}
static const uint32_t fragment_shader[] = {
#include "texture_cube.fs.inc"
#include "matrix_cubesphere.fs.inc"
#include "clear.fs.inc"
};
static const int fragment_shader_length = (sizeof (fragment_shader)) / (sizeof (fragment_shader[0]));
static const int fragment_shader_instructions = (fragment_shader_length / 6) - 1;
static const uint32_t vertex_shader[] = {
#include "matrix.vs.inc"
#include "matrix_cubesphere.vs.inc"
#include "clear_nop.vs.inc"
};
static const int vertex_shader_length = (sizeof (vertex_shader)) / (sizeof (vertex_shader[0]));
@ -143,14 +143,20 @@ int _3d_clear(int ix)
| RS_IP__COL_FMT(6) // Zero components (0,0,0,1)
| RS_IP__OFFSET_EN(0)
);
T0V(RS_IP_1, 0);
T0V(RS_IP_2, 0);
T0V(RS_COUNT
, RS_COUNT__IT_COUNT(0)
| RS_COUNT__IC_COUNT(1)
| RS_COUNT__W_ADDR(0)
| RS_COUNT__HIRES_EN(1)
);
T0V(RS_INST_COUNT, 0x00000000);
T0V(RS_INST_0, 0x00000000);
T0V(RS_INST_COUNT
, RS_INST_COUNT__INST_COUNT(0));
T0V(RS_INST_0, 0);
T0V(RS_INST_1, 0);
T0V(RS_INST_2, 0);
T0V(RS_INST_3, 0);
//////////////////////////////////////////////////////////////////////////////
// TX
@ -273,12 +279,12 @@ mat4x4 perspective(float low1, float high1,
return m2 * m1;
}
int _3d_cube_inner(int ix, mat4x4 trans)
int _3d_cube_inner(int ix, mat4x4 trans, mat4x4 world_trans, vec4 light_pos)
{
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(5)
, VAP_VTX_SIZE__DWORDS_PER_VTX(8)
);
T0V(VAP_VF_MAX_VTX_INDX
@ -293,10 +299,20 @@ int _3d_cube_inner(int ix, mat4x4 trans)
//////////////////////////////////////////////////////////////////////////////
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
// 4
world_trans[0][0], world_trans[0][1], world_trans[0][2], world_trans[0][3],
world_trans[1][0], world_trans[1][1], world_trans[1][2], world_trans[1][3],
world_trans[2][0], world_trans[2][1], world_trans[2][2], world_trans[2][3],
world_trans[3][0], world_trans[3][1], world_trans[3][2], world_trans[3][3],
// 8
light_pos.x, light_pos.y, light_pos.z, light_pos.w,
};
const int consts_length = (sizeof (consts)) / (sizeof (consts[0]));
assert(consts_length % 4 == 0);
@ -327,7 +343,7 @@ int _3d_cube_inner(int ix, mat4x4 trans)
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * 5) - 1);
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * 8) - 1);
ib[ix++].u32
= VAP_VF_CNTL__PRIM_TYPE(4)
| VAP_VF_CNTL__PRIM_WALK(3)
@ -344,12 +360,16 @@ int _3d_cube_inner(int ix, mat4x4 trans)
for (int j = 0; j < 3; j++) {
vec3 p = model->position[obj->triangle[i][j].position];
vec2 t = model->texture[obj->triangle[i][j].texture];
vec3 n = model->normal[obj->triangle[i][j].normal];
ib[ix++].f32 = p.x;
ib[ix++].f32 = p.y;
ib[ix++].f32 = p.z;
ib[ix++].f32 = t.x;
ib[ix++].f32 = t.y;
ib[ix++].f32 = n.x;
ib[ix++].f32 = n.y;
ib[ix++].f32 = n.z;
}
}
@ -392,27 +412,63 @@ int _3d_cube(int ix, float theta)
// RS
//////////////////////////////////////////////////////////////////////////////
T0V(RS_COUNT
, RS_COUNT__IT_COUNT(16)
| RS_COUNT__IC_COUNT(0)
| RS_COUNT__W_ADDR(0)
| RS_COUNT__HIRES_EN(1)
);
T0V(RS_IP_0
, RS_IP__TEX_PTR_S(0)
| RS_IP__TEX_PTR_T(1)
| RS_IP__TEX_PTR_R(2)
| RS_IP__TEX_PTR_Q(3)
| RS_IP__COL_PTR(0)
| RS_IP__COL_FMT(0)
| RS_IP__OFFSET_EN(0)
);
T0V(RS_COUNT
, RS_COUNT__IT_COUNT(4)
| RS_COUNT__IC_COUNT(0)
| RS_COUNT__W_ADDR(0)
| RS_COUNT__HIRES_EN(1)
T0V(RS_IP_1
, RS_IP__TEX_PTR_S(4)
| RS_IP__TEX_PTR_T(5)
| RS_IP__TEX_PTR_R(6)
| RS_IP__TEX_PTR_Q(7)
| RS_IP__OFFSET_EN(0)
);
T0V(RS_INST_COUNT, 0x00000000);
T0V(RS_IP_2
, RS_IP__TEX_PTR_S(8)
| RS_IP__TEX_PTR_T(9)
| RS_IP__TEX_PTR_R(10)
| RS_IP__TEX_PTR_Q(11)
| RS_IP__OFFSET_EN(0)
);
T0V(RS_IP_3
, RS_IP__TEX_PTR_S(12)
| RS_IP__TEX_PTR_T(13)
| RS_IP__TEX_PTR_R(14)
| RS_IP__TEX_PTR_Q(15)
| RS_IP__OFFSET_EN(0)
);
T0V(RS_INST_COUNT
, RS_INST_COUNT__INST_COUNT(3));
T0V(RS_INST_0
, RS_INST__TEX_ID(0)
| RS_INST__TEX_CN(1)
| RS_INST__TEX_ADDR(0)
);
T0V(RS_INST_1
, RS_INST__TEX_ID(1)
| RS_INST__TEX_CN(1)
| RS_INST__TEX_ADDR(1)
);
T0V(RS_INST_2
, RS_INST__TEX_ID(2)
| RS_INST__TEX_CN(1)
| RS_INST__TEX_ADDR(2)
);
T0V(RS_INST_3
, RS_INST__TEX_ID(3)
| RS_INST__TEX_CN(1)
| RS_INST__TEX_ADDR(3)
);
//////////////////////////////////////////////////////////////////////////////
// TX
@ -485,7 +541,7 @@ int _3d_cube(int ix, float theta)
| VAP_PROG_STREAM_CNTL__DATA_TYPE_1__FLOAT_2
| VAP_PROG_STREAM_CNTL__SKIP_DWORDS_1(0)
| VAP_PROG_STREAM_CNTL__DST_VEC_LOC_1(1)
| VAP_PROG_STREAM_CNTL__LAST_VEC_1(1)
| VAP_PROG_STREAM_CNTL__LAST_VEC_1(0)
);
T0V(VAP_PROG_STREAM_CNTL_EXT_0
, VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X
@ -500,12 +556,29 @@ int _3d_cube(int ix, float theta)
| VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_1(0b1111) // XYZW
);
T0V(VAP_PROG_STREAM_CNTL_1
, VAP_PROG_STREAM_CNTL__DATA_TYPE_0__FLOAT_3
| VAP_PROG_STREAM_CNTL__SKIP_DWORDS_0(0)
| VAP_PROG_STREAM_CNTL__DST_VEC_LOC_0(2)
| VAP_PROG_STREAM_CNTL__LAST_VEC_0(1)
);
T0V(VAP_PROG_STREAM_CNTL_EXT_1
, VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0__SELECT_Y
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_Z
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0__SELECT_FP_ONE
| VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111) // XYZW
);
T0V(VAP_INDEX_OFFSET, 0x00000000);
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_1_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_2_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_3_COMP_CNT(4));
//////////////////////////////////////////////////////////////////////////////
// GA_US
@ -536,23 +609,41 @@ int _3d_cube(int ix, float theta)
0.001f, 0.999f,
0.5f, 2.0f);
if (1) {
mat4x4 t = translate(vec3(0, 0, 3));
mat4x4 rx = rotate_x(theta1);
mat4x4 ry = rotate_y(theta2 * 2.0f);
mat4x4 s = scale(0.7f);
mat4x4 trans = aspect * p * t * rx * ry * s;
vec4 light_pos = vec4(0, 0, 0, 1.0f);
ix = _3d_cube_inner(ix, trans);
}
// light
if (1) {
mat4x4 t = translate(vec3(0, 0, 3));
mat4x4 t1 = translate(vec3(1, 1, 1));
mat4x4 t1 = translate(vec3(1, 0, 0));
mat4x4 s = scale(0.1f);
mat4x4 rz = rotate_y(theta * 2.f);
mat4x4 trans = aspect * p * t * rz * t1 * s;
ix = _3d_cube_inner(ix, trans);
mat4x4 world_trans = rz * t1 * s;
//mat3x3 normal_trans = transpose(inverse(submatrix(world_trans, 3, 3)));
mat4x4 trans = aspect * p * t * world_trans;
light_pos = world_trans * light_pos;
ix = _3d_cube_inner(ix, trans, world_trans, light_pos);
}
// object
if (1) {
mat4x4 t = translate(vec3(0, 0, 3));
mat4x4 rx = rotate_x(1 * theta1 * 0.5f);
mat4x4 ry = rotate_y(0 * theta2 * 0.8f + 1.4f);
mat4x4 s = scale(0.9f);
mat4x4 world_trans = rx * ry * s;
//mat3x3 normal_trans = transpose(inverse(submatrix(world_trans, 3, 3)));
mat4x4 trans = aspect * p * t * world_trans;
ix = _3d_cube_inner(ix, trans, world_trans, light_pos);
}
return ix;
@ -776,7 +867,7 @@ int indirect_buffer(float theta)
, US_CONFIG__ZERO_TIMES_ANYTHING_EQUALS_ZERO(1)
);
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
, US_PIXSIZE__PIX_SIZE(4)
);
T0V(US_FC_CTRL, 0);

View File

@ -0,0 +1,50 @@
-- Fragment program: samples tex[0] and scales the texel colour by a
-- per-pixel lighting term built from the normal and the light position.
-- Inputs on entry:
-- temp[0] -- position (world space)
-- temp[1] -- normal
-- temp[2] -- light pos (world space)
-- temp[3] -- texture
-- temp[4] is used as scratch for the lighting term, so the highest
-- temporary index touched by this program is 4:
-- PIXSIZE 4
-- Sample tex[0] using the coordinate in temp[3]; the texel overwrites
-- the coordinate in temp[3].
TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE
temp[3].rgba = LD tex[0].rgba temp[3].rgaa ;
-- normal = normalize(normal)
-- normal = (1.0 / sqrt(dot(normal, normal))) * normal
-- step 1: temp[1].a = dot(normal, normal)
src0.rgb = temp[1] :
DP3 src0.rgb src0.rgb ,
temp[1].a = DP ;
-- step 2: temp[1].a = RSQ of |temp[1].a|
src0.a = temp[1] :
temp[1].a = RSQ |src0.a| ;
-- step 3: temp[1].rgb = temp[1].rgb * temp[1].a (MAD with a zero addend)
src0.a = temp[1], src0.rgb = temp[1] :
temp[1].rgb = MAD src0.rgb src0.aaa src0.000 ;
-- light_dir = normalize((f_light_pos - f_world_pos))
-- step 1: temp[2].a = dot(diff, diff), with diff taken from srcp
src1.rgb = temp[2] , -- f_light_pos
src0.rgb = temp[0] , -- f_world_pos
srcp.rgb = neg : -- (f_light_pos - f_world_pos)
DP3 srcp.rgb srcp.rgb ,
temp[2].a = DP ;
-- step 2: temp[2].a = RSQ of |temp[2].a|
src0.a = temp[2] :
temp[2].a = RSQ |src0.a| ;
-- step 3: temp[2].rgb = temp[2].rgb * temp[2].a
-- NOTE(review): temp[2].rgb still holds the light position at this point;
-- the difference computed in srcp above was only consumed by the DP3 and
-- was never written back. As written this scales f_light_pos, not
-- (f_light_pos - f_world_pos) -- confirm whether that is intended.
src0.a = temp[2], src0.rgb = temp[2] :
temp[2].rgb = MAD src0.rgb src0.aaa src0.000 ;
-- dot(normal, light_dir)
-- temp[4].a = dot(temp[2].rgb, temp[1].rgb)
src0.rgb = temp[2] ,
src1.rgb = temp[1] :
DP3 src0.rgb src1.rgb ,
temp[4].a = DP ;
-- clamp the term at zero: temp[4].a = max(temp[4].a, 0)
src0.a = temp[4] :
temp[4].a = MAX src0.a src0.0 ;
-- temp[4].a = temp[4].a * 1 + float(32)
-- NOTE(review): presumably a constant ambient offset; the value that the
-- inline constant float(32) decodes to is not visible here -- verify.
src0.a = temp[4] ,
src1.a = float(32) :
temp[4].a = MAD src0.a src0.1 src1.a ;
-- Output: alpha is the texel alpha unchanged (MAX of a value with itself),
-- rgb is the texel rgb multiplied by the lighting term in temp[4].a.
OUT TEX_SEM_WAIT
src0.a = temp[3],
src0.rgb = temp[3] ,
src1.a = temp[4] :
out[0].a = MAX src0.a src0.a ,
out[0].rgb = MAD src0.rgb src1.aaa src2.000 ;

View File

@ -0,0 +1,45 @@
--
-- Vertex program: transforms the position into clip space and world space,
-- transforms the normal by world_trans, and forwards the light position
-- and the texture coordinate.
--
-- Each matrix * vector product is expanded one row at a time:
-- dot(m[0], v), dot(m[1], v), dot(m[2], v), dot(m[3], v)
--
-- input[0] -- position
-- input[1] -- texture
-- input[2] -- normal
-- consts[0] -- trans (rows in const[0]..const[3])
-- consts[4] -- world_trans (rows in const[4]..const[7])
-- consts[8] -- light position (world space)
-- out[0] -- position clip space
-- out[1] -- object position world space
-- out[2] -- normal
-- out[3] -- light position world space
-- out[4] -- texture
-- position clip space
temp[1].x = VE_DOT const[0].xyzw input[0].xyzw ;
temp[1].y = VE_DOT const[1].xyzw input[0].xyzw ;
temp[1].z = VE_DOT const[2].xyzw input[0].xyzw ;
temp[1].w = VE_DOT const[3].xyzw input[0].xyzw ;
-- position world space
temp[2].x = VE_DOT const[4].xyzw input[0].xyzw ;
temp[2].y = VE_DOT const[5].xyzw input[0].xyzw ;
temp[2].z = VE_DOT const[6].xyzw input[0].xyzw ;
temp[2].w = VE_DOT const[7].xyzw input[0].xyzw ;
-- normal world space
-- Only the xyz part of each world_trans row is used (w swizzled to 0), so
-- the translation column does not affect the normal. temp[3].w is not
-- written; the output below swizzles it to 0.
-- NOTE(review): this uses world_trans directly rather than a separate
-- normal matrix, which is exact only for rotation plus uniform scale --
-- confirm that world_trans never carries non-uniform scale.
temp[3].x = VE_DOT const[4].xyz0 input[2].xyz0 ;
temp[3].y = VE_DOT const[5].xyz0 input[2].xyz0 ;
temp[3].z = VE_DOT const[6].xyz0 input[2].xyz0 ;
-- The outputs below use VE_ADD with an all-zero (.0000) second operand,
-- i.e. the add acts as a plain copy of the first operand.
-- position (clip space)
out[0].xyzw = VE_ADD temp[1].xyzw const[0].0000 ;
-- position (world space)
out[1].xyzw = VE_ADD temp[2].xyzw const[0].0000 ;
-- normal
out[2].xyzw = VE_ADD temp[3].xyz0 const[0].0000 ;
-- light pos (world space)
out[3].xyzw = VE_ADD const[8].xyzw const[8].0000 ;
-- texture
out[4].xyzw = VE_ADD input[1].xy00 const[0].0000 ;

View File

@ -1,3 +1,3 @@
temp[0].x = VE_MUL input[0].z___ input[0].1___ ;
out[1].xy = VE_ADD input[1].xy__ input[1].0000 ;
out[0].xyzw = VE_ADD temp[0].-0-0-x-0 input[0].xy01 ;
out[0].xyzw = VE_ADD temp[0].-0-0-x-x input[0].xy00 ;

View File

@ -232,13 +232,13 @@ def validate_source_address_counts(sources_ast, sources, opcode):
input_count = len(addresses_by_type(sources, SourceType.input))
if input_count > 1:
source_ix = source_with_type_reversed(sources, SourceType.input)
source_ix = source_ix_with_type_reversed(sources, SourceType.input)
raise ValidatorError(f"too many input addresses in operation(s); expected 1, have {input_count}",
sources_ast[source_ix].offset_identifier)
alt_temporary_count = len(addresses_by_type(sources, SourceType.alt_temporary))
if alt_temporary_count > 1:
source_ix = source_with_type_reversed(sources, SourceType.alt_temporary)
source_ix = source_ix_with_type_reversed(sources, SourceType.alt_temporary)
raise ValidatorError(f"too many alt temporary addresses in operation(s); expected 1, have {alt_temporary_count}",
sources_ast[source_ix].offset_identifier)