particle_oriented_animated_quad_vbuf_pixel_shader: use vertex_buffer_copy shader

This commit is contained in:
Zack Buhman 2025-11-09 21:33:35 -06:00
parent e622d769a4
commit 314267afe1
7 changed files with 285 additions and 11 deletions

View File

@ -32,6 +32,7 @@
#define PARTICLE_SHADER 2 #define PARTICLE_SHADER 2
#define TEXTURE_TILE_SHADER 3 #define TEXTURE_TILE_SHADER 3
#define PARTICLE_PHYSICS_SHADER 4 #define PARTICLE_PHYSICS_SHADER 4
#define VERTEX_BUFFER_COPY_SHADER 5
#define PARTICLE_POSITION_RELOC_INDEX 5 #define PARTICLE_POSITION_RELOC_INDEX 5
@ -41,6 +42,7 @@ const char * vertex_shader_paths[] = {
"particle_particle_animated_quad_vbuf.vs.bin", "particle_particle_animated_quad_vbuf.vs.bin",
"texture_tile.vs.bin", "texture_tile.vs.bin",
"particle_physics.vs.bin", "particle_physics.vs.bin",
"vertex_buffer_copy.vs.bin",
}; };
const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0])); const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
const char * fragment_shader_paths[] = { const char * fragment_shader_paths[] = {
@ -49,6 +51,7 @@ const char * fragment_shader_paths[] = {
"particle_particle.fs.bin", "particle_particle.fs.bin",
"texture_tile.fs.bin", "texture_tile.fs.bin",
"particle_physics.fs.bin", "particle_physics.fs.bin",
"vertex_buffer_copy.fs.bin",
}; };
const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0])); const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
@ -293,8 +296,8 @@ void _3d_particle_inner(int particles_length, int position_offset)
| VAP_VTX_NUM_ARRAYS__VC_FORCE_PREFETCH(1) | VAP_VTX_NUM_ARRAYS__VC_FORCE_PREFETCH(1)
); );
TU( // VAP_VTX_AOS_ATTR01 TU( // VAP_VTX_AOS_ATTR01
VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(3) VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(4)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(3) | VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(4)
| VAP_VTX_AOS_ATTR__VTX_AOS_COUNT1(2) | VAP_VTX_AOS_ATTR__VTX_AOS_COUNT1(2)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE1(2) | VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE1(2)
); );
@ -468,7 +471,7 @@ void _3d_particle(const shaders& shaders,
macrotile, microtile, macrotile, microtile,
clamp); clamp);
ib_vap_stream_cntl__32(); ib_vap_stream_cntl__42();
// shaders // shaders
T0V(US_PIXSIZE T0V(US_PIXSIZE
@ -543,6 +546,7 @@ void _3d_particle(const shaders& shaders,
ib_vap_pvs_const_cntl(consts, (sizeof (consts))); ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
int offset = state.length * 4 * 2; int offset = state.length * 4 * 2;
/*
int ix = 0; int ix = 0;
particle_position * pos = state.position_output(); particle_position * pos = state.position_output();
for (int i = 0; i < state.length; i++) { for (int i = 0; i < state.length; i++) {
@ -554,17 +558,197 @@ void _3d_particle(const shaders& shaders,
ix++; ix++;
vertexbuffer_ptr[offset + ix] = position.z; vertexbuffer_ptr[offset + ix] = position.z;
ix++; ix++;
vertexbuffer_ptr[offset + ix] = 1; // W
ix++;
}; };
} }
asm volatile ("" ::: "memory"); asm volatile ("" ::: "memory");
*/
_3d_particle_inner(state.length, offset); _3d_particle_inner(state.length, offset);
} }
// Emits the commands for one render pass that draws a single quad with the
// VERTEX_BUFFER_COPY_SHADER vertex/fragment shader pair. Texture unit 0 is
// bound to PARTICLE_POSITION_RELOC_INDEX (32-bit float RGBA), and color
// buffer 0 is bound to VERTEXBUFFER_RELOC_INDEX at a byte offset derived from
// state.length, so the pass writes texture contents into the vertex buffer.
//
// shaders            compiled shader table; only the VERTEX_BUFFER_COPY_SHADER
//                    entries are used here
// state              only state.length is read in this function
// floatbuffer_width  source texture width (asserted <= 1024)
// floatbuffer_height source texture height
void _copy_to_vertexbuffer(const shaders& shaders, void _copy_to_vertexbuffer(const shaders& shaders,
const floatbuffer_state& state) const floatbuffer_state& state,
int floatbuffer_width,
int floatbuffer_height)
{ {
// NOTE(review): the reason for the 1024 limit is not visible here; presumably
// it keeps viewport_width (width * 4) within a hardware limit -- confirm.
assert(floatbuffer_width <= 1024);
// NOTE(review): the render target is 4x as wide as the source texture; the
// reason for the factor of 4 is not established in this function -- confirm.
int viewport_width = floatbuffer_width * 4;
int viewport_height = floatbuffer_height;
int texture_width = floatbuffer_width;
int texture_height = floatbuffer_height;
int macrotile = 0;
int microtile = 0;
// Scissor rectangle covers the whole viewport: (0, 0) to (width-1, height-1).
T0V(SC_SCISSOR0
, SC_SCISSOR0__XS0(0)
| SC_SCISSOR0__YS0(0)
);
T0V(SC_SCISSOR1
, SC_SCISSOR1__XS1(viewport_width - 1)
| SC_SCISSOR1__YS1(viewport_height - 1)
);
// The quad below uses coordinates in [0, 1]; these scales map it onto the
// full viewport (only X/Y scaling is enabled in VAP_VTE_CNTL further down).
T0Vf(VAP_VPORT_XSCALE, (float)viewport_width);
T0Vf(VAP_VPORT_YSCALE, (float)viewport_height);
int colorformat = 7; // ARGB32323232
// Byte offset of the destination inside the vertex buffer: skips
// state.length * 4 * 2 floats. This appears to match the first term of the
// buffer size computed in init_particles_vertexbuffer -- confirm.
int offset = state.length * 4 * 2 * (sizeof (float));
ib_colorbuffer3(0,
VERTEXBUFFER_RELOC_INDEX,
offset,
viewport_width,
macrotile,
microtile,
colorformat);
// Only render target 0 is written; targets 1-3 are marked unused.
T0V(US_OUT_FMT_0
, US_OUT_FMT__OUT_FMT(21) // C4_32_FP
| US_OUT_FMT__C0_SEL__RED
| US_OUT_FMT__C1_SEL__GREEN
| US_OUT_FMT__C2_SEL__BLUE
| US_OUT_FMT__C3_SEL__ALPHA
| US_OUT_FMT__OUT_SIGN(0)
);
T0V(US_OUT_FMT_1
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_3
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
// shaders
//load_pvs_shaders(shaders.vertex, shaders.vertex_length);
//load_us_shaders(shaders.fragment, shaders.fragment_length);
// GA
T0V(GB_ENABLE
, 0
);
//////////////////////////////////////////////////////////////////////////////
// RS
//////////////////////////////////////////////////////////////////////////////
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// Vertex shader outputs: position plus one 4-component texture coordinate.
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)
);
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)
);
//
// Depth/stencil control registers are zeroed for this pass.
T0V(ZB_CNTL, 0);
T0V(ZB_ZSTENCILCNTL, 0);
//
//////////////////////////////////////////////////////////////////////////////
// TX
//////////////////////////////////////////////////////////////////////////////
T0V(TX_INVALTAGS, 0x00000000);
T0V(TX_ENABLE
, TX_ENABLE__TEX_0_ENABLE__ENABLE
);
int clamp = 2; // clamp to [0.0, 1.0]
int txformat = 29; // TX_FMT_32F_32F_32F_32F
ib_texture2(0,
PARTICLE_POSITION_RELOC_INDEX,
texture_width, texture_height,
macrotile, microtile,
clamp,
txformat);
// shaders
ib_vap_stream_cntl__2();
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(2)
);
ib_ga_us(&shaders.fragment[VERTEX_BUFFER_COPY_SHADER]);
ib_vap_pvs(&shaders.vertex[VERTEX_BUFFER_COPY_SHADER]);
// NOTE(review): no vertex shader constants are uploaded in this function,
// while the vertex shader added alongside it appears to read const[0] --
// confirm the constants are set elsewhere or are intentionally left as-is.
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
// Each immediate-mode vertex is two floats (x, y).
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
// Unit square, one corner per vertex, in quad-list order.
const float vertices[] = {
0.0f, 0.0f,
1.0f, 0.0f,
1.0f, 1.0f,
0.0f, 1.0f,
};
const int vertex_count = 4;
// Count argument: one VAP_VF_CNTL dword plus the vertex data dwords, minus one.
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(13) // quad list
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
// The literal 2 here is the per-vertex float count (same value as
// dwords_per_vtx); the loop emits all 8 floats of `vertices`.
for (int i = 0; i < vertex_count * 2; i++) {
TF(vertices[i]);
}
//
// Flush and free the color and Z caches after the draw, then wait.
T0V(RB3D_DSTCACHE_CTLSTAT
, RB3D_DSTCACHE_CTLSTAT__DC_FLUSH(0x2) // Flush dirty 3D data
| RB3D_DSTCACHE_CTLSTAT__DC_FREE(0x2) // Free 3D tags
);
T0V(ZB_ZCACHE_CTLSTAT
, ZB_ZCACHE_CTLSTAT__ZC_FLUSH(1)
| ZB_ZCACHE_CTLSTAT__ZC_FREE(1)
);
// NOTE(review): 0x00020000 is an unnamed WAIT_UNTIL bit; presumably
// "wait for 3D idle and clean" -- confirm against the register reference.
T0V(WAIT_UNTIL, 0x00020000);
} }
int indirect_buffer(const shaders& shaders, int indirect_buffer(const shaders& shaders,
@ -572,7 +756,9 @@ int indirect_buffer(const shaders& shaders,
//const particle * particles, //const particle * particles,
//const int particles_length, //const int particles_length,
float theta, float theta,
float * vertexbuffer_ptr) float * vertexbuffer_ptr,
int floatbuffer_width,
int floatbuffer_height)
{ {
int width = 1600; int width = 1600;
int height = 1200; int height = 1200;
@ -582,7 +768,10 @@ int indirect_buffer(const shaders& shaders,
ib_generic_initialization(); ib_generic_initialization();
_copy_to_vertexbuffer(shaders, state); _copy_to_vertexbuffer(shaders,
state,
floatbuffer_width,
floatbuffer_height);
T0V(RB3D_BLENDCNTL, 0); T0V(RB3D_BLENDCNTL, 0);
T0V(RB3D_ABLENDCNTL, 0); T0V(RB3D_ABLENDCNTL, 0);
@ -610,8 +799,8 @@ int indirect_buffer(const shaders& shaders,
, US_OUT_FMT__OUT_FMT(15) // render target is not used , US_OUT_FMT__OUT_FMT(15) // render target is not used
); );
load_pvs_shaders(shaders.vertex, shaders.vertex_length); //load_pvs_shaders(shaders.vertex, shaders.vertex_length);
load_us_shaders(shaders.fragment, shaders.fragment_length); //load_us_shaders(shaders.fragment, shaders.fragment_length);
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// DRAW // DRAW
@ -707,7 +896,7 @@ int init_particles_vertexbuffer(int fd, int particles_length, float ** ptr_out)
const int vertex_count = 4; const int vertex_count = 4;
const int size = particles_length * vertex_count * 2 * (sizeof (float)) const int size = particles_length * vertex_count * 2 * (sizeof (float))
+ particles_length * vertex_count * 3 * (sizeof (float)); + particles_length * vertex_count * 4 * (sizeof (float));
void * ptr; void * ptr;
int handle = create_buffer(fd, size, &ptr); int handle = create_buffer(fd, size, &ptr);
@ -930,7 +1119,8 @@ int _floatbuffer(const shaders& shaders,
T0V(TX_ENABLE T0V(TX_ENABLE
, TX_ENABLE__TEX_0_ENABLE__ENABLE , TX_ENABLE__TEX_0_ENABLE__ENABLE
| TX_ENABLE__TEX_1_ENABLE__ENABLE); | TX_ENABLE__TEX_1_ENABLE__ENABLE
);
int clamp = 2; // clamp to [0.0, 1.0] int clamp = 2; // clamp to [0.0, 1.0]
int txformat = 29; // TX_FMT_32F_32F_32F_32F int txformat = 29; // TX_FMT_32F_32F_32F_32F
@ -1151,7 +1341,9 @@ int main()
int ib_dwords = indirect_buffer(shaders, int ib_dwords = indirect_buffer(shaders,
state, state,
theta, theta,
vertexbuffer_ptr); vertexbuffer_ptr,
floatbuffer_width,
floatbuffer_height);
assert(textures_length == 2); assert(textures_length == 2);
int particle_position_handle = state.handles[fb_output + 0]; int particle_position_handle = state.handles[fb_output + 0];

View File

@ -330,6 +330,39 @@ void ib_colorbuffer2(int buffer_index,
TU(reloc_index * 4); // index into relocs array TU(reloc_index * 4); // index into relocs array
} }
/*
 * Emit the RB3D_COLOROFFSET / RB3D_COLORPITCH state for one color buffer,
 * binding it to a relocated buffer at a caller-supplied byte offset.
 *
 * buffer_index  color buffer slot, 0..3; selects the register pair
 *               (registers of consecutive slots are 4 bytes apart)
 * reloc_index   entry in the relocs array that names the backing buffer
 * offset        byte offset of the render target inside that buffer;
 *               must not be negative
 * pitch         row pitch; written to the register as `pitch >> 1`, so it
 *               must be even and positive
 * macrotile, microtile, colorformat
 *               raw field values for RB3D_COLORPITCH
 */
void ib_colorbuffer3(int buffer_index,
                     int reloc_index,
                     int offset,
                     int pitch,
                     int macrotile, int microtile,
                     int colorformat)
{
  assert(buffer_index >= 0 && buffer_index <= 3);
  // A negative offset would point before the start of the relocated buffer.
  assert(offset >= 0);
  // `pitch >> 1` below discards the low bit; reject values that would be
  // silently truncated instead of programming a different pitch than asked.
  assert(pitch > 0 && (pitch & 1) == 0);

  int reg_offset = buffer_index * 4;

  //////////////////////////////////////////////////////////////////////////////
  // CB
  //////////////////////////////////////////////////////////////////////////////

  // NOTE(review): unlike a plain 0 placeholder, this value is meaningful: the
  // kernel's relocation handling appears to add the buffer's GPU address to
  // it rather than overwrite it -- confirm against the radeon CS checker.
  T0V(RB3D_COLOROFFSET0 + reg_offset
      , offset // byte offset within the relocated buffer
      );
  T3(_NOP, 0);
  TU(reloc_index * 4); // index into relocs array

  T0V(RB3D_COLORPITCH0 + reg_offset
      , RB3D_COLORPITCH__COLORPITCH(pitch >> 1)
      | RB3D_COLORPITCH__COLORTILE(macrotile)
      | RB3D_COLORPITCH__COLORMICROTILE(microtile)
      | RB3D_COLORPITCH__COLORFORMAT(colorformat)
      );
  // The COLORPITCH NOP is ignored/not applied due to
  // RADEON_CS_KEEP_TILING_FLAGS, but is still required.
  T3(_NOP, 0);
  TU(reloc_index * 4); // index into relocs array
}
void ib_viewport(int width, int height) void ib_viewport(int width, int height)
{ {
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
@ -869,6 +902,36 @@ void ib_vap_stream_cntl__32()
); );
} }
// Programs the vertex input stream layout for two attributes:
//   element 0: FLOAT_4, written to input vector 0, X/Y/Z/W passed through
//   element 1: FLOAT_2, written to input vector 1 and flagged as the last
//              element; X/Y passed through, Z forced to 0.0, W forced to 1.0
// All four components are write-enabled for both elements.
// NOTE(review): the "__42" suffix appears to encode the per-element component
// counts (4 then 2), following the sibling ib_vap_stream_cntl__32 -- confirm.
void ib_vap_stream_cntl__42()
{
//////////////////////////////////////////////////////////////////////////////
// VAP_PROG_STREAM_CNTL
//////////////////////////////////////////////////////////////////////////////
// Data type, destination vector and last-element flag for elements 0 and 1.
T0V(VAP_PROG_STREAM_CNTL_0
, VAP_PROG_STREAM_CNTL__DATA_TYPE_0__FLOAT_4
| VAP_PROG_STREAM_CNTL__SKIP_DWORDS_0(0)
| VAP_PROG_STREAM_CNTL__DST_VEC_LOC_0(0)
| VAP_PROG_STREAM_CNTL__LAST_VEC_0(0)
| VAP_PROG_STREAM_CNTL__DATA_TYPE_1__FLOAT_2
| VAP_PROG_STREAM_CNTL__SKIP_DWORDS_1(0)
| VAP_PROG_STREAM_CNTL__DST_VEC_LOC_1(1)
| VAP_PROG_STREAM_CNTL__LAST_VEC_1(1)
);
// Per-component swizzle and write mask for elements 0 and 1; element 1 only
// supplies X and Y, so Z and W are filled with constants.
T0V(VAP_PROG_STREAM_CNTL_EXT_0
, VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0__SELECT_Y
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_Z
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0__SELECT_W
| VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111) // XYZW
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_1__SELECT_X
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_1__SELECT_Y
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_1__SELECT_FP_ZERO
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_1__SELECT_FP_ONE
| VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_1(0b1111) // XYZW
);
}
void ib_vap_stream_cntl__323() void ib_vap_stream_cntl__323()
{ {
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////

View File

@ -62,6 +62,12 @@ void ib_colorbuffer2(int buffer_index,
int pitch, int pitch,
int macrotile, int microtile, int macrotile, int microtile,
int colorformat); int colorformat);
void ib_colorbuffer3(int buffer_index,
int reloc_index,
int offset,
int pitch,
int macrotile, int microtile,
int colorformat);
void ib_zbuffer(int reloc_index, int pitch, int zfunc); void ib_zbuffer(int reloc_index, int pitch, int zfunc);
void ib_rs_instructions(int count); void ib_rs_instructions(int count);
void ib_texture__0(); void ib_texture__0();
@ -87,6 +93,7 @@ void ib_ga_consts(const float * consts, int consts_length, int index);
void ib_vap_stream_cntl__2(); void ib_vap_stream_cntl__2();
void ib_vap_stream_cntl__3(); void ib_vap_stream_cntl__3();
void ib_vap_stream_cntl__32(); void ib_vap_stream_cntl__32();
void ib_vap_stream_cntl__42();
void ib_vap_stream_cntl__323(); void ib_vap_stream_cntl__323();
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -0,0 +1,10 @@
-- Fragment shader: sample texture 0 and write the sampled value to out[0].
-- temp[0]: texture coordinate
-- Texture load: temp[0] is used as the coordinate and then overwritten with
-- the sampled RGBA value.
-- NOTE(review): the coordinate swizzle is .rgaa (the third lane reads alpha,
-- not blue) -- confirm this is intentional.
TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE
temp[0].rgba = LD tex[0].rgba temp[0].rgaa ;
-- Output: MAX of a value with itself yields that value, so this copies
-- temp[0] to out[0] unchanged (alpha and rgb are written by separate ops).
OUT TEX_SEM_WAIT
src0.a = temp[0] ,
src0.rgb = temp[0] :
out[0].a = MAX src0.a src0.a ,
out[0].rgb = MAX src0.rgb src0.rgb ;

Binary file not shown.

View File

@ -0,0 +1,2 @@
out[0].xyzw = VE_ADD input[0].xyzw input[0].0000 ;
out[1].xyzw = VE_ADD input[0].xy00 const[0].xy00 ;

Binary file not shown.