// Patch summary. Commentary only: it sits ahead of the first "diff --git"
// header and does not touch any hunk content or hunk line counts.
//
// NOTE(review): the hunks below appear with their line breaks collapsed into
// spaces; the original newlines presumably have to be restored before this
// text can be applied as a patch -- confirm against the source commit.
//
// src/particle_oriented_animated_quad_vbuf_pixel_shader.cpp
//   - Defines VERTEX_BUFFER_COPY_SHADER as 5 and appends
//     "vertex_buffer_copy.vs.bin" / "vertex_buffer_copy.fs.bin" to the
//     vertex and fragment shader path tables.
//   - _3d_particle_inner: VTX_AOS_COUNT0 and VTX_AOS_STRIDE0 change from 3 to 4.
//   - _3d_particle: calls ib_vap_stream_cntl__42() instead of
//     ib_vap_stream_cntl__32(). The loop that wrote positions into
//     vertexbuffer_ptr gains a W = 1 store and is then wrapped, together with
//     the following asm volatile barrier, in a block comment.
//   - _copy_to_vertexbuffer: gains floatbuffer_width / floatbuffer_height
//     parameters and a body that
//       * asserts floatbuffer_width <= 1024,
//       * binds colour buffer 0 through ib_colorbuffer3 using
//         VERTEXBUFFER_RELOC_INDEX and the byte offset
//         state.length * 4 * 2 * sizeof(float),
//       * binds texture 0 through ib_texture2 using
//         PARTICLE_POSITION_RELOC_INDEX,
//       * selects the VERTEX_BUFFER_COPY_SHADER vertex/fragment programs,
//       * emits one 4-vertex primitive with _3D_DRAW_IMMD_2, then writes
//         RB3D_DSTCACHE_CTLSTAT, ZB_ZCACHE_CTLSTAT and WAIT_UNTIL.
//   - indirect_buffer: gains the same two parameters and forwards them to
//     _copy_to_vertexbuffer; its load_pvs_shaders / load_us_shaders calls are
//     commented out.
//   - init_particles_vertexbuffer: the second term of the buffer size uses
//     4 instead of 3 floats per vertex.
//   - _floatbuffer: the closing parenthesis of the TX_ENABLE write moves to
//     its own line (formatting only).
//   - main: passes floatbuffer_width and floatbuffer_height to
//     indirect_buffer.
//
// src/r500/indirect_buffer.c, src/r500/indirect_buffer.h
//   - Adds ib_colorbuffer3, which writes a caller-supplied offset to
//     RB3D_COLOROFFSET0 + buffer_index * 4 and pitch >> 1 plus the tiling and
//     format fields to RB3D_COLORPITCH0 + buffer_index * 4, each followed by
//     a _NOP packet carrying reloc_index * 4.
//   - Adds ib_vap_stream_cntl__42: stream 0 is FLOAT_4 to vector 0, stream 1
//     is FLOAT_2 to vector 1 (last vector), with Z = 0 and W = 1 swizzles on
//     stream 1.
//
// src/vertex_buffer_copy.vs.asm, src/vertex_buffer_copy.fs.asm (+ .bin files)
//   - New shader sources and their binary forms.
//
// NOTE(review): vertex_buffer_copy.vs.asm reads const[0], but no constant
// upload is visible inside _copy_to_vertexbuffer in this patch -- confirm the
// constant is set elsewhere before the draw.
// NOTE(review): the immediate-vertex loop in _copy_to_vertexbuffer bounds on
// "vertex_count * 2" while dwords_per_vtx (also 2) is in scope -- the two
// agree today; confirm they are meant to stay tied.
diff --git a/src/particle_oriented_animated_quad_vbuf_pixel_shader.cpp b/src/particle_oriented_animated_quad_vbuf_pixel_shader.cpp index 3ca6fb7..564fb79 100644 --- a/src/particle_oriented_animated_quad_vbuf_pixel_shader.cpp +++ b/src/particle_oriented_animated_quad_vbuf_pixel_shader.cpp @@ -32,6 +32,7 @@ #define PARTICLE_SHADER 2 #define TEXTURE_TILE_SHADER 3 #define PARTICLE_PHYSICS_SHADER 4 +#define VERTEX_BUFFER_COPY_SHADER 5 #define PARTICLE_POSITION_RELOC_INDEX 5 @@ -41,6 +42,7 @@ const char * vertex_shader_paths[] = { "particle_particle_animated_quad_vbuf.vs.bin", "texture_tile.vs.bin", "particle_physics.vs.bin", + "vertex_buffer_copy.vs.bin", }; const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0])); const char * fragment_shader_paths[] = { @@ -49,6 +51,7 @@ const char * fragment_shader_paths[] = { "particle_particle.fs.bin", "texture_tile.fs.bin", "particle_physics.fs.bin", + "vertex_buffer_copy.fs.bin", }; const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0])); @@ -293,8 +296,8 @@ void _3d_particle_inner(int particles_length, int position_offset) | VAP_VTX_NUM_ARRAYS__VC_FORCE_PREFETCH(1) ); TU( // VAP_VTX_AOS_ATTR01 - VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(3) - | VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(3) + VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(4) + | VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(4) | VAP_VTX_AOS_ATTR__VTX_AOS_COUNT1(2) | VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE1(2) ); @@ -468,7 +471,7 @@ void _3d_particle(const shaders& shaders, macrotile, microtile, clamp); - ib_vap_stream_cntl__32(); + ib_vap_stream_cntl__42(); // shaders T0V(US_PIXSIZE @@ -543,6 +546,7 @@ void _3d_particle(const shaders& shaders, ib_vap_pvs_const_cntl(consts, (sizeof (consts))); int offset = state.length * 4 * 2; + /* int ix = 0; particle_position * pos = state.position_output(); for (int i = 0; i < state.length; i++) { @@ -554,17 +558,197 @@ void _3d_particle(const shaders& shaders, ix++; 
vertexbuffer_ptr[offset + ix] = position.z; ix++; + vertexbuffer_ptr[offset + ix] = 1; // W + ix++; }; } asm volatile ("" ::: "memory"); + */ _3d_particle_inner(state.length, offset); } void _copy_to_vertexbuffer(const shaders& shaders, - const floatbuffer_state& state) + const floatbuffer_state& state, + int floatbuffer_width, + int floatbuffer_height) { + assert(floatbuffer_width <= 1024); + int viewport_width = floatbuffer_width * 4; + int viewport_height = floatbuffer_height; + int texture_width = floatbuffer_width; + int texture_height = floatbuffer_height; + int macrotile = 0; + int microtile = 0; + + T0V(SC_SCISSOR0 + , SC_SCISSOR0__XS0(0) + | SC_SCISSOR0__YS0(0) + ); + T0V(SC_SCISSOR1 + , SC_SCISSOR1__XS1(viewport_width - 1) + | SC_SCISSOR1__YS1(viewport_height - 1) + ); + T0Vf(VAP_VPORT_XSCALE, (float)viewport_width); + T0Vf(VAP_VPORT_YSCALE, (float)viewport_height); + + int colorformat = 7; // ARGB32323232 + + int offset = state.length * 4 * 2 * (sizeof (float)); + + ib_colorbuffer3(0, + VERTEXBUFFER_RELOC_INDEX, + offset, + viewport_width, + macrotile, + microtile, + colorformat); + + T0V(US_OUT_FMT_0 + , US_OUT_FMT__OUT_FMT(21) // C4_32_FP + | US_OUT_FMT__C0_SEL__RED + | US_OUT_FMT__C1_SEL__GREEN + | US_OUT_FMT__C2_SEL__BLUE + | US_OUT_FMT__C3_SEL__ALPHA + | US_OUT_FMT__OUT_SIGN(0) + ); + T0V(US_OUT_FMT_1 + , US_OUT_FMT__OUT_FMT(15) // render target is not used + ); + T0V(US_OUT_FMT_2 + , US_OUT_FMT__OUT_FMT(15) // render target is not used + ); + T0V(US_OUT_FMT_3 + , US_OUT_FMT__OUT_FMT(15) // render target is not used + ); + + // shaders + //load_pvs_shaders(shaders.vertex, shaders.vertex_length); + //load_us_shaders(shaders.fragment, shaders.fragment_length); + + // GA + + T0V(GB_ENABLE + , 0 + ); + + ////////////////////////////////////////////////////////////////////////////// + // RS + ////////////////////////////////////////////////////////////////////////////// + + ib_rs_instructions(1); + + 
////////////////////////////////////////////////////////////////////////////// + // VAP OUT + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_OUT_VTX_FMT_0 + , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1) + ); + T0V(VAP_OUT_VTX_FMT_1 + , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4) + ); + + // + + T0V(ZB_CNTL, 0); + T0V(ZB_ZSTENCILCNTL, 0); + + // + + ////////////////////////////////////////////////////////////////////////////// + // TX + ////////////////////////////////////////////////////////////////////////////// + + T0V(TX_INVALTAGS, 0x00000000); + + T0V(TX_ENABLE + , TX_ENABLE__TEX_0_ENABLE__ENABLE + ); + + int clamp = 2; // clamp to [0.0, 1.0] + int txformat = 29; // TX_FMT_32F_32F_32F_32F + ib_texture2(0, + PARTICLE_POSITION_RELOC_INDEX, + texture_width, texture_height, + macrotile, microtile, + clamp, + txformat); + + // shaders + + ib_vap_stream_cntl__2(); + + T0V(US_PIXSIZE + , US_PIXSIZE__PIX_SIZE(2) + ); + + ib_ga_us(&shaders.fragment[VERTEX_BUFFER_COPY_SHADER]); + ib_vap_pvs(&shaders.vertex[VERTEX_BUFFER_COPY_SHADER]); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_CLIP_CNTL + , VAP_CLIP_CNTL__CLIP_DISABLE(1) + ); + + T0V(VAP_VTE_CNTL + , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1) + | VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division + | VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division + ); + + T0V(VAP_CNTL_STATUS + , VAP_CNTL_STATUS__PVS_BYPASS(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // 3D_DRAW + ////////////////////////////////////////////////////////////////////////////// + + const int dwords_per_vtx = 2; + + T0V(VAP_VTX_SIZE + , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx) + ); + + const float vertices[] = { + 0.0f, 0.0f, + 1.0f, 0.0f, + 1.0f, 1.0f, + 0.0f, 1.0f, + }; + const int vertex_count = 4; + 
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1); + TU( VAP_VF_CNTL__PRIM_TYPE(13) // quad list + | VAP_VF_CNTL__PRIM_WALK(3) + | VAP_VF_CNTL__INDEX_SIZE(0) + | VAP_VF_CNTL__VTX_REUSE_DIS(0) + | VAP_VF_CNTL__DUAL_INDEX_MODE(0) + | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) + | VAP_VF_CNTL__NUM_VERTICES(vertex_count) + ); + for (int i = 0; i < vertex_count * 2; i++) { + TF(vertices[i]); + } + + // + + T0V(RB3D_DSTCACHE_CTLSTAT + , RB3D_DSTCACHE_CTLSTAT__DC_FLUSH(0x2) // Flush dirty 3D data + | RB3D_DSTCACHE_CTLSTAT__DC_FREE(0x2) // Free 3D tags + ); + + T0V(ZB_ZCACHE_CTLSTAT + , ZB_ZCACHE_CTLSTAT__ZC_FLUSH(1) + | ZB_ZCACHE_CTLSTAT__ZC_FREE(1) + ); + + T0V(WAIT_UNTIL, 0x00020000); } int indirect_buffer(const shaders& shaders, @@ -572,7 +756,9 @@ int indirect_buffer(const shaders& shaders, //const particle * particles, //const int particles_length, float theta, - float * vertexbuffer_ptr) + float * vertexbuffer_ptr, + int floatbuffer_width, + int floatbuffer_height) { int width = 1600; int height = 1200; @@ -582,7 +768,10 @@ int indirect_buffer(const shaders& shaders, ib_generic_initialization(); - _copy_to_vertexbuffer(shaders, state); + _copy_to_vertexbuffer(shaders, + state, + floatbuffer_width, + floatbuffer_height); T0V(RB3D_BLENDCNTL, 0); T0V(RB3D_ABLENDCNTL, 0); @@ -610,8 +799,8 @@ int indirect_buffer(const shaders& shaders, , US_OUT_FMT__OUT_FMT(15) // render target is not used ); - load_pvs_shaders(shaders.vertex, shaders.vertex_length); - load_us_shaders(shaders.fragment, shaders.fragment_length); + //load_pvs_shaders(shaders.vertex, shaders.vertex_length); + //load_us_shaders(shaders.fragment, shaders.fragment_length); ////////////////////////////////////////////////////////////////////////////// // DRAW @@ -707,7 +896,7 @@ int init_particles_vertexbuffer(int fd, int particles_length, float ** ptr_out) const int vertex_count = 4; const int size = particles_length * vertex_count * 2 * (sizeof (float)) - + particles_length * vertex_count * 3 * (sizeof 
(float)); + + particles_length * vertex_count * 4 * (sizeof (float)); void * ptr; int handle = create_buffer(fd, size, &ptr); @@ -930,7 +1119,8 @@ int _floatbuffer(const shaders& shaders, T0V(TX_ENABLE , TX_ENABLE__TEX_0_ENABLE__ENABLE - | TX_ENABLE__TEX_1_ENABLE__ENABLE); + | TX_ENABLE__TEX_1_ENABLE__ENABLE + ); int clamp = 2; // clamp to [0.0, 1.0] int txformat = 29; // TX_FMT_32F_32F_32F_32F @@ -1151,7 +1341,9 @@ int main() int ib_dwords = indirect_buffer(shaders, state, theta, - vertexbuffer_ptr); + vertexbuffer_ptr, + floatbuffer_width, + floatbuffer_height); assert(textures_length == 2); int particle_position_handle = state.handles[fb_output + 0]; diff --git a/src/r500/indirect_buffer.c b/src/r500/indirect_buffer.c index f574753..e80322a 100644 --- a/src/r500/indirect_buffer.c +++ b/src/r500/indirect_buffer.c @@ -330,6 +330,39 @@ void ib_colorbuffer2(int buffer_index, TU(reloc_index * 4); // index into relocs array } +void ib_colorbuffer3(int buffer_index, + int reloc_index, + int offset, + int pitch, + int macrotile, int microtile, + int colorformat) +{ + assert(buffer_index >= 0 && buffer_index <= 3); + + int reg_offset = buffer_index * 4; + + ////////////////////////////////////////////////////////////////////////////// + // CB + ////////////////////////////////////////////////////////////////////////////// + + T0V(RB3D_COLOROFFSET0 + reg_offset + , offset // value replaced by kernel from relocs + ); + T3(_NOP, 0); + TU(reloc_index * 4); // index into relocs array + + T0V(RB3D_COLORPITCH0 + reg_offset + , RB3D_COLORPITCH__COLORPITCH(pitch >> 1) + | RB3D_COLORPITCH__COLORTILE(macrotile) + | RB3D_COLORPITCH__COLORMICROTILE(microtile) + | RB3D_COLORPITCH__COLORFORMAT(colorformat) + ); + // The COLORPITCH NOP is ignored/not applied due to + // RADEON_CS_KEEP_TILING_FLAGS, but is still required. 
+ T3(_NOP, 0); + TU(reloc_index * 4); // index into relocs array +} + void ib_viewport(int width, int height) { ////////////////////////////////////////////////////////////////////////////// @@ -869,6 +902,36 @@ void ib_vap_stream_cntl__32() ); } +void ib_vap_stream_cntl__42() +{ + ////////////////////////////////////////////////////////////////////////////// + // VAP_PROG_STREAM_CNTL + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_PROG_STREAM_CNTL_0 + , VAP_PROG_STREAM_CNTL__DATA_TYPE_0__FLOAT_4 + | VAP_PROG_STREAM_CNTL__SKIP_DWORDS_0(0) + | VAP_PROG_STREAM_CNTL__DST_VEC_LOC_0(0) + | VAP_PROG_STREAM_CNTL__LAST_VEC_0(0) + | VAP_PROG_STREAM_CNTL__DATA_TYPE_1__FLOAT_2 + | VAP_PROG_STREAM_CNTL__SKIP_DWORDS_1(0) + | VAP_PROG_STREAM_CNTL__DST_VEC_LOC_1(1) + | VAP_PROG_STREAM_CNTL__LAST_VEC_1(1) + ); + T0V(VAP_PROG_STREAM_CNTL_EXT_0 + , VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0__SELECT_Y + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_Z + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0__SELECT_W + | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111) // XYZW + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_1__SELECT_X + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_1__SELECT_Y + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_1__SELECT_FP_ZERO + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_1__SELECT_FP_ONE + | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_1(0b1111) // XYZW + ); +} + void ib_vap_stream_cntl__323() { ////////////////////////////////////////////////////////////////////////////// diff --git a/src/r500/indirect_buffer.h b/src/r500/indirect_buffer.h index edd795f..127133e 100644 --- a/src/r500/indirect_buffer.h +++ b/src/r500/indirect_buffer.h @@ -62,6 +62,12 @@ void ib_colorbuffer2(int buffer_index, int pitch, int macrotile, int microtile, int colorformat); +void ib_colorbuffer3(int buffer_index, + int reloc_index, + int offset, + int pitch, + int macrotile, int 
microtile, + int colorformat); void ib_zbuffer(int reloc_index, int pitch, int zfunc); void ib_rs_instructions(int count); void ib_texture__0(); @@ -87,6 +93,7 @@ void ib_ga_consts(const float * consts, int consts_length, int index); void ib_vap_stream_cntl__2(); void ib_vap_stream_cntl__3(); void ib_vap_stream_cntl__32(); +void ib_vap_stream_cntl__42(); void ib_vap_stream_cntl__323(); #ifdef __cplusplus diff --git a/src/vertex_buffer_copy.fs.asm b/src/vertex_buffer_copy.fs.asm new file mode 100644 index 0000000..bb7d428 --- /dev/null +++ b/src/vertex_buffer_copy.fs.asm @@ -0,0 +1,10 @@ +-- temp[0]: texture coordinate + +TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE + temp[0].rgba = LD tex[0].rgba temp[0].rgaa ; + +OUT TEX_SEM_WAIT +src0.a = temp[0] , +src0.rgb = temp[0] : + out[0].a = MAX src0.a src0.a , + out[0].rgb = MAX src0.rgb src0.rgb ; diff --git a/src/vertex_buffer_copy.fs.bin b/src/vertex_buffer_copy.fs.bin new file mode 100644 index 0000000..bbfaa90 Binary files /dev/null and b/src/vertex_buffer_copy.fs.bin differ diff --git a/src/vertex_buffer_copy.vs.asm b/src/vertex_buffer_copy.vs.asm new file mode 100644 index 0000000..eadd23e --- /dev/null +++ b/src/vertex_buffer_copy.vs.asm @@ -0,0 +1,2 @@ +out[0].xyzw = VE_ADD input[0].xyzw input[0].0000 ; +out[1].xyzw = VE_ADD input[0].xy00 const[0].xy00 ; diff --git a/src/vertex_buffer_copy.vs.bin b/src/vertex_buffer_copy.vs.bin new file mode 100644 index 0000000..b140eb3 Binary files /dev/null and b/src/vertex_buffer_copy.vs.bin differ