From 940b0cd43df4930640fccf38a313bf56df6b485b Mon Sep 17 00:00:00 2001
From: Zack Buhman
Date: Sat, 8 Nov 2025 17:45:35 -0600
Subject: [PATCH] add tx_rt_float

---
 src/r500/indirect_buffer.c |  69 ++++++++
 src/r500/indirect_buffer.h |   5 +
 src/tx_rt_float.cpp        | 345 ++++++++++++++++++++++++++++++++++++++
 src/tx_rt_float.fs.asm     |   8 +
 src/tx_rt_float.fs.bin     | Bin 0 -> 48 bytes
 src/tx_rt_float.vs.asm     |   2 +
 src/tx_rt_float.vs.bin     | Bin 0 -> 32 bytes
 7 files changed, 429 insertions(+)
 create mode 100644 src/tx_rt_float.cpp
 create mode 100644 src/tx_rt_float.fs.asm
 create mode 100644 src/tx_rt_float.fs.bin
 create mode 100644 src/tx_rt_float.vs.asm
 create mode 100644 src/tx_rt_float.vs.bin

diff --git a/src/r500/indirect_buffer.c b/src/r500/indirect_buffer.c
index 58637b4..c75d648 100644
--- a/src/r500/indirect_buffer.c
+++ b/src/r500/indirect_buffer.c
@@ -567,6 +567,59 @@ void ib_texture__1(int reloc_index,
   TU(reloc_index * 4); // index into relocs array
 }
 
+// Program texture unit 0 as a `width` x `height` 2D texture in the
+// TX_FMT_32F_32F_32F_32F format with point min/mag filtering and the given
+// `clamp` mode on S and T, then emit a NOP packet carrying the index of
+// `reloc_index` into the relocs array.
+void ib_texture__1_float32(int reloc_index,
+                           int width, int height,
+                           int macrotile, int microtile,
+                           int clamp)
+{
+  //////////////////////////////////////////////////////////////////////////////
+  // TX
+  //////////////////////////////////////////////////////////////////////////////
+
+  T0V(TX_INVALTAGS, 0x00000000);
+
+  T0V(TX_ENABLE
+      , TX_ENABLE__TEX_0_ENABLE__ENABLE);
+
+  T0V(TX_FILTER0_0
+      , TX_FILTER0__CLAMP_S(clamp)
+      | TX_FILTER0__CLAMP_T(clamp)
+      | TX_FILTER0__MAG_FILTER__POINT
+      | TX_FILTER0__MIN_FILTER__POINT
+      );
+  T0V(TX_FILTER1_0
+      , TX_FILTER1__LOD_BIAS(1)
+      | TX_FILTER1__BORDER_FIX(1)
+      );
+  T0V(TX_BORDER_COLOR_0, 0);
+  T0V(TX_FORMAT0_0
+      , TX_FORMAT0__TXWIDTH(width - 1)
+      | TX_FORMAT0__TXHEIGHT(height - 1)
+      );
+
+  T0V(TX_FORMAT1_0
+      , TX_FORMAT1__TXFORMAT__TX_FMT_32F_32F_32F_32F
+      | TX_FORMAT1__SEL_ALPHA(3)
+      | TX_FORMAT1__SEL_RED(0)
+      | TX_FORMAT1__SEL_GREEN(1)
+      | TX_FORMAT1__SEL_BLUE(2)
+      | TX_FORMAT1__TEX_COORD_TYPE__2D
+      );
+  T0V(TX_FORMAT2_0, 0);
+
+  T0V(TX_OFFSET_0
+      , TX_OFFSET__MACRO_TILE(macrotile)
+      | TX_OFFSET__MICRO_TILE(microtile)
+      );
+
+  T3(_NOP, 0);
+  TU(reloc_index * 4); // index into relocs array
+}
+
 void ib_vap_pvs(struct shader_offset * offset)
 {
   const int instruction_size = 4 * 4; // bytes
@@ -650,6 +703,22 @@ void ib_vap_pvs_const_offset(const float * consts, int size, int offset)
     TF(consts[i]);
 }
 
+// Write `consts_length` floats (a positive multiple of 4) to
+// GA_US_VECTOR_DATA, starting at vector index `index` with
+// GA_US_VECTOR_INDEX TYPE 1.
+void ib_ga_consts(const float * consts, int consts_length, int index)
+{
+  assert(consts_length > 0 && consts_length % 4 == 0);
+
+  T0V(GA_US_VECTOR_INDEX
+      , GA_US_VECTOR_INDEX__INDEX(index)
+      | GA_US_VECTOR_INDEX__TYPE(1)
+      );
+  T0_ONE_REG(GA_US_VECTOR_DATA, (consts_length - 1));
+  for (int i = 0; i < consts_length; i++)
+    TF(consts[i]);
+}
+
 void ib_vap_stream_cntl__2()
 {
   //////////////////////////////////////////////////////////////////////////////
diff --git a/src/r500/indirect_buffer.h b/src/r500/indirect_buffer.h
index d7f23a4..de4e37e 100644
--- a/src/r500/indirect_buffer.h
+++ b/src/r500/indirect_buffer.h
@@ -64,10 +64,15 @@ void ib_texture__1(int reloc_index,
                    int width, int height,
                    int macrotile, int microtile,
                    int clamp);
+void ib_texture__1_float32(int reloc_index,
+                           int width, int height,
+                           int macrotile, int microtile,
+                           int clamp);
 void ib_vap_pvs(struct shader_offset * offset);
 void ib_ga_us(struct shader_offset * offset);
 void ib_vap_pvs_const_cntl(const float * consts, int size);
 void ib_vap_pvs_const_offset(const float * consts, int size, int offset);
+void ib_ga_consts(const float * consts, int consts_length, int index);
 void ib_vap_stream_cntl__2();
 void ib_vap_stream_cntl__3();
 void ib_vap_stream_cntl__32();
diff --git a/src/tx_rt_float.cpp b/src/tx_rt_float.cpp
new file mode 100644
index 0000000..f476771
--- /dev/null
+++ b/src/tx_rt_float.cpp
@@ -0,0 +1,345 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "r500/3d_registers.h"
+#include "r500/3d_registers_undocumented.h"
+#include "r500/3d_registers_bits.h"
+#include "r500/indirect_buffer.h"
+#include "r500/shader.h"
+#include "r500/display_controller.h"
+
+#include "drm/buffer.h"
+#include "drm/drm.h"
+
+#include "math/float_types.hpp"
+#include "math/transform.hpp"
+#include "math/constants.hpp"
+
+#include "../model/model2.h"
+#include "../model/plane.h"
+
+#define TEXTURE_TILE_SHADER 0
+
+const char * vertex_shader_paths[] = {
+  "tx_rt_float.vs.bin",
+};
+const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
+const char * fragment_shader_paths[] = {
+  "tx_rt_float.fs.bin",
+};
+const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
+
+#define PARTICLE_TEXTURE 0
+
+const char * textures[] = {
+  "../texture/butterfly_1024x1024_rgba8888.data",
+};
+const int textures_length = (sizeof (textures)) / (sizeof (textures[0]));
+
+struct shaders {
+  struct shader_offset * vertex;
+  struct shader_offset * fragment;
+  int vertex_length;
+  int fragment_length;
+};
+
+const int floatbuffer_width = 64;
+const int floatbuffer_height = 64;
+
+// Build the command stream for one pass: draw a four-vertex triangle fan that
+// samples the float texture bound via `input_reloc_index` and writes it to the
+// ARGB32323232 color buffer bound via `output_reloc_index`.
+//
+// Returns ib_ix after the last dword is emitted (main treats it as a dword count).
+int _floatbuffer(const shaders& shaders,
+                 int input_reloc_index,
+                 int output_reloc_index)
+{
+  int viewport_width = floatbuffer_width;
+  int viewport_height = floatbuffer_height;
+  int texture_width = floatbuffer_width;
+  int texture_height = floatbuffer_height;
+  float tx = 0.5f / ((float)texture_width);
+  float ty = 0.5f / ((float)texture_height);
+
+  ib_ix = 0;
+
+  ib_generic_initialization();
+
+  printf("vp %d %d \n", viewport_width, viewport_height);
+
+  T0V(SC_SCISSOR0
+      , SC_SCISSOR0__XS0(0)
+      | SC_SCISSOR0__YS0(0)
+      );
+  T0V(SC_SCISSOR1
+      , SC_SCISSOR1__XS1(viewport_width - 1)
+      | SC_SCISSOR1__YS1(viewport_height - 1)
+      );
+  T0Vf(VAP_VPORT_XSCALE, (float)viewport_width);
+  T0Vf(VAP_VPORT_YSCALE, (float)viewport_height);
+
+  ib_colorbuffer(output_reloc_index, viewport_width, 0, 0); // macrotile, microtile
+
+  T0V(RB3D_COLORPITCH0
+      , RB3D_COLORPITCH__COLORPITCH(viewport_width >> 1)
+      | RB3D_COLORPITCH__COLORTILE(0)
+      | RB3D_COLORPITCH__COLORMICROTILE(0)
+      | RB3D_COLORPITCH__COLORFORMAT__ARGB32323232
+      );
+
+  T0V(US_OUT_FMT_0
+      , US_OUT_FMT__OUT_FMT(21) // C4_32_FP
+      | US_OUT_FMT__C0_SEL__RED
+      | US_OUT_FMT__C1_SEL__GREEN
+      | US_OUT_FMT__C2_SEL__BLUE
+      | US_OUT_FMT__C3_SEL__ALPHA
+      | US_OUT_FMT__OUT_SIGN(0)
+      );
+  T0V(US_OUT_FMT_1
+      , US_OUT_FMT__OUT_FMT(15) // render target is not used
+      );
+  T0V(US_OUT_FMT_2
+      , US_OUT_FMT__OUT_FMT(15) // render target is not used
+      );
+  T0V(US_OUT_FMT_3
+      , US_OUT_FMT__OUT_FMT(15) // render target is not used
+      );
+
+  // shaders
+  load_pvs_shaders(shaders.vertex, shaders.vertex_length);
+  load_us_shaders(shaders.fragment, shaders.fragment_length);
+
+  // GA
+
+  T0V(GB_ENABLE
+      , 0
+      );
+
+  //////////////////////////////////////////////////////////////////////////////
+  // RS
+  //////////////////////////////////////////////////////////////////////////////
+
+  ib_rs_instructions(1);
+
+  //////////////////////////////////////////////////////////////////////////////
+  // VAP OUT
+  //////////////////////////////////////////////////////////////////////////////
+
+  T0V(VAP_OUT_VTX_FMT_0
+      , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)
+      );
+  T0V(VAP_OUT_VTX_FMT_1
+      , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)
+      );
+
+  //
+
+  T0V(ZB_CNTL, 0);
+  T0V(ZB_ZSTENCILCNTL, 0);
+
+  //
+
+  int macrotile = 0;
+  int microtile = 0;
+  int clamp = 2; // clamp to [0.0, 1.0]
+  ib_texture__1_float32(input_reloc_index,
+                        texture_width, texture_height,
+                        macrotile, microtile,
+                        clamp);
+
+  ib_vap_stream_cntl__2();
+
+  T0V(US_PIXSIZE
+      , US_PIXSIZE__PIX_SIZE(1)
+      );
+
+  ib_ga_us(&shaders.fragment[TEXTURE_TILE_SHADER]);
+  ib_vap_pvs(&shaders.vertex[TEXTURE_TILE_SHADER]);
+
+  const float vertex_consts[] = {
+    tx, ty, 0, 0,
+  };
+  const int vertex_consts_size = (sizeof (vertex_consts));
+  ib_vap_pvs_const_cntl(vertex_consts, vertex_consts_size);
+
+  // fragment constants
+
+  const float fragment_consts[] = {
+    1234.0f, 0, 0, 0,
+  };
+  int fragment_consts_length = (sizeof (fragment_consts)) / (sizeof (fragment_consts[0]));
+  ib_ga_consts(fragment_consts, fragment_consts_length, 0);
+
+  //////////////////////////////////////////////////////////////////////////////
+  // VAP
+  //////////////////////////////////////////////////////////////////////////////
+
+  T0V(VAP_CLIP_CNTL
+      , VAP_CLIP_CNTL__CLIP_DISABLE(1)
+      );
+
+  T0V(VAP_VTE_CNTL
+      , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
+      | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
+      | VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
+      | VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
+      );
+
+  T0V(VAP_CNTL_STATUS
+      , VAP_CNTL_STATUS__PVS_BYPASS(0)
+      );
+
+  //////////////////////////////////////////////////////////////////////////////
+  // 3D_DRAW
+  //////////////////////////////////////////////////////////////////////////////
+
+  const int dwords_per_vtx = 2;
+
+  T0V(VAP_VTX_SIZE
+      , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
+      );
+
+  const float vertices[] = {
+    0.0f, 0.0f,
+    1.0f, 0.0f,
+    1.0f, 1.0f,
+    0.0f, 1.0f,
+  };
+  const int vertex_count = 4;
+  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
+  TU( VAP_VF_CNTL__PRIM_TYPE(5) // triangle fan
+    | VAP_VF_CNTL__PRIM_WALK(3)
+    | VAP_VF_CNTL__INDEX_SIZE(0)
+    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
+    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
+    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
+    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
+    );
+  for (int i = 0; i < vertex_count * dwords_per_vtx; i++) {
+    TF(vertices[i]);
+  }
+
+  return ib_ix;
+}
+
+// Fill `size` bytes at `ptr` with descending float values starting at
+// 16384, written as consecutive four-float vectors.
+void floatbuffer_data(void * ptr, int size)
+{
+  float * f32 = (float*)ptr;
+  int vector_length = size / (4 * 4);
+
+  int offset = 16384;
+  for (int i = 0; i < vector_length; i++) {
+    f32[i * 4 + 0] = offset--;
+    f32[i * 4 + 1] = offset--;
+    f32[i * 4 + 2] = offset--;
+    f32[i * 4 + 3] = offset--;
+  }
+}
+
+// Print every four-float vector at which buffers `a` and `b` differ;
+// `size` is the length of each buffer in bytes.
+void floatbuffer_compare(void * a, void * b, int size)
+{
+  const float * a_f32 = (const float*)a;
+  const float * b_f32 = (const float*)b;
+  int vector_length = size / (4 * 4);
+
+  for (int i = 0; i < vector_length; i++) {
+    if (   (a_f32[i * 4 + 0] != b_f32[i * 4 + 0])
+        || (a_f32[i * 4 + 1] != b_f32[i * 4 + 1])
+        || (a_f32[i * 4 + 2] != b_f32[i * 4 + 2])
+        || (a_f32[i * 4 + 3] != b_f32[i * 4 + 3])) {
+      printf("a[%d] = [% 2.02f % 2.02f % 2.02f % 2.02f] ; ", i,
+             a_f32[i * 4 + 0], a_f32[i * 4 + 1], a_f32[i * 4 + 2], a_f32[i * 4 + 3]);
+      printf("b[%d] = [% 2.02f % 2.02f % 2.02f % 2.02f] \n", i,
+             b_f32[i * 4 + 0], b_f32[i * 4 + 1], b_f32[i * 4 + 2], b_f32[i * 4 + 3]);
+    }
+  }
+}
+
+// Fill floatbuffer[0] with a test pattern, submit the _floatbuffer command
+// stream once, then print the vectors where floatbuffer[1] differs from it.
+int main()
+{
+  struct shaders shaders = {
+    .vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
+    .fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
+    .vertex_length = vertex_shader_paths_length,
+    .fragment_length = fragment_shader_paths_length,
+  };
+
+  void * rmmio = map_pci_resource2();
+
+  int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
+  if (fd == -1) {
+    perror("open /dev/dri/card0");
+    return 1;
+  }
+
+  const int colorbuffer_size = 1600 * 1200 * 4;
+  const int floatbuffer_size = floatbuffer_width * floatbuffer_height * 4 * 4;
+  const int floatbuffer_count = 2;
+  int colorbuffer_handle[2];
+  int zbuffer_handle;
+  int * texturebuffer_handle;
+  int flush_handle;
+  int floatbuffer_handle[floatbuffer_count];
+
+  void * colorbuffer_ptr[2];
+  void * floatbuffer_ptr[floatbuffer_count];
+  void * zbuffer_ptr;
+
+  // colorbuffer
+  colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
+  colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
+  zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
+  flush_handle = create_flush_buffer(fd);
+  texturebuffer_handle = load_textures(fd, textures, textures_length);
+
+  floatbuffer_handle[0] = create_buffer(fd, floatbuffer_size, &floatbuffer_ptr[0]);
+  floatbuffer_handle[1] = create_buffer(fd, floatbuffer_size, &floatbuffer_ptr[1]);
+
+  fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
+  fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
+  fprintf(stderr, "floatbuffer handle[0] %d\n", floatbuffer_handle[0]);
+  fprintf(stderr, "floatbuffer handle[1] %d\n", floatbuffer_handle[1]);
+  fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);
+
+  floatbuffer_data(floatbuffer_ptr[0], floatbuffer_size);
+
+  int ib_dwords = _floatbuffer(shaders,
+                               TEXTUREBUFFER_RELOC_INDEX + 0, // input
+                               COLORBUFFER_RELOC_INDEX); // output
+
+  int ret = drm_radeon_cs(fd,
+                          floatbuffer_handle[1],
+                          zbuffer_handle,
+                          flush_handle,
+                          floatbuffer_handle,
+                          floatbuffer_count,
+                          ib_dwords);
+  if (ret == -1) {
+    // submission failed: there is no rendered output worth comparing
+    fprintf(stderr, "drm_radeon_cs failed\n");
+    close(fd);
+    return 1;
+  }
+
+  floatbuffer_compare(floatbuffer_ptr[0], floatbuffer_ptr[1], floatbuffer_size);
+
+  close(fd);
+  return 0;
+}
diff --git a/src/tx_rt_float.fs.asm b/src/tx_rt_float.fs.asm
new file mode 100644
index 0000000..3e534e8
--- /dev/null
+++ b/src/tx_rt_float.fs.asm
@@ -0,0 +1,8 @@
+TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE
+  temp[0].rgba = LD tex[0].rgba temp[0].rgaa ;
+
+OUT TEX_SEM_WAIT
+src0.a = temp[0],
+src0.rgb = temp[0] :
+  out[0].a = MAX src0.a src0.a ,
+  out[0].rgb = MAX src0.rgb src0.rgb ;
diff --git a/src/tx_rt_float.fs.bin b/src/tx_rt_float.fs.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bbfaa90109838e4c340fb53d5d938c5d4f5c5054
GIT binary patch
literal 48
ocmZRtU;qLKCWbEzPe3dfU~OOrGMP9)Sb@off%!lJ11nGj0D>k2$N&HU

literal 0
HcmV?d00001

diff --git a/src/tx_rt_float.vs.asm b/src/tx_rt_float.vs.asm
new file mode 100644
index 0000000..b9692a2
--- /dev/null
+++ b/src/tx_rt_float.vs.asm
@@ -0,0 +1,2 @@
+out[0].xyzw = VE_ADD input[0].xyzw input[0].0000 ;
+out[1].xyzw = VE_ADD input[0].xy00 const[0].0000 ;
diff --git a/src/tx_rt_float.vs.bin b/src/tx_rt_float.vs.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f14a7b78b7c36f4c707a8012725943d507673c66
GIT binary patch
literal 32
icmZQ(`oO@*aFKzrL4}d=!GA_(B@kbckqO9W0`dWUZ3qwm

literal 0
HcmV?d00001
