From 0d4e80b03efe45d54d0bbc3369a459505746dd07 Mon Sep 17 00:00:00 2001 From: Zack Buhman Date: Tue, 4 Nov 2025 18:37:17 -0600 Subject: [PATCH] add matrix_cubesphere_tiled (partially incomplete) --- shader_examples/mesa/gl_teximage2d.fs.txt | 12 + shader_examples/mesa/gl_teximage2d.vs.txt | 8 + src/Makefile | 3 + src/drm/drm.c | 2 +- src/drm/drm.h | 2 +- src/matrix_cubesphere.cpp | 27 +- src/matrix_cubesphere_tiled.cpp | 814 ++++++++++++++++++++++ src/r500/indirect_buffer.c | 92 +-- src/r500/indirect_buffer.h | 10 +- src/texture_tile.fs.asm | 8 + src/texture_tile.fs.bin | Bin 0 -> 48 bytes src/texture_tile.vs.asm | 1 + src/texture_tile.vs.bin | Bin 0 -> 16 bytes tools/bgra_to_rgba.py | 8 +- 14 files changed, 930 insertions(+), 57 deletions(-) create mode 100644 shader_examples/mesa/gl_teximage2d.fs.txt create mode 100644 shader_examples/mesa/gl_teximage2d.vs.txt create mode 100644 src/matrix_cubesphere_tiled.cpp create mode 100644 src/texture_tile.fs.asm create mode 100644 src/texture_tile.fs.bin create mode 100644 src/texture_tile.vs.asm create mode 100644 src/texture_tile.vs.bin diff --git a/shader_examples/mesa/gl_teximage2d.fs.txt b/shader_examples/mesa/gl_teximage2d.fs.txt new file mode 100644 index 0000000..241efcf --- /dev/null +++ b/shader_examples/mesa/gl_teximage2d.fs.txt @@ -0,0 +1,12 @@ +0x00007807, +0x02400000, +0xe400f400, +0x00000000, +0x00000000, +0x00000000, +0x00078005, +0x08020000, +0x08020000, +0x1c440220, +0x1c60c003, +0x00000005, diff --git a/shader_examples/mesa/gl_teximage2d.vs.txt b/shader_examples/mesa/gl_teximage2d.vs.txt new file mode 100644 index 0000000..d45b68e --- /dev/null +++ b/shader_examples/mesa/gl_teximage2d.vs.txt @@ -0,0 +1,8 @@ +0x00f00203, +0x00d10001, +0x01248001, +0x01248001, +0x00f02203, +0x00d10021, +0x01248021, +0x01248021, diff --git a/src/Makefile b/src/Makefile index d4b3875..bee8d17 100644 --- a/src/Makefile +++ b/src/Makefile @@ -24,6 +24,9 @@ R500_COMMON = \ matrix_cubesphere: $(R500_COMMON) matrix_cubesphere.o | shaders $(CXX) $(LDFLAGS) $^ -o $@ +matrix_cubesphere_tiled: $(R500_COMMON) matrix_cubesphere_tiled.o | shaders + $(CXX) $(LDFLAGS) $^ -o $@ + %.o: %.c $(CC) $(ARCH) $(CFLAGS) $(OPT) -c $< -o $@ diff --git a/src/drm/drm.c b/src/drm/drm.c index 14233d6..f8ab256 100644 --- a/src/drm/drm.c +++ b/src/drm/drm.c @@ -37,7 +37,7 @@ void drm_radeon_cs(int fd, }; for (int i = 0; i < texturebuffer_handles_length; i++) { - relocs[3 + i] = (struct drm_radeon_cs_reloc){ + relocs[TEXTUREBUFFER_RELOC_INDEX + i] = (struct drm_radeon_cs_reloc){ .handle = texturebuffer_handles[i], .read_domains = 4, // RADEON_GEM_DOMAIN_VRAM .write_domain = 4, // RADEON_GEM_DOMAIN_VRAM diff --git a/src/drm/drm.h b/src/drm/drm.h index 7314dcd..ff312fa 100644 --- a/src/drm/drm.h +++ b/src/drm/drm.h @@ -3,7 +3,7 @@ #define COLORBUFFER_RELOC_INDEX 0 #define ZBUFFER_RELOC_INDEX 1 #define FLUSH_RELOC_INDEX 2 -#define TEXTURE_RELOC_INDEX 3 +#define TEXTUREBUFFER_RELOC_INDEX 3 #ifdef __cplusplus extern "C" { diff --git a/src/matrix_cubesphere.cpp b/src/matrix_cubesphere.cpp index cd60745..f11e04e 100644 --- a/src/matrix_cubesphere.cpp +++ b/src/matrix_cubesphere.cpp @@ -67,7 +67,7 @@ void _3d_clear(struct shaders& shaders) // - ib_zbuffer(ZBUFFER_RELOC_INDEX, 7); // always + ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always ib_texture__0(); @@ -315,7 +315,7 @@ vec3 _3d_light(struct shaders& shaders, // - ib_zbuffer(ZBUFFER_RELOC_INDEX, 1); // less + ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less ib_texture__0(); @@ -403,9 +403,17 @@ void _3d_cube(struct shaders& shaders, // - ib_zbuffer(ZBUFFER_RELOC_INDEX, 1); // less + ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less - ib_texture__1(TEXTURE_RELOC_INDEX); + int width = 1024; + int height = 1024; + int macrotile = 0; + int microtile = 0; + int clamp = 0; // wrap/repeat + ib_texture__1(TEXTUREBUFFER_RELOC_INDEX, + width, height, + macrotile, microtile, + clamp); ib_vap_stream_cntl__323(); @@ -473,10 +481,19 @@ void _3d_cube(struct shaders& shaders, int indirect_buffer(shaders& shaders, float theta) { + int width = 1600; + int height = 1200; + int pitch = width; + ib_ix = 0; ib_generic_initialization(); + ib_viewport(width, height); + ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0); + + T0V(GB_ENABLE, 0); + T0V(US_OUT_FMT_0 , US_OUT_FMT__OUT_FMT(0) // C4_8 | US_OUT_FMT__C0_SEL(3) // Blue @@ -495,8 +512,6 @@ int indirect_buffer(shaders& shaders, , US_OUT_FMT__OUT_FMT(15) // render target is not used ); - ib_colorbuffer(COLORBUFFER_RELOC_INDEX); - load_pvs_shaders(shaders.vertex, shaders.vertex_length); load_us_shaders(shaders.fragment, shaders.fragment_length); diff --git a/src/matrix_cubesphere_tiled.cpp b/src/matrix_cubesphere_tiled.cpp new file mode 100644 index 0000000..1a5da29 --- /dev/null +++ b/src/matrix_cubesphere_tiled.cpp @@ -0,0 +1,814 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "r500/3d_registers.h" +#include "r500/3d_registers_undocumented.h" +#include "r500/3d_registers_bits.h" +#include "r500/indirect_buffer.h" +#include "r500/shader.h" +#include "r500/display_controller.h" + +#include "drm/buffer.h" +#include "drm/drm.h" + +#include "math/float_types.hpp" +#include "math/transform.hpp" +#include "math/constants.hpp" + +#include "../model/model2.h" +#include "../model/cubesphere.h" + +#define CLEAR_SHADER 0 +#define CUBESPHERE_SHADER 1 +#define LIGHT_SHADER 2 +#define TEXTURE_TILE_SHADER 3 + +const char * vertex_shader_paths[] = { + "clear.vs.bin", + "matrix_cubesphere.vs.bin", + "light.vs.bin", + "texture_tile.vs.bin", +}; +const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0])); +const char * fragment_shader_paths[] = { + "clear.fs.bin", + "matrix_cubesphere.fs.bin", + "light.fs.bin", + "texture_tile.fs.bin", +}; +const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0])); + +struct shaders { + struct shader_offset * vertex; + struct shader_offset * fragment; + int vertex_length; + int fragment_length; +}; + +void _3d_clear(const shaders& shaders) +{ + ib_rs_instructions(0); + + ////////////////////////////////////////////////////////////////////////////// + // VAP OUT + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_OUT_VTX_FMT_0 + , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)); + T0V(VAP_OUT_VTX_FMT_1 + , 0); + + // + + ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always + + ib_texture__0(); + + ib_vap_stream_cntl__2(); + + // shaders + T0V(US_PIXSIZE + , US_PIXSIZE__PIX_SIZE(1) + ); + ib_ga_us(&shaders.fragment[CLEAR_SHADER]); + ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]); + + ////////////////////////////////////////////////////////////////////////////// + // VAP INDEX + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_INDEX_OFFSET, 0); + + T0V(VAP_VF_MAX_VTX_INDX + , VAP_VF_MAX_VTX_INDX__MAX_INDX(0) + ); + T0V(VAP_VF_MIN_VTX_INDX + , VAP_VF_MIN_VTX_INDX__MIN_INDX(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_CLIP_CNTL + , VAP_CLIP_CNTL__CLIP_DISABLE(1) + ); + + T0V(VAP_VTE_CNTL + , VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division + | VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division + ); + + T0V(VAP_CNTL_STATUS + , VAP_CNTL_STATUS__PVS_BYPASS(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // GA POINT SIZE + ////////////////////////////////////////////////////////////////////////////// + + T0V(GA_POINT_SIZE + , GA_POINT_SIZE__HEIGHT(600 * 12) + | GA_POINT_SIZE__WIDTH(800 * 12) + ); + + ////////////////////////////////////////////////////////////////////////////// + // 3D_DRAW + ////////////////////////////////////////////////////////////////////////////// + + const int dwords_per_vtx = 2; + + T0V(VAP_VTX_SIZE + , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx) + ); + + const float center[] = { + 800.0f, 600.0f, + }; + const int vertex_count = 1; + T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1); + TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list + | VAP_VF_CNTL__PRIM_WALK(3) + | VAP_VF_CNTL__INDEX_SIZE(0) + | VAP_VF_CNTL__VTX_REUSE_DIS(0) + | VAP_VF_CNTL__DUAL_INDEX_MODE(0) + | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) + | VAP_VF_CNTL__NUM_VERTICES(vertex_count) + ); + for (int i = 0; i < 2; i++) { + TF(center[i]); + } +} + +mat4x4 perspective(float low1, float high1, + float low2, float high2, + float low3, float high3) +{ + float scale2 = (high2 - low2) / (high1 - low1); + float scale3 = (high3 - low3) / (high1 - low1); + + mat4x4 m1 = mat4x4(1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, -low1, + 0, 0, 0, 1 + ); + + mat4x4 m2 = mat4x4(1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, scale2, low2, + 0, 0, scale3, low3 + ); + + return m2 * m1; +} + +void _3d_cube_inner(mat4x4 trans, + mat4x4 world_trans, + vec4 light_pos) +{ + T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000); + + ////////////////////////////////////////////////////////////////////////////// + // VAP_PVS + ////////////////////////////////////////////////////////////////////////////// + + const float consts[] = { + // 0 + trans[0][0], trans[0][1], trans[0][2], trans[0][3], + trans[1][0], trans[1][1], trans[1][2], trans[1][3], + trans[2][0], trans[2][1], trans[2][2], trans[2][3], + trans[3][0], trans[3][1], trans[3][2], trans[3][3], + + // 4 + world_trans[0][0], world_trans[0][1], world_trans[0][2], world_trans[0][3], + world_trans[1][0], world_trans[1][1], world_trans[1][2], world_trans[1][3], + world_trans[2][0], world_trans[2][1], world_trans[2][2], world_trans[2][3], + world_trans[3][0], world_trans[3][1], world_trans[3][2], world_trans[3][3], + + // 8 + light_pos.x, light_pos.y, light_pos.z, light_pos.w, + }; + ib_vap_pvs_const_cntl(consts, (sizeof (consts))); + + ////////////////////////////////////////////////////////////////////////////// + // 3D_DRAW + ////////////////////////////////////////////////////////////////////////////// + + const model * model = &cubesphere_model; + const object * obj = model->object[0]; + const int triangle_count = obj->triangle_count; + const int vertex_count = triangle_count * 3; + + const int dwords_per_vtx = 8; + + T0V(VAP_VTX_SIZE + , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx) + ); + + T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1); + TU( VAP_VF_CNTL__PRIM_TYPE(4) + | VAP_VF_CNTL__PRIM_WALK(3) + | VAP_VF_CNTL__INDEX_SIZE(0) + | VAP_VF_CNTL__VTX_REUSE_DIS(0) + | VAP_VF_CNTL__DUAL_INDEX_MODE(0) + | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) + | VAP_VF_CNTL__NUM_VERTICES(vertex_count) + ); + + for (int i = 0; i < triangle_count; i++) { + for (int j = 0; j < 3; j++) { + vec3 p = model->position[obj->triangle[i][j].position]; + vec2 t = model->texture[obj->triangle[i][j].texture]; + vec3 n = model->normal[obj->triangle[i][j].normal]; + + TF(p.x); + TF(p.y); + TF(p.z); + TF(t.x); + TF(t.y); + TF(n.x); + TF(n.y); + TF(n.z); + } + } +} + +void _3d_light_inner(mat4x4 trans) +{ + T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000); + + ////////////////////////////////////////////////////////////////////////////// + // VAP_PVS + ////////////////////////////////////////////////////////////////////////////// + + const vec4 color = {1, 1, 0, 1}; + + const float consts[] = { + // 0 + trans[0][0], trans[0][1], trans[0][2], trans[0][3], + trans[1][0], trans[1][1], trans[1][2], trans[1][3], + trans[2][0], trans[2][1], trans[2][2], trans[2][3], + trans[3][0], trans[3][1], trans[3][2], trans[3][3], + + // 4 + color[0], color[1], color[2], color[2], + }; + ib_vap_pvs_const_cntl(consts, (sizeof (consts))); + + ////////////////////////////////////////////////////////////////////////////// + // 3D_DRAW + ////////////////////////////////////////////////////////////////////////////// + + const model * model = &cubesphere_model; + const object * obj = model->object[0]; + const int triangle_count = obj->triangle_count; + const int vertex_count = triangle_count * 3; + + int dwords_per_vtx = 3; + + T0V(VAP_VTX_SIZE + , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx) + ); + + T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1); + TU( VAP_VF_CNTL__PRIM_TYPE(4) + | VAP_VF_CNTL__PRIM_WALK(3) + | VAP_VF_CNTL__INDEX_SIZE(0) + | VAP_VF_CNTL__VTX_REUSE_DIS(0) + | VAP_VF_CNTL__DUAL_INDEX_MODE(0) + | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) + | VAP_VF_CNTL__NUM_VERTICES(vertex_count) + ); + + for (int i = 0; i < triangle_count; i++) { + for (int j = 0; j < 3; j++) { + vec3 p = model->position[obj->triangle[i][j].position]; + + TF(p.x); + TF(p.y); + TF(p.z); + } + } +} + +vec3 _3d_light(const shaders& shaders, + const mat4x4& view_to_clip, + float theta) +{ + ib_rs_instructions(1); + + ////////////////////////////////////////////////////////////////////////////// + // VAP OUT + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_OUT_VTX_FMT_0 + , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)); + T0V(VAP_OUT_VTX_FMT_1 + , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)); + + // + + ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less + + ib_texture__0(); + + ib_vap_stream_cntl__3(); + + // shaders + T0V(US_PIXSIZE + , US_PIXSIZE__PIX_SIZE(1) + ); + ib_ga_us(&shaders.fragment[LIGHT_SHADER]); + ib_vap_pvs(&shaders.vertex[LIGHT_SHADER]); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_CLIP_CNTL + , VAP_CLIP_CNTL__PS_UCP_MODE(3) + ); + + T0V(VAP_VTE_CNTL + , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1) + | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1) + | VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0) + | VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0) + | VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division + | VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division + | VAP_VTE_CNTL__VTX_W0_FMT(1) + | VAP_VTE_CNTL__SERIAL_PROC_ENA(0) + ); + + T0V(VAP_CNTL_STATUS + , VAP_CNTL_STATUS__PVS_BYPASS(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // VAP INDEX + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_INDEX_OFFSET, 0); + + T0V(VAP_VF_MAX_VTX_INDX + , VAP_VF_MAX_VTX_INDX__MAX_INDX(0) + ); + T0V(VAP_VF_MIN_VTX_INDX + , VAP_VF_MIN_VTX_INDX__MIN_INDX(0) + ); + + // light + mat4x4 t1 = translate(vec3(1, 0, 0)); + mat4x4 s = scale(0.1f); + mat4x4 rz = rotate_y(theta * 2.f); + + mat4x4 world_trans = rz * t1 * s; + + mat4x4 trans = view_to_clip * world_trans; + + _3d_light_inner(trans); + + vec3 light_pos = world_trans * light_pos; + + return light_pos; +} + +void _3d_cube(const shaders& shaders, + const mat4x4& view_to_clip, + float theta, + const vec3& light_pos) +{ + ib_rs_instructions(4); + + ////////////////////////////////////////////////////////////////////////////// + // VAP OUT + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_OUT_VTX_FMT_0 + , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)); + T0V(VAP_OUT_VTX_FMT_1 + , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4) + | VAP_OUT_VTX_FMT_1__TEX_1_COMP_CNT(4) + | VAP_OUT_VTX_FMT_1__TEX_2_COMP_CNT(4) + | VAP_OUT_VTX_FMT_1__TEX_3_COMP_CNT(4)); + + // + + ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less + + int width = 1024; + int height = 1024; + int macrotile = 0; + int microtile = 0; + int clamp = 0; // wrap/repeat + ib_texture__1(TEXTUREBUFFER_RELOC_INDEX, + width, height, + macrotile, microtile, + clamp); + + ib_vap_stream_cntl__323(); + + // shaders + T0V(US_PIXSIZE + , US_PIXSIZE__PIX_SIZE(4) + ); + ib_ga_us(&shaders.fragment[CUBESPHERE_SHADER]); + ib_vap_pvs(&shaders.vertex[CUBESPHERE_SHADER]); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_CLIP_CNTL + , VAP_CLIP_CNTL__PS_UCP_MODE(3) + ); + + T0V(VAP_VTE_CNTL + , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1) + | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1) + | VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0) + | VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0) + | VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division + | VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division + | VAP_VTE_CNTL__VTX_W0_FMT(1) + | VAP_VTE_CNTL__SERIAL_PROC_ENA(0) + ); + + T0V(VAP_CNTL_STATUS + , VAP_CNTL_STATUS__PVS_BYPASS(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // VAP INDEX + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_INDEX_OFFSET, 0); + + T0V(VAP_VF_MAX_VTX_INDX + , VAP_VF_MAX_VTX_INDX__MAX_INDX(0) + ); + T0V(VAP_VF_MIN_VTX_INDX + , VAP_VF_MIN_VTX_INDX__MIN_INDX(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // matrix + ////////////////////////////////////////////////////////////////////////////// + + // cube + mat4x4 rx = rotate_x(1 * theta * 0.5f); + mat4x4 ry = rotate_y(0 * theta * 0.8f + 1.4f); + mat4x4 s = scale(0.9f); + + mat4x4 world_trans = rx * ry * s; + + mat4x4 trans = view_to_clip * world_trans; + + _3d_cube_inner(trans, world_trans, light_pos); +} + +int indirect_buffer(const shaders& shaders, + float theta) +{ + int width = 1600; + int height = 1200; + int pitch = width; + + ib_ix = 0; + + ib_generic_initialization(); + + ib_viewport(width, height); + ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0); + + T0V(GB_ENABLE, 0); + + T0V(US_OUT_FMT_0 + , US_OUT_FMT__OUT_FMT(0) // C4_8 + | US_OUT_FMT__C0_SEL__BLUE + | US_OUT_FMT__C1_SEL__GREEN + | US_OUT_FMT__C2_SEL__RED + | US_OUT_FMT__C3_SEL__ALPHA + | US_OUT_FMT__OUT_SIGN(0) + ); + T0V(US_OUT_FMT_1 + , US_OUT_FMT__OUT_FMT(15) // render target is not used + ); + T0V(US_OUT_FMT_2 + , US_OUT_FMT__OUT_FMT(15) // render target is not used + ); + T0V(US_OUT_FMT_3 + , US_OUT_FMT__OUT_FMT(15) // render target is not used + ); + + load_pvs_shaders(shaders.vertex, shaders.vertex_length); + load_us_shaders(shaders.fragment, shaders.fragment_length); + + ////////////////////////////////////////////////////////////////////////////// + // DRAW + ////////////////////////////////////////////////////////////////////////////// + + mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1)); + mat4x4 p = perspective(0.01f, 5.0f, + 0.001f, 0.999f, + 0.5f, 2.0f); + mat4x4 t = translate(vec3(0, 0, 3)); + mat4x4 view_to_clip = aspect * p * t; + + _3d_clear(shaders); + vec3 light_pos = _3d_light(shaders, view_to_clip, theta); + _3d_cube(shaders, view_to_clip, theta, light_pos); + + ////////////////////////////////////////////////////////////////////////////// + // padding + ////////////////////////////////////////////////////////////////////////////// + + while ((ib_ix % 8) != 0) { + TU(0x80000000); + } + + return ib_ix; +} + +int _tile_texture(const shaders& shaders, + int input_reloc_index, + int output_reloc_index) +{ + int width = 1024; + int height = 1024; + int pitch = width; + float x = (float)width * 0.5f; + float y = (float)height * 0.5f; + + ib_ix = 0; + + ib_generic_initialization(); + + ib_viewport(width, height); + ib_colorbuffer(output_reloc_index, pitch, 0, 0); // macrotile, microtile + + T0V(US_OUT_FMT_0 + , US_OUT_FMT__OUT_FMT(0) // C4_8 + | US_OUT_FMT__C0_SEL__RED + | US_OUT_FMT__C1_SEL__GREEN + | US_OUT_FMT__C2_SEL__BLUE + | US_OUT_FMT__C3_SEL__ALPHA + | US_OUT_FMT__OUT_SIGN(0) + ); + T0V(US_OUT_FMT_1 + , US_OUT_FMT__OUT_FMT(15) // render target is not used + ); + T0V(US_OUT_FMT_2 + , US_OUT_FMT__OUT_FMT(15) // render target is not used + ); + T0V(US_OUT_FMT_3 + , US_OUT_FMT__OUT_FMT(15) // render target is not used + ); + + // shaders + + load_pvs_shaders(shaders.vertex, shaders.vertex_length); + load_us_shaders(shaders.fragment, shaders.fragment_length); + + // GA + + T0V(GB_ENABLE + , GB_ENABLE__POINT_STUFF_ENABLE(1) + | GB_ENABLE__TEX0_SOURCE(2) // stuff with source texture coordinates s,t + ); + + T0Vf(GA_POINT_S0, 0.0f); + T0Vf(GA_POINT_T0, 1.0f); + T0Vf(GA_POINT_S1, 1.0f); + T0Vf(GA_POINT_T1, 0.0f); + + ////////////////////////////////////////////////////////////////////////////// + // RS + ////////////////////////////////////////////////////////////////////////////// + + int rs_instructions = 1; + + ib_rs_instructions(0); + + T0V(RS_IP_0 + , RS_IP__TEX_PTR_S(0) + | RS_IP__TEX_PTR_T(1) + | RS_IP__TEX_PTR_R(62) // constant 0.0 + | RS_IP__TEX_PTR_Q(63) // constant 1.0 + ); + + T0V(RS_COUNT + , RS_COUNT__IT_COUNT(2) + | RS_COUNT__IC_COUNT(0) + | RS_COUNT__W_ADDR(0) + | RS_COUNT__HIRES_EN(1) + ); + + T0V(RS_INST_COUNT + , RS_INST_COUNT__INST_COUNT(rs_instructions - 1)); + + T0V(RS_INST_0 + , RS_INST__TEX_ID(0) + | RS_INST__TEX_CN(1) + | RS_INST__TEX_ADDR(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // VAP OUT + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_OUT_VTX_FMT_0 + , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1) + ); + T0V(VAP_OUT_VTX_FMT_1 + , 0 + ); + + // + + T0V(ZB_CNTL, 0); + T0V(ZB_ZSTENCILCNTL, 0); + + // + + int macrotile = 0; + int microtile = 0; + int clamp = 2; // clamp to [0.0, 1.0] + ib_texture__1(input_reloc_index, + width, height, + macrotile, microtile, + clamp); + + ib_vap_stream_cntl__2(); + + T0V(US_PIXSIZE + , US_PIXSIZE__PIX_SIZE(1) + ); + + ib_ga_us(&shaders.fragment[TEXTURE_TILE_SHADER]); + ib_vap_pvs(&shaders.vertex[TEXTURE_TILE_SHADER]); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_CLIP_CNTL + , VAP_CLIP_CNTL__CLIP_DISABLE(1) + ); + + T0V(VAP_VTE_CNTL + , VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division + | VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division + ); + + T0V(VAP_CNTL_STATUS + , VAP_CNTL_STATUS__PVS_BYPASS(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // GA POINT SIZE + ////////////////////////////////////////////////////////////////////////////// + + T0V(GA_POINT_SIZE + , GA_POINT_SIZE__HEIGHT((int)(x * 12.0f)) + | GA_POINT_SIZE__WIDTH((int)(y * 12.0f)) + ); + + ////////////////////////////////////////////////////////////////////////////// + // 3D_DRAW + ////////////////////////////////////////////////////////////////////////////// + + const int dwords_per_vtx = 2; + + T0V(VAP_VTX_SIZE + , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx) + ); + + const float center[] = { + x, y, + }; + const int vertex_count = 1; + T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1); + TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list + | VAP_VF_CNTL__PRIM_WALK(3) + | VAP_VF_CNTL__INDEX_SIZE(0) + | VAP_VF_CNTL__VTX_REUSE_DIS(0) + | VAP_VF_CNTL__DUAL_INDEX_MODE(0) + | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) + | VAP_VF_CNTL__NUM_VERTICES(vertex_count) + ); + for (int i = 0; i < 2; i++) { + TF(center[i]); + } + + return ib_ix; +} + +const char * textures[] = { + "../texture/butterfly_1024x1024_argb8888.data", +}; +const int textures_length = (sizeof (textures)) / (sizeof (textures[0])); + +int main() +{ + struct shaders shaders = { + .vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length), + .fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length), + .vertex_length = vertex_shader_paths_length, + .fragment_length = fragment_shader_paths_length, + }; + + void * rmmio = map_pci_resource2(); + + int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC); + assert(fd != -1); + + const int colorbuffer_size = 1600 * 1200 * 4; + int colorbuffer_handle[2]; + int zbuffer_handle; + int * texturebuffer_handle; + int flush_handle; + int test_handle; + + void * colorbuffer_ptr[2]; + void * zbuffer_ptr; + void * test_ptr; + + // colorbuffer + colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]); + colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]); + test_handle = create_buffer(fd, 1600 * 1200 * 4, &test_ptr); + zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr); + flush_handle = create_flush_buffer(fd); + texturebuffer_handle = load_textures(fd, textures, textures_length); + + fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]); + fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]); + fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle); + fprintf(stderr, "test handle %d\n", test_handle); + + int colorbuffer_ix = 0; + float theta = 0; + + { + int ib_dwords = _tile_texture(shaders, + TEXTUREBUFFER_RELOC_INDEX, // input + COLORBUFFER_RELOC_INDEX); // output + + //int ib_dwords = indirect_buffer(shaders, theta); + + printf("here2\n"); + + drm_radeon_cs(fd, + test_handle, // colorbuffer + zbuffer_handle, // unused + flush_handle, + texturebuffer_handle, + textures_length, + ib_dwords); + } + + while (false) { + int ib_dwords = indirect_buffer(shaders, theta); + + drm_radeon_cs(fd, + colorbuffer_handle[colorbuffer_ix], + zbuffer_handle, + flush_handle, + texturebuffer_handle, + textures_length, + ib_dwords); + + primary_surface_address(rmmio, colorbuffer_ix); + + // next state + theta += 0.01f; + colorbuffer_ix = (colorbuffer_ix + 1) & 1; + } + + + { + printf("test.data\n"); + int out_fd = open("test.data", O_RDWR|O_CREAT, 0644); + assert(out_fd >= 0); + ssize_t write_length = write(out_fd, test_ptr, 1024 * 1024 * 4); + assert(write_length == 1024 * 1024 * 4); + close(out_fd); + } + + close(fd); +} diff --git a/src/r500/indirect_buffer.c b/src/r500/indirect_buffer.c index 9cdd37a..cce0916 100644 --- a/src/r500/indirect_buffer.c +++ b/src/r500/indirect_buffer.c @@ -136,11 +136,6 @@ void ib_generic_initialization() | GA_ROUND_MODE__GEOMETRY_MASK(0) ); - T0Vf(GA_POINT_S0, 0.0f); - T0Vf(GA_POINT_T0, 1.0f); - T0Vf(GA_POINT_S1, 1.0f); - T0Vf(GA_POINT_T1, 0.0f); - T0V(GA_COLOR_CONTROL , GA_COLOR_CONTROL__RGB0_SHADING(2) | GA_COLOR_CONTROL__ALPHA0_SHADING(2) @@ -160,7 +155,6 @@ void ib_generic_initialization() ); T0V(GB_SELECT, 0x00000000); - T0V(GB_ENABLE, 0x00000000); T0V(GB_MSPOS0 , GB_MSPOS0__MS_X0(6) @@ -278,33 +272,9 @@ void ib_generic_initialization() T0V(US_W_FMT , US_W_FMT__W_FMT(0) // W is always zero ); - - ////////////////////////////////////////////////////////////////////////////// - // SC - ////////////////////////////////////////////////////////////////////////////// - - T0V(SC_SCISSOR0 - , SC_SCISSOR0__XS0(0) - | SC_SCISSOR0__YS0(0) - ); - T0V(SC_SCISSOR1 - , SC_SCISSOR1__XS1(1600 - 1) - | SC_SCISSOR1__YS1(1200 - 1) - ); - - ////////////////////////////////////////////////////////////////////////////// - // VAP - ////////////////////////////////////////////////////////////////////////////// - - T0Vf(VAP_VPORT_XSCALE, 800.0f); - T0Vf(VAP_VPORT_XOFFSET, 800.0f); - T0Vf(VAP_VPORT_YSCALE, -600.0f); - T0Vf(VAP_VPORT_YOFFSET, 600.0f); - T0Vf(VAP_VPORT_ZSCALE, 0.5f); - T0Vf(VAP_VPORT_ZOFFSET, 0.5f); } -void ib_colorbuffer(int reloc_index) +void ib_colorbuffer(int reloc_index, int pitch, int macrotile, int microtile) { ////////////////////////////////////////////////////////////////////////////// @@ -318,8 +288,10 @@ void ib_colorbuffer(int reloc_index) TU(reloc_index * 4); // index into relocs array T0V(RB3D_COLORPITCH0 - , RB3D_COLORPITCH__COLORPITCH(1600 >> 1) - | RB3D_COLORPITCH__COLORFORMAT(6) // ARGB8888 + , RB3D_COLORPITCH__COLORPITCH(pitch >> 1) + | RB3D_COLORPITCH__COLORTILE(macrotile) + | RB3D_COLORPITCH__COLORMICROTILE(microtile) + | RB3D_COLORPITCH__COLORFORMAT__ARGB8888 ); // The COLORPITCH NOP is ignored/not applied due to // RADEON_CS_KEEP_TILING_FLAGS, but is still required. @@ -327,7 +299,37 @@ void ib_colorbuffer(int reloc_index) TU(reloc_index * 4); // index into relocs array } -void ib_zbuffer(int reloc_index, int zfunc) +void ib_viewport(int width, int height) +{ + ////////////////////////////////////////////////////////////////////////////// + // SC + ////////////////////////////////////////////////////////////////////////////// + + T0V(SC_SCISSOR0 + , SC_SCISSOR0__XS0(0) + | SC_SCISSOR0__YS0(0) + ); + T0V(SC_SCISSOR1 + , SC_SCISSOR1__XS1(width - 1) + | SC_SCISSOR1__YS1(height - 1) + ); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + + float x = ((float)width) * 0.5f; + float y = ((float)height) * 0.5f; + + T0Vf(VAP_VPORT_XSCALE, x); + T0Vf(VAP_VPORT_XOFFSET, x); + T0Vf(VAP_VPORT_YSCALE, -y); + T0Vf(VAP_VPORT_YOFFSET, y); + T0Vf(VAP_VPORT_ZSCALE, 0.5f); + T0Vf(VAP_VPORT_ZOFFSET, 0.5f); +} + +void ib_zbuffer(int reloc_index, int pitch, int zfunc) { ////////////////////////////////////////////////////////////////////////////// // ZB @@ -349,7 +351,7 @@ void ib_zbuffer(int reloc_index, int zfunc) TU(reloc_index * 4); // index into relocs array T0V(ZB_DEPTHPITCH - , ZB_DEPTHPITCH__DEPTHPITCH(1600 >> 2) + , ZB_DEPTHPITCH__DEPTHPITCH(pitch >> 2) | ZB_DEPTHPITCH__DEPTHMACROTILE(1) | ZB_DEPTHPITCH__DEPTHMICROTILE(1) ); @@ -518,7 +520,10 @@ void ib_texture__0() T0V(TX_ENABLE, 0x00000000); } -void ib_texture__1(int reloc_index) +void ib_texture__1(int reloc_index, + int width, int height, + int macrotile, int microtile, + int clamp) { ////////////////////////////////////////////////////////////////////////////// // TX @@ -528,17 +533,21 @@ void ib_texture__1(int reloc_index) T0V(TX_ENABLE , TX_ENABLE__TEX_0_ENABLE__ENABLE); + T0V(TX_FILTER0_0 - , TX_FILTER0__MAG_FILTER__LINEAR + , TX_FILTER0__CLAMP_S(clamp) + | TX_FILTER0__CLAMP_T(clamp) + | TX_FILTER0__MAG_FILTER__LINEAR | TX_FILTER0__MIN_FILTER__LINEAR ); T0V(TX_FILTER1_0 , TX_FILTER1__LOD_BIAS(1) + | TX_FILTER1__BORDER_FIX(1) ); T0V(TX_BORDER_COLOR_0, 0); T0V(TX_FORMAT0_0 - , TX_FORMAT0__TXWIDTH(1024 - 1) - | TX_FORMAT0__TXHEIGHT(1024 - 1) + , TX_FORMAT0__TXWIDTH(width - 1) + | TX_FORMAT0__TXHEIGHT(height - 1) ); T0V(TX_FORMAT1_0 @@ -552,9 +561,8 @@ void ib_texture__1(int reloc_index) T0V(TX_FORMAT2_0, 0); T0V(TX_OFFSET_0 - //, TX_OFFSET__MACRO_TILE(1) - //| TX_OFFSET__MICRO_TILE(1) - , 0 + , TX_OFFSET__MACRO_TILE(macrotile) + | TX_OFFSET__MICRO_TILE(microtile) ); T3(_NOP, 0); diff --git a/src/r500/indirect_buffer.h b/src/r500/indirect_buffer.h index 87db0c7..980015b 100644 --- a/src/r500/indirect_buffer.h +++ b/src/r500/indirect_buffer.h @@ -55,11 +55,15 @@ extern union u32_f32 ib[16384]; extern volatile int ib_ix; void ib_generic_initialization(); -void ib_colorbuffer(int reloc_index); -void ib_zbuffer(int reloc_index, int zfunc); +void ib_viewport(int width, int height); +void ib_colorbuffer(int reloc_index, int pitch, int macrotile, int microtile); +void ib_zbuffer(int reloc_index, int pitch, int zfunc); void ib_rs_instructions(int count); void ib_texture__0(); -void ib_texture__1(int reloc_index); +void ib_texture__1(int reloc_index, + int width, int height, + int macrotile, int microtile, + int clamp); void ib_vap_pvs(struct shader_offset * offset); void ib_ga_us(struct shader_offset * offset); void ib_vap_pvs_const_cntl(const float * consts, int size); diff --git a/src/texture_tile.fs.asm b/src/texture_tile.fs.asm new file mode 100644 index 0000000..3e534e8 --- /dev/null +++ b/src/texture_tile.fs.asm @@ -0,0 +1,8 @@ +TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE + temp[0].rgba = LD tex[0].rgba temp[0].rgaa ; + +OUT TEX_SEM_WAIT +src0.a = temp[0], +src0.rgb = temp[0] : + out[0].a = MAX src0.a src0.a , + out[0].rgb = MAX src0.rgb src0.rgb ; diff --git a/src/texture_tile.fs.bin b/src/texture_tile.fs.bin new file mode 100644 index 0000000000000000000000000000000000000000..bbfaa90109838e4c340fb53d5d938c5d4f5c5054 GIT binary patch literal 48 ocmZRtU;qLKCWbEzPe3dfU~OOrGMP9)Sb@off%!lJ11nGj0D>k2$N&HU literal 0 HcmV?d00001 diff --git a/src/texture_tile.vs.asm b/src/texture_tile.vs.asm new file mode 100644 index 0000000..e779e8c --- /dev/null +++ b/src/texture_tile.vs.asm @@ -0,0 +1 @@ +out[0].xyzw = VE_ADD input[0].xyzw input[0].0000 ; diff --git a/src/texture_tile.vs.bin b/src/texture_tile.vs.bin new file mode 100644 index 0000000000000000000000000000000000000000..69e1685b5aafcbc0d6e84a0272ca453314860f5e GIT binary patch literal 16 XcmZQ(`oO@*aFKzrL4}d=!GA^o9=rrk literal 0 HcmV?d00001 diff --git a/tools/bgra_to_rgba.py b/tools/bgra_to_rgba.py index ceac35a..cc4a9c8 100644 --- a/tools/bgra_to_rgba.py +++ b/tools/bgra_to_rgba.py @@ -17,7 +17,7 @@ for i in range(len(buf) // 4): out[i * 4 + 2] = b out[i * 4 + 3] = a -im = Image.frombuffer("RGBA", (1600, 1200), out) -im.save(sys.argv[2]) -#with open(sys.argv[2], 'wb') as f: -# f.write(out) +#im = Image.frombuffer("RGBA", (1600, 1200), out) +#im.save(sys.argv[2]) +with open(sys.argv[2], 'wb') as f: + f.write(out)