diff --git a/src/drm/drm.c b/src/drm/drm.c
index 2b128da..3411a20 100644
--- a/src/drm/drm.c
+++ b/src/drm/drm.c
@@ -98,3 +98,84 @@ int drm_radeon_cs(int fd,
 
   return 0;
 }
+
+/*
+ * Submit the global indirect buffer `ib` to the radeon kernel driver as a
+ * single command stream with three chunks: the IB, one relocation entry per
+ * element of `handles` (VRAM read/write domain), and the CS flags.
+ *
+ * fd             - open DRM device file descriptor
+ * handles        - buffer handles; handles[i] becomes relocation entry i
+ * handles_length - number of elements in `handles`, must be at least 1
+ * ib_dwords      - number of dwords of `ib` to submit, must be at least 1
+ *
+ * Returns 0 on success, -1 on invalid arguments or when the ioctl fails.
+ */
+int drm_radeon_cs2(int fd,
+                   int * handles,
+                   int handles_length,
+                   int ib_dwords)
+{
+  // `relocs` is a variable-length array: a zero or negative length is
+  // undefined behaviour, so reject it before the declaration is reached.
+  if (handles == NULL || handles_length <= 0 || ib_dwords <= 0) {
+    fprintf(stderr, "drm_radeon_cs2: invalid arguments\n");
+    return -1;
+  }
+
+  struct drm_radeon_cs_reloc relocs[handles_length];
+
+  for (int i = 0; i < handles_length; i++) {
+    relocs[i] = (struct drm_radeon_cs_reloc){
+      .handle = handles[i],
+      .read_domains = 4, // RADEON_GEM_DOMAIN_VRAM
+      .write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
+      .flags = 8,
+    };
+  }
+
+  const uint32_t flags[2] = {
+    5, // RADEON_CS_KEEP_TILING_FLAGS | RADEON_CS_END_OF_FRAME
+    0, // RADEON_CS_RING_GFX
+  };
+
+  // chunk lengths are expressed in dwords, hence the division by sizeof (uint32_t)
+  struct drm_radeon_cs_chunk chunks[3] = {
+    {
+      .chunk_id = RADEON_CHUNK_ID_IB,
+      .length_dw = ib_dwords,
+      .chunk_data = (uint64_t)(uintptr_t)ib,
+    },
+    {
+      .chunk_id = RADEON_CHUNK_ID_RELOCS,
+      .length_dw = (sizeof (relocs)) / (sizeof (uint32_t)),
+      .chunk_data = (uint64_t)(uintptr_t)relocs,
+    },
+    {
+      .chunk_id = RADEON_CHUNK_ID_FLAGS,
+      .length_dw = (sizeof (flags)) / (sizeof (uint32_t)),
+      .chunk_data = (uint64_t)(uintptr_t)&flags,
+    },
+  };
+
+  // the ioctl takes an array of pointers to the chunk descriptors
+  uint64_t chunks_array[3] = {
+    (uint64_t)(uintptr_t)&chunks[0],
+    (uint64_t)(uintptr_t)&chunks[1],
+    (uint64_t)(uintptr_t)&chunks[2],
+  };
+
+  struct drm_radeon_cs cs = {
+    .num_chunks = 3,
+    .cs_id = 0,
+    .chunks = (uint64_t)(uintptr_t)chunks_array,
+    .gart_limit = 0,
+    .vram_limit = 0,
+  };
+
+  int ret = drmCommandWriteRead(fd, DRM_RADEON_CS, &cs, (sizeof (struct drm_radeon_cs)));
+  if (ret != 0) {
+    perror("drmCommandWriteRead(DRM_RADEON_CS)");
+    return -1;
+  }
+
+  return 0;
+}
diff --git a/src/drm/drm.h b/src/drm/drm.h
index 443a73f..bc27e09 100644
--- a/src/drm/drm.h
+++ b/src/drm/drm.h
@@ -18,6 +18,11 @@ int drm_radeon_cs(int fd,
                   int texturebuffer_handles_length,
                   int ib_dwords);
 
+int drm_radeon_cs2(int fd,
+                   int * handles,
+                   int handles_length,
+                   int ib_dwords);
+
 #ifdef __cplusplus
 }
 #endif
diff --git 
a/src/particle_oriented_animated_quad_vbuf_pixel_shader.cpp b/src/particle_oriented_animated_quad_vbuf_pixel_shader.cpp new file mode 100644 index 0000000..67dbb6d --- /dev/null +++ b/src/particle_oriented_animated_quad_vbuf_pixel_shader.cpp @@ -0,0 +1,1190 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "r500/3d_registers.h" +#include "r500/3d_registers_undocumented.h" +#include "r500/3d_registers_bits.h" +#include "r500/indirect_buffer.h" +#include "r500/shader.h" +#include "r500/display_controller.h" + +#include "drm/buffer.h" +#include "drm/drm.h" + +#include "math/float_types.hpp" +#include "math/transform.hpp" +#include "math/constants.hpp" + +#include "../model/model2.h" + +#define CLEAR_SHADER 0 +#define PLANE_SHADER 1 +#define PARTICLE_SHADER 2 +#define TEXTURE_TILE_SHADER 3 +#define PARTICLE_PHYSICS_SHADER 4 + +const char * vertex_shader_paths[] = { + "clear.vs.bin", + "particle_plane_fan.vs.bin", + "particle_particle_animated_quad_vbuf.vs.bin", + "texture_tile.vs.bin", + "particle_physics.vs.bin", +}; +const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0])); +const char * fragment_shader_paths[] = { + "clear.fs.bin", + "particle_plane.fs.bin", + "particle_particle.fs.bin", + "texture_tile.fs.bin", + "particle_physics.fs.bin", +}; +const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0])); + +#define PLANE_TEXTURE 0 +#define PARTICLE_TEXTURE 1 + +const char * textures[] = { + "../texture/plane_32x32_rgba8888.data", + "../texture/particle_32x32_rgba8888.data", +}; +const int textures_length = (sizeof (textures)) / (sizeof (textures[0])); + +struct shaders { + struct shader_offset * vertex; + struct shader_offset * fragment; + int vertex_length; + int fragment_length; +}; + +static inline uint32_t xorshift32(uint32_t state) +{ + uint32_t x = state; + x ^= x << 13; + x ^= x >> 
17; + x ^= x << 5; + return x; +} + +static inline float xorshift32f(uint32_t& state) +{ + state = xorshift32(state); + return (float)(state & 0xffffff) * (1.0f / 16777215.0f); +} + +const float max_age = 3.0f; + +struct particle_position { + vec3 position; + float age; +}; + +struct particle_velocity { + vec3 velocity; + float delta; +}; + +struct particle { + vec3 position; + float age; + vec3 velocity; + float delta; +}; + +struct floatbuffer_state { + int handles[4]; + void * ptrs[4]; + int length; + int flip; + + inline particle_position * position_output() const + { + int fb_output = (!flip) * 2; + particle_position * out_pos = (particle_position *)this->ptrs[fb_output + 0]; + return out_pos; + } +}; + +void _3d_clear(const shaders& shaders) +{ + ib_rs_instructions(0); + + ////////////////////////////////////////////////////////////////////////////// + // VAP OUT + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_OUT_VTX_FMT_0 + , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)); + T0V(VAP_OUT_VTX_FMT_1 + , 0); + + // + + ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always + + ib_texture__0(); + + ib_vap_stream_cntl__2(); + + // shaders + T0V(US_PIXSIZE + , US_PIXSIZE__PIX_SIZE(1) + ); + ib_ga_us(&shaders.fragment[CLEAR_SHADER]); + ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]); + + ////////////////////////////////////////////////////////////////////////////// + // VAP INDEX + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_INDEX_OFFSET, 0); + + T0V(VAP_VF_MAX_VTX_INDX + , VAP_VF_MAX_VTX_INDX__MAX_INDX(0) + ); + T0V(VAP_VF_MIN_VTX_INDX + , VAP_VF_MIN_VTX_INDX__MIN_INDX(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_CLIP_CNTL + , VAP_CLIP_CNTL__CLIP_DISABLE(1) + ); + + T0V(VAP_VTE_CNTL + , VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division + | 
VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division + ); + + T0V(VAP_CNTL_STATUS + , VAP_CNTL_STATUS__PVS_BYPASS(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // GA POINT SIZE + ////////////////////////////////////////////////////////////////////////////// + + T0V(GA_POINT_SIZE + , GA_POINT_SIZE__HEIGHT(600 * 12) + | GA_POINT_SIZE__WIDTH(800 * 12) + ); + + ////////////////////////////////////////////////////////////////////////////// + // 3D_DRAW + ////////////////////////////////////////////////////////////////////////////// + + const int dwords_per_vtx = 2; + + T0V(VAP_VTX_SIZE + , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx) + ); + + const float center[] = { + 800.0f, 600.0f, + }; + const int vertex_count = 1; + T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1); + TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list + | VAP_VF_CNTL__PRIM_WALK(3) + | VAP_VF_CNTL__INDEX_SIZE(0) + | VAP_VF_CNTL__VTX_REUSE_DIS(0) + | VAP_VF_CNTL__DUAL_INDEX_MODE(0) + | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) + | VAP_VF_CNTL__NUM_VERTICES(vertex_count) + ); + for (int i = 0; i < 2; i++) { + TF(center[i]); + } +} + +mat4x4 perspective(float low1, float high1, + float low2, float high2, + float low3, float high3) +{ + float scale2 = (high2 - low2) / (high1 - low1); + float scale3 = (high3 - low3) / (high1 - low1); + + mat4x4 m1 = mat4x4(1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, -low1, + 0, 0, 0, 1 + ); + + mat4x4 m2 = mat4x4(1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, scale2, low2, + 0, 0, scale3, low3 + ); + + return m2 * m1; +} + +void _3d_plane_inner() +{ + ////////////////////////////////////////////////////////////////////////////// + // 3D_DRAW + ////////////////////////////////////////////////////////////////////////////// + + const int dwords_per_vtx = 2; + + T0V(VAP_VTX_SIZE + , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx) + ); + + const vec2 vertices[] = { + {0.0, 0.0f}, + {1.0, 0.0f}, + {1.0, 1.0f}, + {0.0, 1.0f}, + }; + + const int vertex_count = 4; 
+  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
+  TU( VAP_VF_CNTL__PRIM_TYPE(5) // triangle fan
+    | VAP_VF_CNTL__PRIM_WALK(3)
+    | VAP_VF_CNTL__INDEX_SIZE(0)
+    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
+    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
+    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
+    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
+    );
+  for (int i = 0; i < vertex_count; i++) {
+    TF(vertices[i].x);
+    TF(vertices[i].y);
+  }
+}
+
+void _3d_particle_inner(int particles_length, int position_offset) // emits the vertex-buffer setup and the quad-list draw for all particles
+{
+  const int vertex_count = 4 * particles_length; // one quad (4 vertices) per particle
+
+  //////////////////////////////////////////////////////////////////////////////
+  // VF
+  //////////////////////////////////////////////////////////////////////////////
+
+  T0V(VAP_VF_MAX_VTX_INDX
+      , VAP_VF_MAX_VTX_INDX__MAX_INDX(vertex_count - 1)
+      );
+  T0V(VAP_VF_MIN_VTX_INDX
+      , VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
+      );
+
+  //////////////////////////////////////////////////////////////////////////////
+  // AOS
+  //////////////////////////////////////////////////////////////////////////////
+
+  T3(_3D_LOAD_VBPNTR, (4 - 1));
+  TU( // VAP_VTX_NUM_ARRAYS
+    VAP_VTX_NUM_ARRAYS__VTX_NUM_ARRAYS(2)
+    | VAP_VTX_NUM_ARRAYS__VC_FORCE_PREFETCH(1)
+    );
+  TU( // VAP_VTX_AOS_ATTR01
+    VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(3)
+    | VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(3)
+    | VAP_VTX_AOS_ATTR__VTX_AOS_COUNT1(2)
+    | VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE1(2)
+    );
+  TU( // VAP_VTX_AOS_ADDR0
+    (4 * position_offset) // position_offset is scaled by 4 here; the caller passes a float index
+    );
+  TU( // VAP_VTX_AOS_ADDR1
+    (4 * 0)
+    );
+
+  T3(_NOP, 0);
+  TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR0
+  T3(_NOP, 0);
+  TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR1
+
+  //////////////////////////////////////////////////////////////////////////////
+  // 3D_DRAW
+  //////////////////////////////////////////////////////////////////////////////
+
+  T3(_3D_DRAW_VBUF_2, (1 - 1));
+  TU( VAP_VF_CNTL__PRIM_TYPE(13) // quad list
+    | VAP_VF_CNTL__PRIM_WALK(2) // vertex list (data 
fetched from memory) + | VAP_VF_CNTL__INDEX_SIZE(0) + | VAP_VF_CNTL__VTX_REUSE_DIS(0) + | VAP_VF_CNTL__DUAL_INDEX_MODE(0) + | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) + | VAP_VF_CNTL__NUM_VERTICES(vertex_count) + ); +} + +void _3d_plane(const shaders& shaders, + const mat4x4& world_to_clip, + float theta) +{ + ib_rs_instructions(1); + + ////////////////////////////////////////////////////////////////////////////// + // VAP OUT + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_OUT_VTX_FMT_0 + , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)); + T0V(VAP_OUT_VTX_FMT_1 + , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)); + + // + + ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less + + int width = 32; + int height = 32; + int macrotile = 0; + int microtile = 0; + int clamp = 0; // wrap/repeat + ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PLANE_TEXTURE, + width, height, + macrotile, microtile, + clamp); + + ib_vap_stream_cntl__2(); + + // shaders + T0V(US_PIXSIZE + , US_PIXSIZE__PIX_SIZE(4) + ); + ib_ga_us(&shaders.fragment[PLANE_SHADER]); + ib_vap_pvs(&shaders.vertex[PLANE_SHADER]); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_CLIP_CNTL + , VAP_CLIP_CNTL__PS_UCP_MODE(3) + ); + + T0V(VAP_VTE_CNTL + , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1) + | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1) + | VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1) + | VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division + | VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division + | VAP_VTE_CNTL__VTX_W0_FMT(1) + | VAP_VTE_CNTL__SERIAL_PROC_ENA(0) + ); + + T0V(VAP_CNTL_STATUS + , VAP_CNTL_STATUS__PVS_BYPASS(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // matrix + 
////////////////////////////////////////////////////////////////////////////// + + mat4x4 s = scale(1.0f); + mat4x4 rx = rotate_x(-PI / 2.0f); + mat4x4 local_to_world = s * rx; + + mat4x4 trans = world_to_clip * local_to_world; + + ////////////////////////////////////////////////////////////////////////////// + // consts + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000); + + const float consts[] = { + // 0 + trans[0][0], trans[0][1], trans[0][2], trans[0][3], + trans[1][0], trans[1][1], trans[1][2], trans[1][3], + trans[2][0], trans[2][1], trans[2][2], trans[2][3], + trans[3][0], trans[3][1], trans[3][2], trans[3][3], + // 4 + -2.0f, 0, 0, 0, + }; + ib_vap_pvs_const_cntl(consts, (sizeof (consts))); + + // plane_inner + + _3d_plane_inner(); +} + +void _3d_particle(const shaders& shaders, + const mat4x4& world_to_clip, + const mat4x4& world_to_view, + const floatbuffer_state& state, + const float theta, + float * vertexbuffer_ptr) +{ + // enable blending + T0V(RB3D_BLENDCNTL + , RB3D_BLENDCNTL__ALPHA_BLEND_ENABLE__ENABLE + | RB3D_BLENDCNTL__READ_ENABLE(1) + | RB3D_BLENDCNTL__SRCBLEND__GL_ONE + | RB3D_BLENDCNTL__DESTBLEND__GL_ONE + | RB3D_BLENDCNTL__SRC_ALPHA_0_NO_READ(0) + | RB3D_BLENDCNTL__SRC_ALPHA_1_NO_READ(0) + ); + + ib_rs_instructions(1); + + ////////////////////////////////////////////////////////////////////////////// + // VAP OUT + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_OUT_VTX_FMT_0 + , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)); + T0V(VAP_OUT_VTX_FMT_1 + , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)); + + // + + //ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less + T0V(ZB_CNTL + , 0 + ); + T0V(ZB_ZSTENCILCNTL + , 0 + ); + + int width = 32; + int height = 32; + int macrotile = 0; + int microtile = 0; + int clamp = 0; // wrap/repeat + ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PARTICLE_TEXTURE, + width, height, + macrotile, microtile, + clamp); 
+ + ib_vap_stream_cntl__32(); + + // shaders + T0V(US_PIXSIZE + , US_PIXSIZE__PIX_SIZE(4) + ); + ib_ga_us(&shaders.fragment[PARTICLE_SHADER]); + ib_vap_pvs(&shaders.vertex[PARTICLE_SHADER]); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_CLIP_CNTL + , VAP_CLIP_CNTL__PS_UCP_MODE(3) + ); + + T0V(VAP_VTE_CNTL + , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1) + | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1) + | VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1) + | VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division + | VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division + | VAP_VTE_CNTL__VTX_W0_FMT(1) + | VAP_VTE_CNTL__SERIAL_PROC_ENA(0) + ); + + T0V(VAP_CNTL_STATUS + , VAP_CNTL_STATUS__PVS_BYPASS(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // matrix + ////////////////////////////////////////////////////////////////////////////// + + mat4x4 s = scale(1.0f); + mat4x4 local_to_world = s; + + mat4x4 local_to_view = world_to_view * local_to_world; + + mat4x4 trans = world_to_clip * local_to_world; + + ////////////////////////////////////////////////////////////////////////////// + // consts + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000); + + const float scale = 0.005f; + + const float consts[] = { + // 0 + trans[0][0], trans[0][1], trans[0][2], trans[0][3], + trans[1][0], trans[1][1], trans[1][2], trans[1][3], + trans[2][0], trans[2][1], trans[2][2], trans[2][3], + trans[3][0], trans[3][1], trans[3][2], trans[3][3], + + // 4: dx (right) + local_to_view[0][0], local_to_view[0][1], local_to_view[0][2], 0, + + // 5: dy (up) + local_to_view[1][0], local_to_view[1][1], local_to_view[1][2], 0, + + // 6: xyz:position w:scale + 0, 0, 0, scale, 
+
+    // 7:
+    -2.0, 0, 0, 0,
+  };
+  ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
+
+  int offset = state.length * 4 * 2; // float index where positions start: 4 vertices * 2 floats per particle are skipped
+  int ix = 0;
+
+  particle_position * pos = state.position_output();
+
+  for (int i = 0; i < state.length; i++) {
+    const vec3& position = pos[i].position;
+    for (int j = 0; j < 4; j++) { // the same position is stored for each of the 4 quad vertices
+      vertexbuffer_ptr[offset + ix] = position.x;
+      ix++;
+      vertexbuffer_ptr[offset + ix] = position.y;
+      ix++;
+      vertexbuffer_ptr[offset + ix] = position.z;
+      ix++;
+    };
+  }
+  asm volatile ("" ::: "memory"); // compiler barrier: keep the stores above ahead of what follows
+
+  _3d_particle_inner(state.length, offset);
+}
+
+int indirect_buffer(const shaders& shaders,
+                    const floatbuffer_state& state,
+                    // state: particle count and positions copied into vertexbuffer_ptr
+                    // theta: scaled and passed to rotate_y when building world_to_view
+                    float theta,
+                    float * vertexbuffer_ptr)
+{
+  int width = 1600;
+  int height = 1200;
+  int pitch = width;
+
+  ib_ix = 0; // restart the indirect buffer; the final ib_ix is the return value
+
+  ib_generic_initialization();
+
+  T0V(RB3D_BLENDCNTL, 0);
+  T0V(RB3D_ABLENDCNTL, 0);
+
+  ib_viewport(width, height);
+  ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);
+
+  T0V(GB_ENABLE, 0);
+
+  T0V(US_OUT_FMT_0
+      , US_OUT_FMT__OUT_FMT(0) // C4_8
+      | US_OUT_FMT__C0_SEL__BLUE
+      | US_OUT_FMT__C1_SEL__GREEN
+      | US_OUT_FMT__C2_SEL__RED
+      | US_OUT_FMT__C3_SEL__ALPHA
+      | US_OUT_FMT__OUT_SIGN(0)
+      );
+  T0V(US_OUT_FMT_1
+      , US_OUT_FMT__OUT_FMT(15) // render target is not used
+      );
+  T0V(US_OUT_FMT_2
+      , US_OUT_FMT__OUT_FMT(15) // render target is not used
+      );
+  T0V(US_OUT_FMT_3
+      , US_OUT_FMT__OUT_FMT(15) // render target is not used
+      );
+
+  load_pvs_shaders(shaders.vertex, shaders.vertex_length);
+  load_us_shaders(shaders.fragment, shaders.fragment_length);
+
+  //////////////////////////////////////////////////////////////////////////////
+  // DRAW
+  //////////////////////////////////////////////////////////////////////////////
+
+  mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
+  mat4x4 p = perspective(0.01f, 3.0f,
+                         0.001f, 0.999f,
+                         1.0f, 3.0f);
+  mat4x4 t = translate(vec3(0, 0, 1));
+  mat4x4 rx = rotate_x(-PI / 8.0f);
+  mat4x4 ry = 
rotate_y(theta * 0.8f); + mat4x4 world_to_view = t * rx * ry; + + mat4x4 world_to_clip = aspect * p * world_to_view; + + _3d_clear(shaders); + _3d_plane(shaders, world_to_clip, theta); + _3d_particle(shaders, + world_to_clip, + world_to_view, + state, + //particles, + //particles_length, + theta, + vertexbuffer_ptr); + + ////////////////////////////////////////////////////////////////////////////// + // padding + ////////////////////////////////////////////////////////////////////////////// + + while ((ib_ix % 8) != 0) { + TU(0x80000000); + } + + assert((unsigned int)ib_ix < (sizeof (ib)) / (sizeof (ib[0]))); + + return ib_ix; +} + +void reset_particle(particle& p) +{ + //vec3 pos = normalize(p.position); + + p.position = normalize(vec3(p.velocity.x, + 0, + p.velocity.z)) * 20.0f; + + //printf("position %f %f %f\n", p.position.x, p.position.y, p.position.z); + + p.velocity = vec3(p.velocity.x, + 2.0f * p.delta, + p.velocity.z); + + //printf("velocity %f %f %f\n\n", p.velocity.x, p.velocity.y, p.velocity.z); +} + +void init_particles(particle * particles, const int particles_length) +{ + uint32_t state = 0x12345678; + + const float rl = 1.0f / (float)(particles_length); + + for (int i = 0; i < particles_length; i++) { + float fi = ((float)i); + + float sx = xorshift32f(state) * 2.0f - 1.0f; + float sy = xorshift32f(state) * 2.0f - 1.0f; + float sz = xorshift32f(state) * 2.0f - 1.0f; + + float delta = xorshift32f(state) * 0.5f + 0.5f; + + float vx = xorshift32f(state) * 2.0f - 1.0f; + float vz = xorshift32f(state) * 2.0f - 1.0f; + + particles[i].age = max_age * sinf(fi * rl * 2) * 0.5f + 0.5f; + particles[i].delta = delta; + particles[i].position.x = sx; + particles[i].position.y = sy; + particles[i].position.z = sz; + particles[i].velocity = normalize(vec3(vx * 0.5f, 0.0f, vz * 0.5f)); + } +} + +int init_particles_vertexbuffer(int fd, int particles_length, float ** ptr_out) +{ + const vec2 vertices[] = { + {0.0, 0.0f}, + {1.0, 0.0f}, + {1.0, 1.0f}, + {0.0, 1.0f}, + 
};
+  const int vertex_count = 4;
+
+  const int size = particles_length * vertex_count * (2 + 3) * (sizeof (float)); // per vertex: 2 floats written below + 3 position floats written later by _3d_particle
+
+  void * ptr;
+  int handle = create_buffer(fd, size, &ptr);
+
+  float * ptrf = (float*)ptr;
+
+  int ix = 0;
+  for (int j = 0; j < particles_length; j++) { // the same 4 (x, y) pairs are repeated for every particle
+    for (int i = 0; i < vertex_count; i++) {
+      ptrf[ix++] = vertices[i].x;
+      ptrf[ix++] = vertices[i].y;
+    }
+  }
+  printf("init vertexbuffer %d %d\n", ix, size);
+
+  assert(ptr_out != NULL);
+  *ptr_out = ptrf;
+
+  return handle;
+}
+
+void reset_particle2(particle_position& position,
+                     particle_velocity& velocity)
+{
+  // new position: unit vector along the horizontal (x, z) velocity, scaled by 20
+
+  position.position = normalize(vec3(velocity.velocity.x,
+                                     0,
+                                     velocity.velocity.z)) * 20.0f;
+
+  // new velocity: x and z are kept, y is set to twice the per-particle delta
+
+  velocity.velocity = vec3(velocity.velocity.x,
+                           2.0f * velocity.delta,
+                           velocity.velocity.z);
+
+  // position.age and velocity.delta are left unchanged
+}
+
+void init_particles2(void * position_ptr,
+                     void * velocity_ptr,
+                     const int particles_length)
+{
+  uint32_t state = 0x12345678; // fixed seed, so every run produces the same particles
+
+  particle_position * position = (particle_position *)position_ptr;
+  particle_velocity * velocity = (particle_velocity *)velocity_ptr;
+
+  const float rl = 1.0f / (float)(particles_length);
+
+  for (int i = 0; i < particles_length; i++) {
+    float fi = ((float)i);
+
+    float sx = xorshift32f(state) * 2.0f - 1.0f;
+    float sy = xorshift32f(state) * 2.0f - 1.0f;
+    float sz = xorshift32f(state) * 2.0f - 1.0f;
+
+    float new_delta = xorshift32f(state) * 0.5f + 0.5f;
+
+    float vx = xorshift32f(state) * 2.0f - 1.0f;
+    float vz = xorshift32f(state) * 2.0f - 1.0f;
+
+    float new_age = max_age * sinf(fi * rl * 2) * 0.5f + 0.5f;
+
+    vec3 new_position = vec3(sx, sy, sz); // replaced by reset_particle2 right after the stores below
+    vec3 new_velocity = normalize(vec3(vx * 0.5f, 0.0f, vz * 0.5f));
+
+    position[i].position = new_position;
+    position[i].age = new_age;
+    velocity[i].velocity = new_velocity;
+    velocity[i].delta = new_delta;
+
+
reset_particle2(position[i], velocity[i]); + } +} + +floatbuffer_state create_floatbuffers(int fd, + int length) +{ + floatbuffer_state state; + int size = length * 4 * 4; + for (int i = 0; i < 4; i++) { + state.handles[i] = create_buffer(fd, size, &state.ptrs[i]); + } + + init_particles2(state.ptrs[0], + state.ptrs[1], + length); + + state.flip = 0; + state.length = length; + + return state; +} + +int _floatbuffer(const shaders& shaders, + int input_reloc_index0, + int input_reloc_index1, + int output_reloc_index0, + int output_reloc_index1, + int floatbuffer_width, + int floatbuffer_height) +{ + int viewport_width = floatbuffer_width; + int viewport_height = floatbuffer_height; + int texture_width = floatbuffer_width; + int texture_height = floatbuffer_height; + /* + float vx = ((float)viewport_width) * 0.5f; + float vy = ((float)viewport_height) * 0.5f; + */ + float tx = 0.5f / ((float)texture_width); + float ty = 0.5f / ((float)texture_height); + printf("tx ty: %f %f\n", tx, ty); + + printf("relocs: %d %d %d %d\n", + input_reloc_index0, + input_reloc_index1, + output_reloc_index0, + output_reloc_index1); + + int macrotile = 0; + int microtile = 0; + + ib_ix = 0; + + ib_generic_initialization(); + + T0V(SC_SCISSOR0 + , SC_SCISSOR0__XS0(0) + | SC_SCISSOR0__YS0(0) + ); + T0V(SC_SCISSOR1 + , SC_SCISSOR1__XS1(viewport_width - 1) + | SC_SCISSOR1__YS1(viewport_height - 1) + ); + T0Vf(VAP_VPORT_XSCALE, (float)viewport_width); + T0Vf(VAP_VPORT_YSCALE, (float)viewport_height); + + int colorformat = 7; // ARGB32323232 + + ib_colorbuffer2(0, + output_reloc_index0, + viewport_width, + macrotile, + microtile, + colorformat); + + ib_colorbuffer2(1, + output_reloc_index1, + viewport_width, + macrotile, + microtile, + colorformat); + + T0V(US_OUT_FMT_0 + , US_OUT_FMT__OUT_FMT(21) // C4_32_FP + | US_OUT_FMT__C0_SEL__RED + | US_OUT_FMT__C1_SEL__GREEN + | US_OUT_FMT__C2_SEL__BLUE + | US_OUT_FMT__C3_SEL__ALPHA + | US_OUT_FMT__OUT_SIGN(0) + ); + T0V(US_OUT_FMT_1 + , 
US_OUT_FMT__OUT_FMT(21) // C4_32_FP + | US_OUT_FMT__C0_SEL__RED + | US_OUT_FMT__C1_SEL__GREEN + | US_OUT_FMT__C2_SEL__BLUE + | US_OUT_FMT__C3_SEL__ALPHA + | US_OUT_FMT__OUT_SIGN(0) + ); + T0V(US_OUT_FMT_2 + , US_OUT_FMT__OUT_FMT(15) // render target is not used + ); + T0V(US_OUT_FMT_3 + , US_OUT_FMT__OUT_FMT(15) // render target is not used + ); + + // shaders + load_pvs_shaders(shaders.vertex, shaders.vertex_length); + load_us_shaders(shaders.fragment, shaders.fragment_length); + + // GA + + T0V(GB_ENABLE + , 0 + ); + + ////////////////////////////////////////////////////////////////////////////// + // RS + ////////////////////////////////////////////////////////////////////////////// + + ib_rs_instructions(1); + + ////////////////////////////////////////////////////////////////////////////// + // VAP OUT + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_OUT_VTX_FMT_0 + , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1) + ); + T0V(VAP_OUT_VTX_FMT_1 + , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4) + ); + + // + + T0V(ZB_CNTL, 0); + T0V(ZB_ZSTENCILCNTL, 0); + + // + + ////////////////////////////////////////////////////////////////////////////// + // TX + ////////////////////////////////////////////////////////////////////////////// + + T0V(TX_INVALTAGS, 0x00000000); + + T0V(TX_ENABLE + , TX_ENABLE__TEX_0_ENABLE__ENABLE + | TX_ENABLE__TEX_1_ENABLE__ENABLE); + + int clamp = 2; // clamp to [0.0, 1.0] + int txformat = 29; // TX_FMT_32F_32F_32F_32F + ib_texture2(0, + input_reloc_index0, + texture_width, texture_height, + macrotile, microtile, + clamp, + txformat); + + ib_texture2(1, + input_reloc_index1, + texture_width, texture_height, + macrotile, microtile, + clamp, + txformat); + + // shaders + + ib_vap_stream_cntl__2(); + + T0V(US_PIXSIZE + , US_PIXSIZE__PIX_SIZE(6) + ); + + ib_ga_us(&shaders.fragment[PARTICLE_PHYSICS_SHADER]); + ib_vap_pvs(&shaders.vertex[PARTICLE_PHYSICS_SHADER]); + + const float vertex_consts[] = { + //-tx, -ty, 0, 0, + 
0, 0, 0, 0, + }; + const int vertex_consts_size = (sizeof (vertex_consts)); + ib_vap_pvs_const_cntl(vertex_consts, vertex_consts_size); + + // fragment constants + //const vec3 velocity_scale = vec3(0.003f, 0.01f, 0.003f); + const vec3 velocity_scale = vec3(0.09f, 0.50f, 0.09f); + const float delta_age = 0.01f; + const float velocity_attenuation = -0.6f; // multiplied by velocity.y after bounce + const float gravity = -0.04f; + const float fragment_consts[] = { + // 0: + velocity_scale.x, velocity_scale.y, velocity_scale.z, delta_age, + // 1: + velocity_attenuation, gravity, 0, 0, + }; + int fragment_consts_length = (sizeof (fragment_consts)) / (sizeof (fragment_consts[0])); + ib_ga_consts(fragment_consts, fragment_consts_length, 0); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_CLIP_CNTL + , VAP_CLIP_CNTL__CLIP_DISABLE(1) + ); + + T0V(VAP_VTE_CNTL + , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1) + | VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division + | VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division + ); + + T0V(VAP_CNTL_STATUS + , VAP_CNTL_STATUS__PVS_BYPASS(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // 3D_DRAW + ////////////////////////////////////////////////////////////////////////////// + + const int dwords_per_vtx = 2; + + T0V(VAP_VTX_SIZE + , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx) + ); + + const float vertices[] = { + 0.0f, 0.0f, + 1.0f, 0.0f, + 1.0f, 1.0f, + 0.0f, 1.0f, + }; + const int vertex_count = 4; + T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1); + TU( VAP_VF_CNTL__PRIM_TYPE(5) // triangle fan + | VAP_VF_CNTL__PRIM_WALK(3) + | VAP_VF_CNTL__INDEX_SIZE(0) + | VAP_VF_CNTL__VTX_REUSE_DIS(0) + | VAP_VF_CNTL__DUAL_INDEX_MODE(0) + | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) + | VAP_VF_CNTL__NUM_VERTICES(vertex_count) + ); + for 
(int i = 0; i < vertex_count * 2; i++) { + TF(vertices[i]); + } + + ////////////////////////////////////////////////////////////////////////////// + // padding + ////////////////////////////////////////////////////////////////////////////// + + while ((ib_ix % 8) != 0) { + TU(0x80000000); + } + + return ib_ix; +} + +void check_particles2(floatbuffer_state& state) +{ + assert(state.flip == 0 || state.flip == 1); + int fb_input = state.flip * 2; + int fb_output = (!state.flip) * 2; + + particle_position * in_pos = (particle_position *)state.ptrs[fb_input + 0]; + particle_velocity * in_vel = (particle_velocity *)state.ptrs[fb_input + 1]; + + particle_position * out_pos = (particle_position *)state.ptrs[fb_output + 0]; + particle_velocity * out_vel = (particle_velocity *)state.ptrs[fb_output + 1]; + + for (int i = 0; i < state.length; i++) { + printf("[%d] %d %d %d %d\n", i, fb_input + 0, fb_input + 1, fb_output + 0, fb_output + 1); + printf(" in pos (% 3.04f % 3.04f % 3.04f)\n", in_pos[i].position.x, in_pos[i].position.y, in_pos[i].position.z); + printf(" in vel (% 3.04f % 3.04f % 3.04f)\n", in_vel[i].velocity.x, in_vel[i].velocity.y, in_vel[i].velocity.z); + printf(" out pos (% 3.04f % 3.04f % 3.04f)\n", out_pos[i].position.x, out_pos[i].position.y, out_pos[i].position.z); + printf(" out vel (% 3.04f % 3.04f % 3.04f)\n", out_vel[i].velocity.x, out_vel[i].velocity.y, out_vel[i].velocity.z); + } +} + +int main() +{ + struct shaders shaders = { + .vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length), + .fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length), + .vertex_length = vertex_shader_paths_length, + .fragment_length = fragment_shader_paths_length, + }; + + void * rmmio = map_pci_resource2(); + + int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC); + assert(fd != -1); + + const int colorbuffer_size = 1600 * 1200 * 4; + int colorbuffer_handle[2]; + int zbuffer_handle; + int * texturebuffer_handle; + //int flush_handle; + int 
vertexbuffer_handle; + + void * colorbuffer_ptr[2]; + void * zbuffer_ptr; + float * vertexbuffer_ptr; + + // colorbuffer + colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]); + colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]); + zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr); + //flush_handle = create_flush_buffer(fd); + texturebuffer_handle = load_textures(fd, textures, textures_length); + + fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]); + fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]); + fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle); + + int colorbuffer_ix = 0; + float theta = PI * 0.5; + + const int floatbuffer_width = 4; + const int floatbuffer_height = 4; + floatbuffer_state state = create_floatbuffers(fd, floatbuffer_width * floatbuffer_height); + + vertexbuffer_handle = init_particles_vertexbuffer(fd, state.length, &vertexbuffer_ptr); + fprintf(stderr, "vertexbuffer handle %d\n", vertexbuffer_handle); + + while (true) { + assert(state.flip == 0 || state.flip == 1); + int fb_input = state.flip * 2; + int fb_output = (!state.flip) * 2; + + { + int ib_dwords = _floatbuffer(shaders, + fb_input + 0, // input_reloc_index0, + fb_input + 1, // input_reloc_index1, + fb_output + 0, // output_reloc_index0, + fb_output + 1, // output_reloc_index1, + floatbuffer_width, + floatbuffer_height); + + int ret = drm_radeon_cs2(fd, + state.handles, + 4, + ib_dwords); + assert(ret != -1); + //printf("floatbuffer return %d\n", ret); + //check_particles2(state); + } + + { + int ib_dwords = indirect_buffer(shaders, + state, + theta, + vertexbuffer_ptr); + + int ret = drm_radeon_cs(fd, + colorbuffer_handle[colorbuffer_ix], + zbuffer_handle, + vertexbuffer_handle, + texturebuffer_handle, + textures_length, + ib_dwords); + + if (ret == -1) + break; + } + + primary_surface_address(rmmio, colorbuffer_ix); + + // next state + theta += 0.01f; + 
colorbuffer_ix = (colorbuffer_ix + 1) & 1; + + state.flip = (state.flip + 1) & 1; + + // + // update particles + // + /* + for (int i = 0; i < particles_length; i++) { + if (particles[i].age <= 0) { + particles[i].age += max_age; + reset_particle(particles[i]); + } else { + particles[i].age -= 0.01f; + particles[i].position += vec3(particles[i].velocity.x * 0.9f, + particles[i].velocity.y * 5.0f, + particles[i].velocity.z * 0.9f); + particles[i].velocity += vec3(0, -0.04, 0); + if (particles[i].position.y < 0) { + particles[i].position.y = fabsf(particles[i].position.y); + particles[i].velocity.y *= -0.6f; + } + } + } + */ + } + + { + printf("colorbuffer0.data\n"); + int out_fd = open("colorbuffer0.data", O_RDWR|O_CREAT, 0644); + assert(out_fd >= 0); + ssize_t write_length = write(out_fd, colorbuffer_ptr[0], colorbuffer_size); + assert(write_length == colorbuffer_size); + close(out_fd); + } + { + printf("zbuffer.data\n"); + int out_fd = open("zbuffer.data", O_RDWR|O_CREAT, 0644); + assert(out_fd >= 0); + ssize_t write_length = write(out_fd, zbuffer_ptr, colorbuffer_size); + assert(write_length == colorbuffer_size); + close(out_fd); + } + + close(fd); +} diff --git a/src/particle_physics.fs.asm b/src/particle_physics.fs.asm new file mode 100644 index 0000000..6f4cd4c --- /dev/null +++ b/src/particle_physics.fs.asm @@ -0,0 +1,77 @@ +-- temp[0].rgb : position +-- temp[0].a : age +-- temp[1].rgb : velocity +-- temp[1].a : delta + +-- temp[2].rgb : reset__position +-- temp[2].a : reset__age +-- temp[3].rgb : reset__velocity +-- temp[3].a : reset__delta + +-- temp[4].rgb : update__position +-- temp[4].a : update__age +-- temp[5].rgb : update__velocity +-- temp[5].a : update__delta + +-- temp[6].rgb : temp + +-- velocity_scale.rgb = vec3(0.003 , 0.01, 0.003) +-- delta_age = 0.01 +-- const[0] = { velocity_scale.rgb, delta_age } +-- gravity = -0.05 +-- velocity_attenuation = -0.7 +-- const[1] = { velocity_attenuation, gravity, 0, 0 } + +-- out[0].rgb : position +-- out[0].a 
: age + +TEX + temp[0].rgba = LD tex[0].rgba temp[0].rgaa ; + +TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE + temp[1].rgba = LD tex[1].rgba temp[0].rgaa ; + +-- update_particle (position) +TEX_SEM_WAIT +src0.a = temp[0] , -- age +src0.rgb = temp[0] , -- position +src1.a = const[0] , -- delta_age +src1.rgb = const[0] , -- scale +src2.rgb = temp[1] : -- velocity + temp[4].a = MAD src0.a src0.1 src1.a , -- update__age = (age * 1) - delta_age + temp[4].rgb = MAD src2.rgb src1.rgb src0.rgb ; -- update__position = (velocity * scale) + position + +-- update_particle (velocity gravity) +-- p.velocity.y += -0.05; +src0.rgb = temp[1] , -- velocity +src1.rgb = const[1] : -- gravity (g) + temp[5].rgb = MAD src0.rgb src1.111 src1.0g0 ; + +-- update_particle (velocity bounce) +-- p.velocity.y *= -0.7; +src0.rgb = temp[5] , -- velocity +src1.rgb = const[1] : -- velocity_attenuation (r) + temp[6].rgb = MAD src0.rgb src1.1r1 src1.000 ; + +-- update_particle (velocity bounce) +-- p.velocity = (p.position.y >= 0) ? temp[5] : temp[6] +src0.rgb = temp[5] , -- velocity +src1.rgb = temp[6] , -- velocity +src2.rgb = temp[4] : -- position + temp[5].rgb = CMP src0.rgb src1.rgb src2.ggg ; + +-- position.y = abs(position.y) +src0.rgb = temp[4] : + temp[4].g = MAX |src0.0g0| |src0.0g0| ; + +OUT +src0.a = temp[4] , -- update__age +src0.rgb = temp[4] : -- update__position + out[0].a = MAX src0.a src0.a , + out[0].rgb = MAX src0.rgb src0.rgb ; + +OUT TEX_SEM_WAIT +src0.a = temp[1] , -- delta +src0.rgb = temp[5] : -- update__velocity + out[1].a = MAX src0.a src0.a , + out[1].rgb = MAX src0.rgb src0.rgb ; diff --git a/src/particle_physics.fs.bin b/src/particle_physics.fs.bin new file mode 100644 index 0000000..a85c47e Binary files /dev/null and b/src/particle_physics.fs.bin differ diff --git a/src/particle_physics.vs.asm b/src/particle_physics.vs.asm new file mode 100644 index 0000000..eadd23e --- /dev/null +++ b/src/particle_physics.vs.asm @@ -0,0 +1,2 @@ +out[0].xyzw = VE_ADD input[0].xyzw input[0].0000 ; 
+out[1].xyzw = VE_ADD input[0].xy00 const[0].xy00 ; diff --git a/src/particle_physics.vs.bin b/src/particle_physics.vs.bin new file mode 100644 index 0000000..b140eb3 Binary files /dev/null and b/src/particle_physics.vs.bin differ diff --git a/src/r500/indirect_buffer.c b/src/r500/indirect_buffer.c index c75d648..f574753 100644 --- a/src/r500/indirect_buffer.c +++ b/src/r500/indirect_buffer.c @@ -272,7 +272,8 @@ void ib_generic_initialization() ); } -void ib_colorbuffer(int reloc_index, int pitch, int macrotile, int microtile) +void ib_colorbuffer(int reloc_index, int pitch, + int macrotile, int microtile) { ////////////////////////////////////////////////////////////////////////////// @@ -297,6 +298,38 @@ void ib_colorbuffer(int reloc_index, int pitch, int macrotile, int microtile) TU(reloc_index * 4); // index into relocs array } +void ib_colorbuffer2(int buffer_index, + int reloc_index, + int pitch, + int macrotile, int microtile, + int colorformat) +{ + assert(buffer_index >= 0 && buffer_index <= 3); + + int reg_offset = buffer_index * 4; + + ////////////////////////////////////////////////////////////////////////////// + // CB + ////////////////////////////////////////////////////////////////////////////// + + T0V(RB3D_COLOROFFSET0 + reg_offset + , 0x00000000 // value replaced by kernel from relocs + ); + T3(_NOP, 0); + TU(reloc_index * 4); // index into relocs array + + T0V(RB3D_COLORPITCH0 + reg_offset + , RB3D_COLORPITCH__COLORPITCH(pitch >> 1) + | RB3D_COLORPITCH__COLORTILE(macrotile) + | RB3D_COLORPITCH__COLORMICROTILE(microtile) + | RB3D_COLORPITCH__COLORFORMAT(colorformat) + ); + // The COLORPITCH NOP is ignored/not applied due to + // RADEON_CS_KEEP_TILING_FLAGS, but is still required. 
+ T3(_NOP, 0); + TU(reloc_index * 4); // index into relocs array +} + void ib_viewport(int width, int height) { ////////////////////////////////////////////////////////////////////////////// @@ -586,6 +619,7 @@ void ib_texture__1_float32(int reloc_index, | TX_FILTER0__CLAMP_T(clamp) | TX_FILTER0__MAG_FILTER__POINT | TX_FILTER0__MIN_FILTER__POINT + | TX_FILTER0__ID(0) ); T0V(TX_FILTER1_0 , TX_FILTER1__LOD_BIAS(1) @@ -616,6 +650,57 @@ void ib_texture__1_float32(int reloc_index, TU(reloc_index * 4); // index into relocs array } +void ib_texture2(int texture_index, + int reloc_index, + int width, int height, + int macrotile, int microtile, + int clamp, + int txformat) +{ + assert(texture_index >= 0 && texture_index <= 15); + + int texture_offset = texture_index * 4; + + T0V(TX_FILTER0_0 + texture_offset + , TX_FILTER0__CLAMP_S(clamp) + | TX_FILTER0__CLAMP_T(clamp) + | TX_FILTER0__MAG_FILTER__POINT + | TX_FILTER0__MIN_FILTER__POINT + | TX_FILTER0__ID(texture_index) + ); + T0V(TX_FILTER1_0 + texture_offset + , TX_FILTER1__LOD_BIAS(1) + | TX_FILTER1__BORDER_FIX(0) + ); + T0V(TX_BORDER_COLOR_0 + texture_offset + , 0 + ); + T0V(TX_FORMAT0_0 + texture_offset + , TX_FORMAT0__TXWIDTH(width - 1) + | TX_FORMAT0__TXHEIGHT(height - 1) + ); + + T0V(TX_FORMAT1_0 + texture_offset + , TX_FORMAT1__TXFORMAT(txformat) + | TX_FORMAT1__SEL_ALPHA(3) + | TX_FORMAT1__SEL_RED(0) + | TX_FORMAT1__SEL_GREEN(1) + | TX_FORMAT1__SEL_BLUE(2) + | TX_FORMAT1__TEX_COORD_TYPE__2D + ); + T0V(TX_FORMAT2_0 + texture_offset + , 0 + ); + + T0V(TX_OFFSET_0 + texture_offset + , TX_OFFSET__MACRO_TILE(macrotile) + | TX_OFFSET__MICRO_TILE(microtile) + ); + + T3(_NOP, 0); + TU(reloc_index * 4); // index into relocs array +} + void ib_vap_pvs(struct shader_offset * offset) { const int instruction_size = 4 * 4; // bytes diff --git a/src/r500/indirect_buffer.h b/src/r500/indirect_buffer.h index de4e37e..edd795f 100644 --- a/src/r500/indirect_buffer.h +++ b/src/r500/indirect_buffer.h @@ -7,23 +7,23 @@ #define 
T0(address, count) \ do { \ - ib[ib_ix++].u32 = TYPE_0_COUNT(count) | TYPE_0_BASE_INDEX(address >> 2); \ + ib[ib_ix++].u32 = TYPE_0_COUNT(count) | TYPE_0_BASE_INDEX((address) >> 2); \ } while (0); #define T0_ONE_REG(address, count) \ do { \ - ib[ib_ix++].u32 = TYPE_0_COUNT(count) | TYPE_0_ONE_REG | TYPE_0_BASE_INDEX(address >> 2); \ + ib[ib_ix++].u32 = TYPE_0_COUNT(count) | TYPE_0_ONE_REG | TYPE_0_BASE_INDEX((address) >> 2); \ } while (0); #define T0V(address, value) \ do { \ - ib[ib_ix++].u32 = TYPE_0_COUNT(0) | TYPE_0_BASE_INDEX(address >> 2); \ + ib[ib_ix++].u32 = TYPE_0_COUNT(0) | TYPE_0_BASE_INDEX((address) >> 2); \ ib[ib_ix++].u32 = value; \ } while (0); #define T0Vf(address, value) \ do { \ - ib[ib_ix++].u32 = TYPE_0_COUNT(0) | TYPE_0_BASE_INDEX(address >> 2); \ + ib[ib_ix++].u32 = TYPE_0_COUNT(0) | TYPE_0_BASE_INDEX((address) >> 2); \ ib[ib_ix++].f32 = value; \ } while (0); @@ -57,6 +57,11 @@ extern volatile int ib_ix; void ib_generic_initialization(); void ib_viewport(int width, int height); void ib_colorbuffer(int reloc_index, int pitch, int macrotile, int microtile); +void ib_colorbuffer2(int buffer_index, + int reloc_index, + int pitch, + int macrotile, int microtile, + int colorformat); void ib_zbuffer(int reloc_index, int pitch, int zfunc); void ib_rs_instructions(int count); void ib_texture__0(); @@ -68,6 +73,12 @@ void ib_texture__1_float32(int reloc_index, int width, int height, int macrotile, int microtile, int clamp); +void ib_texture2(int texture_index, + int reloc_index, + int width, int height, + int macrotile, int microtile, + int clamp, + int txformat); void ib_vap_pvs(struct shader_offset * offset); void ib_ga_us(struct shader_offset * offset); void ib_vap_pvs_const_cntl(const float * consts, int size);