#include #include #include #include #include #include #include #include #include #include #include #include "r500/3d_registers.h" #include "r500/3d_registers_undocumented.h" #include "r500/3d_registers_bits.h" #include "r500/indirect_buffer.h" #include "r500/shader.h" #include "r500/display_controller.h" #include "drm/buffer.h" #include "drm/drm.h" #include "math/float_types.hpp" #include "math/transform.hpp" #include "math/constants.hpp" #include "../model/model2.h" #define CLEAR_SHADER 0 #define PLANE_SHADER 1 #define PARTICLE_SHADER 2 #define TEXTURE_TILE_SHADER 3 #define PARTICLE_PHYSICS_SHADER 4 const char * vertex_shader_paths[] = { "clear.vs.bin", "particle_plane_fan.vs.bin", "particle_particle_animated_quad_vbuf.vs.bin", "texture_tile.vs.bin", "particle_physics.vs.bin", }; const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0])); const char * fragment_shader_paths[] = { "clear.fs.bin", "particle_plane.fs.bin", "particle_particle.fs.bin", "texture_tile.fs.bin", "particle_physics.fs.bin", }; const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0])); #define PLANE_TEXTURE 0 #define PARTICLE_TEXTURE 1 const char * textures[] = { "../texture/plane_32x32_rgba8888.data", "../texture/particle_32x32_rgba8888.data", }; const int textures_length = (sizeof (textures)) / (sizeof (textures[0])); struct shaders { struct shader_offset * vertex; struct shader_offset * fragment; int vertex_length; int fragment_length; }; static inline uint32_t xorshift32(uint32_t state) { uint32_t x = state; x ^= x << 13; x ^= x >> 17; x ^= x << 5; return x; } static inline float xorshift32f(uint32_t& state) { state = xorshift32(state); return (float)(state & 0xffffff) * (1.0f / 16777215.0f); } const float max_age = 3.0f; struct particle_position { vec3 position; float age; }; struct particle_velocity { vec3 velocity; float delta; }; struct particle { vec3 position; float age; vec3 velocity; float delta; }; struct floatbuffer_state { int handles[4]; void * ptrs[4]; int length; int flip; inline particle_position * position_output() const { int fb_output = (!flip) * 2; particle_position * out_pos = (particle_position *)this->ptrs[fb_output + 0]; return out_pos; } }; void _3d_clear(const shaders& shaders) { ib_rs_instructions(0); ////////////////////////////////////////////////////////////////////////////// // VAP OUT ////////////////////////////////////////////////////////////////////////////// T0V(VAP_OUT_VTX_FMT_0 , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)); T0V(VAP_OUT_VTX_FMT_1 , 0); // ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always ib_texture__0(); ib_vap_stream_cntl__2(); // shaders T0V(US_PIXSIZE , US_PIXSIZE__PIX_SIZE(1) ); ib_ga_us(&shaders.fragment[CLEAR_SHADER]); ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]); ////////////////////////////////////////////////////////////////////////////// // VAP INDEX ////////////////////////////////////////////////////////////////////////////// T0V(VAP_INDEX_OFFSET, 0); T0V(VAP_VF_MAX_VTX_INDX , VAP_VF_MAX_VTX_INDX__MAX_INDX(0) ); T0V(VAP_VF_MIN_VTX_INDX , VAP_VF_MIN_VTX_INDX__MIN_INDX(0) ); ////////////////////////////////////////////////////////////////////////////// // VAP ////////////////////////////////////////////////////////////////////////////// T0V(VAP_CLIP_CNTL , VAP_CLIP_CNTL__CLIP_DISABLE(1) ); T0V(VAP_VTE_CNTL , VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division | VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division ); T0V(VAP_CNTL_STATUS , VAP_CNTL_STATUS__PVS_BYPASS(0) ); ////////////////////////////////////////////////////////////////////////////// // GA POINT SIZE ////////////////////////////////////////////////////////////////////////////// T0V(GA_POINT_SIZE , GA_POINT_SIZE__HEIGHT(600 * 12) | GA_POINT_SIZE__WIDTH(800 * 12) ); ////////////////////////////////////////////////////////////////////////////// // 3D_DRAW ////////////////////////////////////////////////////////////////////////////// const int dwords_per_vtx = 2; T0V(VAP_VTX_SIZE , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx) ); const float center[] = { 800.0f, 600.0f, }; const int vertex_count = 1; T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1); TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list | VAP_VF_CNTL__PRIM_WALK(3) | VAP_VF_CNTL__INDEX_SIZE(0) | VAP_VF_CNTL__VTX_REUSE_DIS(0) | VAP_VF_CNTL__DUAL_INDEX_MODE(0) | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) | VAP_VF_CNTL__NUM_VERTICES(vertex_count) ); for (int i = 0; i < 2; i++) { TF(center[i]); } } mat4x4 perspective(float low1, float high1, float low2, float high2, float low3, float high3) { float scale2 = (high2 - low2) / (high1 - low1); float scale3 = (high3 - low3) / (high1 - low1); mat4x4 m1 = mat4x4(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, -low1, 0, 0, 0, 1 ); mat4x4 m2 = mat4x4(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, scale2, low2, 0, 0, scale3, low3 ); return m2 * m1; } void _3d_plane_inner() { ////////////////////////////////////////////////////////////////////////////// // 3D_DRAW ////////////////////////////////////////////////////////////////////////////// const int dwords_per_vtx = 2; T0V(VAP_VTX_SIZE , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx) ); const vec2 vertices[] = { {0.0, 0.0f}, {1.0, 0.0f}, {1.0, 1.0f}, {0.0, 1.0f}, }; const int vertex_count = 4; T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1); TU( VAP_VF_CNTL__PRIM_TYPE(5) // triangle fan | VAP_VF_CNTL__PRIM_WALK(3) | VAP_VF_CNTL__INDEX_SIZE(0) | VAP_VF_CNTL__VTX_REUSE_DIS(0) | VAP_VF_CNTL__DUAL_INDEX_MODE(0) | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) | VAP_VF_CNTL__NUM_VERTICES(vertex_count) ); for (int i = 0; i < vertex_count; i++) { TF(vertices[i].x); TF(vertices[i].y); } } void _3d_particle_inner(int particles_length, int position_offset) { const int vertex_count = 4 * particles_length; ////////////////////////////////////////////////////////////////////////////// // VF ////////////////////////////////////////////////////////////////////////////// T0V(VAP_VF_MAX_VTX_INDX , VAP_VF_MAX_VTX_INDX__MAX_INDX(vertex_count - 1) ); T0V(VAP_VF_MIN_VTX_INDX , VAP_VF_MIN_VTX_INDX__MIN_INDX(0) ); ////////////////////////////////////////////////////////////////////////////// // AOS ////////////////////////////////////////////////////////////////////////////// T3(_3D_LOAD_VBPNTR, (4 - 1)); TU( // VAP_VTX_NUM_ARRAYS VAP_VTX_NUM_ARRAYS__VTX_NUM_ARRAYS(2) | VAP_VTX_NUM_ARRAYS__VC_FORCE_PREFETCH(1) ); TU( // VAP_VTX_AOS_ATTR01 VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(3) | VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(3) | VAP_VTX_AOS_ATTR__VTX_AOS_COUNT1(2) | VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE1(2) ); TU( // VAP_VTX_AOS_ADDR0 (4 * position_offset); ); TU( // VAP_VTX_AOS_ADDR1 (4 * 0); ); T3(_NOP, 0); TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR0 T3(_NOP, 0); TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR0 ////////////////////////////////////////////////////////////////////////////// // 3D_DRAW ////////////////////////////////////////////////////////////////////////////// T3(_3D_DRAW_VBUF_2, (1 - 1)); TU( VAP_VF_CNTL__PRIM_TYPE(13) // quad list | VAP_VF_CNTL__PRIM_WALK(2) // vertex list (data fetched from memory) | VAP_VF_CNTL__INDEX_SIZE(0) | VAP_VF_CNTL__VTX_REUSE_DIS(0) | VAP_VF_CNTL__DUAL_INDEX_MODE(0) | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) | VAP_VF_CNTL__NUM_VERTICES(vertex_count) ); } void _3d_plane(const shaders& shaders, const mat4x4& world_to_clip, float theta) { ib_rs_instructions(1); ////////////////////////////////////////////////////////////////////////////// // VAP OUT ////////////////////////////////////////////////////////////////////////////// T0V(VAP_OUT_VTX_FMT_0 , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)); T0V(VAP_OUT_VTX_FMT_1 , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)); // ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less int width = 32; int height = 32; int macrotile = 0; int microtile = 0; int clamp = 0; // wrap/repeat ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PLANE_TEXTURE, width, height, macrotile, microtile, clamp); ib_vap_stream_cntl__2(); // shaders T0V(US_PIXSIZE , US_PIXSIZE__PIX_SIZE(4) ); ib_ga_us(&shaders.fragment[PLANE_SHADER]); ib_vap_pvs(&shaders.vertex[PLANE_SHADER]); ////////////////////////////////////////////////////////////////////////////// // VAP ////////////////////////////////////////////////////////////////////////////// T0V(VAP_CLIP_CNTL , VAP_CLIP_CNTL__PS_UCP_MODE(3) ); T0V(VAP_VTE_CNTL , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1) | VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1) | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1) | VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1) | VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1) | VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1) | VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division | VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division | VAP_VTE_CNTL__VTX_W0_FMT(1) | VAP_VTE_CNTL__SERIAL_PROC_ENA(0) ); T0V(VAP_CNTL_STATUS , VAP_CNTL_STATUS__PVS_BYPASS(0) ); ////////////////////////////////////////////////////////////////////////////// // matrix ////////////////////////////////////////////////////////////////////////////// mat4x4 s = scale(1.0f); mat4x4 rx = rotate_x(-PI / 2.0f); mat4x4 local_to_world = s * rx; mat4x4 trans = world_to_clip * local_to_world; ////////////////////////////////////////////////////////////////////////////// // consts ////////////////////////////////////////////////////////////////////////////// T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000); const float consts[] = { // 0 trans[0][0], trans[0][1], trans[0][2], trans[0][3], trans[1][0], trans[1][1], trans[1][2], trans[1][3], trans[2][0], trans[2][1], trans[2][2], trans[2][3], trans[3][0], trans[3][1], trans[3][2], trans[3][3], // 4 -2.0f, 0, 0, 0, }; ib_vap_pvs_const_cntl(consts, (sizeof (consts))); // plane_inner _3d_plane_inner(); } void _3d_particle(const shaders& shaders, const mat4x4& world_to_clip, const mat4x4& world_to_view, const floatbuffer_state& state, const float theta, float * vertexbuffer_ptr) { // enable blending T0V(RB3D_BLENDCNTL , RB3D_BLENDCNTL__ALPHA_BLEND_ENABLE__ENABLE | RB3D_BLENDCNTL__READ_ENABLE(1) | RB3D_BLENDCNTL__SRCBLEND__GL_ONE | RB3D_BLENDCNTL__DESTBLEND__GL_ONE | RB3D_BLENDCNTL__SRC_ALPHA_0_NO_READ(0) | RB3D_BLENDCNTL__SRC_ALPHA_1_NO_READ(0) ); ib_rs_instructions(1); ////////////////////////////////////////////////////////////////////////////// // VAP OUT ////////////////////////////////////////////////////////////////////////////// T0V(VAP_OUT_VTX_FMT_0 , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)); T0V(VAP_OUT_VTX_FMT_1 , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)); // //ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less T0V(ZB_CNTL , 0 ); T0V(ZB_ZSTENCILCNTL , 0 ); int width = 32; int height = 32; int macrotile = 0; int microtile = 0; int clamp = 0; // wrap/repeat ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PARTICLE_TEXTURE, width, height, macrotile, microtile, clamp); ib_vap_stream_cntl__32(); // shaders T0V(US_PIXSIZE , US_PIXSIZE__PIX_SIZE(4) ); ib_ga_us(&shaders.fragment[PARTICLE_SHADER]); ib_vap_pvs(&shaders.vertex[PARTICLE_SHADER]); ////////////////////////////////////////////////////////////////////////////// // VAP ////////////////////////////////////////////////////////////////////////////// T0V(VAP_CLIP_CNTL , VAP_CLIP_CNTL__PS_UCP_MODE(3) ); T0V(VAP_VTE_CNTL , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1) | VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1) | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1) | VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1) | VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1) | VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1) | VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division | VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division | VAP_VTE_CNTL__VTX_W0_FMT(1) | VAP_VTE_CNTL__SERIAL_PROC_ENA(0) ); T0V(VAP_CNTL_STATUS , VAP_CNTL_STATUS__PVS_BYPASS(0) ); ////////////////////////////////////////////////////////////////////////////// // matrix ////////////////////////////////////////////////////////////////////////////// mat4x4 s = scale(1.0f); mat4x4 local_to_world = s; mat4x4 local_to_view = world_to_view * local_to_world; mat4x4 trans = world_to_clip * local_to_world; ////////////////////////////////////////////////////////////////////////////// // consts ////////////////////////////////////////////////////////////////////////////// T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000); const float scale = 0.005f; const float consts[] = { // 0 trans[0][0], trans[0][1], trans[0][2], trans[0][3], trans[1][0], trans[1][1], trans[1][2], trans[1][3], trans[2][0], trans[2][1], trans[2][2], trans[2][3], trans[3][0], trans[3][1], trans[3][2], trans[3][3], // 4: dx (right) local_to_view[0][0], local_to_view[0][1], local_to_view[0][2], 0, // 5: dy (up) local_to_view[1][0], local_to_view[1][1], local_to_view[1][2], 0, // 6: xyz:position w:scale 0, 0, 0, scale, // 7: -2.0, 0, 0, 0, }; ib_vap_pvs_const_cntl(consts, (sizeof (consts))); int offset = state.length * 4 * 2; int ix = 0; particle_position * pos = state.position_output(); for (int i = 0; i < state.length; i++) { const vec3& position = pos[i].position; for (int j = 0; j < 4; j++) { vertexbuffer_ptr[offset + ix] = position.x; ix++; vertexbuffer_ptr[offset + ix] = position.y; ix++; vertexbuffer_ptr[offset + ix] = position.z; ix++; }; } asm volatile ("" ::: "memory"); _3d_particle_inner(state.length, offset); } int indirect_buffer(const shaders& shaders, const floatbuffer_state& state, //const particle * particles, //const int particles_length, float theta, float * vertexbuffer_ptr) { int width = 1600; int height = 1200; int pitch = width; ib_ix = 0; ib_generic_initialization(); T0V(RB3D_BLENDCNTL, 0); T0V(RB3D_ABLENDCNTL, 0); ib_viewport(width, height); ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0); T0V(GB_ENABLE, 0); T0V(US_OUT_FMT_0 , US_OUT_FMT__OUT_FMT(0) // C4_8 | US_OUT_FMT__C0_SEL__BLUE | US_OUT_FMT__C1_SEL__GREEN | US_OUT_FMT__C2_SEL__RED | US_OUT_FMT__C3_SEL__ALPHA | US_OUT_FMT__OUT_SIGN(0) ); T0V(US_OUT_FMT_1 , US_OUT_FMT__OUT_FMT(15) // render target is not used ); T0V(US_OUT_FMT_2 , US_OUT_FMT__OUT_FMT(15) // render target is not used ); T0V(US_OUT_FMT_2 , US_OUT_FMT__OUT_FMT(15) // render target is not used ); load_pvs_shaders(shaders.vertex, shaders.vertex_length); load_us_shaders(shaders.fragment, shaders.fragment_length); ////////////////////////////////////////////////////////////////////////////// // DRAW ////////////////////////////////////////////////////////////////////////////// mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1)); mat4x4 p = perspective(0.01f, 3.0f, 0.001f, 0.999f, 1.0f, 3.0f); mat4x4 t = translate(vec3(0, 0, 1)); mat4x4 rx = rotate_x(-PI / 8.0f); mat4x4 ry = rotate_y(theta * 0.8f); mat4x4 world_to_view = t * rx * ry; mat4x4 world_to_clip = aspect * p * world_to_view; _3d_clear(shaders); _3d_plane(shaders, world_to_clip, theta); _3d_particle(shaders, world_to_clip, world_to_view, state, //particles, //particles_length, theta, vertexbuffer_ptr); ////////////////////////////////////////////////////////////////////////////// // padding ////////////////////////////////////////////////////////////////////////////// while ((ib_ix % 8) != 0) { TU(0x80000000); } assert((unsigned int)ib_ix < (sizeof (ib)) / (sizeof (ib[0]))); return ib_ix; } void reset_particle(particle& p) { //vec3 pos = normalize(p.position); p.position = normalize(vec3(p.velocity.x, 0, p.velocity.z)) * 20.0f; //printf("position %f %f %f\n", p.position.x, p.position.y, p.position.z); p.velocity = vec3(p.velocity.x, 2.0f * p.delta, p.velocity.z); //printf("velocity %f %f %f\n\n", p.velocity.x, p.velocity.y, p.velocity.z); } void init_particles(particle * particles, const int particles_length) { uint32_t state = 0x12345678; const float rl = 1.0f / (float)(particles_length); for (int i = 0; i < particles_length; i++) { float fi = ((float)i); float sx = xorshift32f(state) * 2.0f - 1.0f; float sy = xorshift32f(state) * 2.0f - 1.0f; float sz = xorshift32f(state) * 2.0f - 1.0f; float delta = xorshift32f(state) * 0.5f + 0.5f; float vx = xorshift32f(state) * 2.0f - 1.0f; float vz = xorshift32f(state) * 2.0f - 1.0f; particles[i].age = max_age * sinf(fi * rl * 2) * 0.5f + 0.5f; particles[i].delta = delta; particles[i].position.x = sx; particles[i].position.y = sy; particles[i].position.z = sz; particles[i].velocity = normalize(vec3(vx * 0.5f, 0.0f, vz * 0.5f)); } } int init_particles_vertexbuffer(int fd, int particles_length, float ** ptr_out) { const vec2 vertices[] = { {0.0, 0.0f}, {1.0, 0.0f}, {1.0, 1.0f}, {0.0, 1.0f}, }; const int vertex_count = 4; const int size = particles_length * vertex_count * 2 * (sizeof (float)) + particles_length * vertex_count * 2 * (sizeof (float)); void * ptr; int handle = create_buffer(fd, size, &ptr); float * ptrf = (float*)ptr; int ix = 0; for (int j = 0; j < particles_length; j++) { for (int i = 0; i < vertex_count; i++) { ptrf[ix++] = vertices[i].x; ptrf[ix++] = vertices[i].y; } } printf("init vertexbuffer %d %d\n", ix, size); assert(ptr_out != NULL); *ptr_out = ptrf; return handle; } void reset_particle2(particle_position& position, particle_velocity& velocity) { //vec3 pos = normalize(p.position); position.position = normalize(vec3(velocity.velocity.x, 0, velocity.velocity.z)) * 20.0f; //printf("position %f %f %f\n", p.position.x, p.position.y, p.position.z); velocity.velocity = vec3(velocity.velocity.x, 2.0f * velocity.delta, velocity.velocity.z); //printf("velocity %f %f %f\n\n", p.velocity.x, p.velocity.y, p.velocity.z); } void init_particles2(void * position_ptr, void * velocity_ptr, const int particles_length) { uint32_t state = 0x12345678; particle_position * position = (particle_position *)position_ptr; particle_velocity * velocity = (particle_velocity *)velocity_ptr; const float rl = 1.0f / (float)(particles_length); for (int i = 0; i < particles_length; i++) { float fi = ((float)i); float sx = xorshift32f(state) * 2.0f - 1.0f; float sy = xorshift32f(state) * 2.0f - 1.0f; float sz = xorshift32f(state) * 2.0f - 1.0f; float new_delta = xorshift32f(state) * 0.5f + 0.5f; float vx = xorshift32f(state) * 2.0f - 1.0f; float vz = xorshift32f(state) * 2.0f - 1.0f; float new_age = max_age * sinf(fi * rl * 2) * 0.5f + 0.5f; vec3 new_position = vec3(sx, sy, sz); vec3 new_velocity = normalize(vec3(vx * 0.5f, 0.0f, vz * 0.5f)); position[i].position = new_position; position[i].age = new_age; velocity[i].velocity = new_velocity; velocity[i].delta = new_delta; reset_particle2(position[i], velocity[i]); } } floatbuffer_state create_floatbuffers(int fd, int length) { floatbuffer_state state; int size = length * 4 * 4; for (int i = 0; i < 4; i++) { state.handles[i] = create_buffer(fd, size, &state.ptrs[i]); } init_particles2(state.ptrs[0], state.ptrs[1], length); state.flip = 0; state.length = length; return state; } int _floatbuffer(const shaders& shaders, int input_reloc_index0, int input_reloc_index1, int output_reloc_index0, int output_reloc_index1, int floatbuffer_width, int floatbuffer_height) { int viewport_width = floatbuffer_width; int viewport_height = floatbuffer_height; int texture_width = floatbuffer_width; int texture_height = floatbuffer_height; /* float vx = ((float)viewport_width) * 0.5f; float vy = ((float)viewport_height) * 0.5f; */ float tx = 0.5f / ((float)texture_width); float ty = 0.5f / ((float)texture_height); printf("tx ty: %f %f\n", tx, ty); printf("relocs: %d %d %d %d\n", input_reloc_index0, input_reloc_index1, output_reloc_index0, output_reloc_index1); int macrotile = 0; int microtile = 0; ib_ix = 0; ib_generic_initialization(); T0V(SC_SCISSOR0 , SC_SCISSOR0__XS0(0) | SC_SCISSOR0__YS0(0) ); T0V(SC_SCISSOR1 , SC_SCISSOR1__XS1(viewport_width - 1) | SC_SCISSOR1__YS1(viewport_height - 1) ); T0Vf(VAP_VPORT_XSCALE, (float)viewport_width); T0Vf(VAP_VPORT_YSCALE, (float)viewport_height); int colorformat = 7; // ARGB32323232 ib_colorbuffer2(0, output_reloc_index0, viewport_width, macrotile, microtile, colorformat); ib_colorbuffer2(1, output_reloc_index1, viewport_width, macrotile, microtile, colorformat); T0V(US_OUT_FMT_0 , US_OUT_FMT__OUT_FMT(21) // C4_32_FP | US_OUT_FMT__C0_SEL__RED | US_OUT_FMT__C1_SEL__GREEN | US_OUT_FMT__C2_SEL__BLUE | US_OUT_FMT__C3_SEL__ALPHA | US_OUT_FMT__OUT_SIGN(0) ); T0V(US_OUT_FMT_1 , US_OUT_FMT__OUT_FMT(21) // C4_32_FP | US_OUT_FMT__C0_SEL__RED | US_OUT_FMT__C1_SEL__GREEN | US_OUT_FMT__C2_SEL__BLUE | US_OUT_FMT__C3_SEL__ALPHA | US_OUT_FMT__OUT_SIGN(0) ); T0V(US_OUT_FMT_2 , US_OUT_FMT__OUT_FMT(15) // render target is not used ); T0V(US_OUT_FMT_3 , US_OUT_FMT__OUT_FMT(15) // render target is not used ); // shaders load_pvs_shaders(shaders.vertex, shaders.vertex_length); load_us_shaders(shaders.fragment, shaders.fragment_length); // GA T0V(GB_ENABLE , 0 ); ////////////////////////////////////////////////////////////////////////////// // RS ////////////////////////////////////////////////////////////////////////////// ib_rs_instructions(1); ////////////////////////////////////////////////////////////////////////////// // VAP OUT ////////////////////////////////////////////////////////////////////////////// T0V(VAP_OUT_VTX_FMT_0 , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1) ); T0V(VAP_OUT_VTX_FMT_1 , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4) ); // T0V(ZB_CNTL, 0); T0V(ZB_ZSTENCILCNTL, 0); // ////////////////////////////////////////////////////////////////////////////// // TX ////////////////////////////////////////////////////////////////////////////// T0V(TX_INVALTAGS, 0x00000000); T0V(TX_ENABLE , TX_ENABLE__TEX_0_ENABLE__ENABLE | TX_ENABLE__TEX_1_ENABLE__ENABLE); int clamp = 2; // clamp to [0.0, 1.0] int txformat = 29; // TX_FMT_32F_32F_32F_32F ib_texture2(0, input_reloc_index0, texture_width, texture_height, macrotile, microtile, clamp, txformat); ib_texture2(1, input_reloc_index1, texture_width, texture_height, macrotile, microtile, clamp, txformat); // shaders ib_vap_stream_cntl__2(); T0V(US_PIXSIZE , US_PIXSIZE__PIX_SIZE(6) ); ib_ga_us(&shaders.fragment[PARTICLE_PHYSICS_SHADER]); ib_vap_pvs(&shaders.vertex[PARTICLE_PHYSICS_SHADER]); const float vertex_consts[] = { //-tx, -ty, 0, 0, 0, 0, 0, 0, }; const int vertex_consts_size = (sizeof (vertex_consts)); ib_vap_pvs_const_cntl(vertex_consts, vertex_consts_size); // fragment constants //const vec3 velocity_scale = vec3(0.003f, 0.01f, 0.003f); const vec3 velocity_scale = vec3(0.09f, 0.50f, 0.09f); const float delta_age = 0.01f; const float velocity_attenuation = -0.6f; // multiplied by velocity.y after bounce const float gravity = -0.04f; const float fragment_consts[] = { // 0: velocity_scale.x, velocity_scale.y, velocity_scale.z, delta_age, // 1: velocity_attenuation, gravity, 0, 0, }; int fragment_consts_length = (sizeof (fragment_consts)) / (sizeof (fragment_consts[0])); ib_ga_consts(fragment_consts, fragment_consts_length, 0); ////////////////////////////////////////////////////////////////////////////// // VAP ////////////////////////////////////////////////////////////////////////////// T0V(VAP_CLIP_CNTL , VAP_CLIP_CNTL__CLIP_DISABLE(1) ); T0V(VAP_VTE_CNTL , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1) | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1) | VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division | VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division ); T0V(VAP_CNTL_STATUS , VAP_CNTL_STATUS__PVS_BYPASS(0) ); ////////////////////////////////////////////////////////////////////////////// // 3D_DRAW ////////////////////////////////////////////////////////////////////////////// const int dwords_per_vtx = 2; T0V(VAP_VTX_SIZE , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx) ); const float vertices[] = { 0.0f, 0.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f, }; const int vertex_count = 4; T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1); TU( VAP_VF_CNTL__PRIM_TYPE(13) // quad list | VAP_VF_CNTL__PRIM_WALK(3) | VAP_VF_CNTL__INDEX_SIZE(0) | VAP_VF_CNTL__VTX_REUSE_DIS(0) | VAP_VF_CNTL__DUAL_INDEX_MODE(0) | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) | VAP_VF_CNTL__NUM_VERTICES(vertex_count) ); for (int i = 0; i < vertex_count * 2; i++) { TF(vertices[i]); } ////////////////////////////////////////////////////////////////////////////// // padding ////////////////////////////////////////////////////////////////////////////// while ((ib_ix % 8) != 0) { TU(0x80000000); } return ib_ix; } void check_particles2(floatbuffer_state& state) { assert(state.flip == 0 || state.flip == 1); int fb_input = state.flip * 2; int fb_output = (!state.flip) * 2; particle_position * in_pos = (particle_position *)state.ptrs[fb_input + 0]; particle_velocity * in_vel = (particle_velocity *)state.ptrs[fb_input + 1]; particle_position * out_pos = (particle_position *)state.ptrs[fb_output + 0]; particle_velocity * out_vel = (particle_velocity *)state.ptrs[fb_output + 1]; int unequal = 0; for (int i = 0; i < state.length; i++) { bool pos_eq = (in_pos[i].position.x == out_pos[i].position.x) && (in_pos[i].position.y == out_pos[i].position.y) && (in_pos[i].position.z == out_pos[i].position.z); bool vel_eq = (in_vel[i].velocity.x == out_vel[i].velocity.x) && (in_vel[i].velocity.y == out_vel[i].velocity.y) && (in_vel[i].velocity.z == out_vel[i].velocity.z); if (!(pos_eq && vel_eq)) { unequal += 1; printf("[%d] %d %d %d %d\n", i, fb_input + 0, fb_input + 1, fb_output + 0, fb_output + 1); printf(" in pos (% 3.04f % 3.04f % 3.04f)\n", in_pos[i].position.x, in_pos[i].position.y, in_pos[i].position.z); printf(" in vel (% 3.04f % 3.04f % 3.04f)\n", in_vel[i].velocity.x, in_vel[i].velocity.y, in_vel[i].velocity.z); printf(" out pos (% 3.04f % 3.04f % 3.04f)\n", out_pos[i].position.x, out_pos[i].position.y, out_pos[i].position.z); printf(" out vel (% 3.04f % 3.04f % 3.04f)\n", out_vel[i].velocity.x, out_vel[i].velocity.y, out_vel[i].velocity.z); } } printf("unequal %d\n", unequal); } int main() { struct shaders shaders = { .vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length), .fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length), .vertex_length = vertex_shader_paths_length, .fragment_length = fragment_shader_paths_length, }; void * rmmio = map_pci_resource2(); int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC); assert(fd != -1); const int colorbuffer_size = 1600 * 1200 * 4; int colorbuffer_handle[2]; int zbuffer_handle; int * texturebuffer_handle; //int flush_handle; int vertexbuffer_handle; void * colorbuffer_ptr[2]; void * zbuffer_ptr; float * vertexbuffer_ptr; // colorbuffer colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]); colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]); zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr); //flush_handle = create_flush_buffer(fd); texturebuffer_handle = load_textures(fd, textures, textures_length); fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]); fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]); fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle); int colorbuffer_ix = 0; float theta = PI * 0.5; const int floatbuffer_width = 8; const int floatbuffer_height = 8; floatbuffer_state state = create_floatbuffers(fd, floatbuffer_width * floatbuffer_height); vertexbuffer_handle = init_particles_vertexbuffer(fd, state.length, &vertexbuffer_ptr); fprintf(stderr, "vertexbuffer handle %d\n", vertexbuffer_handle); while (true) { assert(state.flip == 0 || state.flip == 1); int fb_input = state.flip * 2; int fb_output = (!state.flip) * 2; { int ib_dwords = _floatbuffer(shaders, fb_input + 0, // input_reloc_index0, fb_input + 1, // input_reloc_index1, fb_output + 0, // output_reloc_index0, fb_output + 1, // output_reloc_index1, floatbuffer_width, floatbuffer_height); int ret = drm_radeon_cs2(fd, state.handles, 4, ib_dwords); assert(ret != -1); //printf("floatbuffer return %d\n", ret); //check_particles2(state); } { int ib_dwords = indirect_buffer(shaders, state, theta, vertexbuffer_ptr); int ret = drm_radeon_cs(fd, colorbuffer_handle[colorbuffer_ix], zbuffer_handle, vertexbuffer_handle, texturebuffer_handle, textures_length, ib_dwords); if (ret == -1) break; } primary_surface_address(rmmio, colorbuffer_ix); // next state theta += 0.01f; colorbuffer_ix = (colorbuffer_ix + 1) & 1; state.flip = (state.flip + 1) & 1; // // update particles // /* for (int i = 0; i < particles_length; i++) { if (particles[i].age <= 0) { particles[i].age += max_age; reset_particle(particles[i]); } else { particles[i].age -= 0.01f; particles[i].position += vec3(particles[i].velocity.x * 0.9f, particles[i].velocity.y * 5.0f, particles[i].velocity.z * 0.9f); particles[i].velocity += vec3(0, -0.04, 0); if (particles[i].position.y < 0) { particles[i].position.y = fabsf(particles[i].position.y); particles[i].velocity.y *= -0.6f; } } } */ } { printf("colorbuffer0.data\n"); int out_fd = open("colorbuffer0.data", O_RDWR|O_CREAT, 0644); assert(out_fd >= 0); ssize_t write_length = write(out_fd, colorbuffer_ptr[0], colorbuffer_size); assert(write_length == colorbuffer_size); close(out_fd); } { printf("zbuffer.data\n"); int out_fd = open("zbuffer.data", O_RDWR|O_CREAT, 0644); assert(out_fd >= 0); ssize_t write_length = write(out_fd, zbuffer_ptr, colorbuffer_size); assert(write_length == colorbuffer_size); close(out_fd); } close(fd); }