r500/drm2/matrix_cubesphere.cpp
2025-11-04 00:00:47 -06:00

543 lines
14 KiB
C++

#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <xf86drm.h>
#include <libdrm/radeon_drm.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "r500/buffer.h"
#include "file.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
#include "../model/cubesphere.h"
// Positions of each buffer inside the relocation table submitted with the
// command stream; the reloc array built in main() lists its entries in this
// order.
#define COLORBUFFER_RELOC_INDEX 0
#define ZBUFFER_RELOC_INDEX 1
#define TEXTURE_RELOC_INDEX 2

// Slots shared by the two path tables below and by the shader tables that are
// loaded from them.
#define CUBESPHERE_SHADER 0
#define CLEAR_SHADER 1

// Vertex shader binaries, one per slot.
const char * vertex_shader_paths[] = {
  "matrix_cubesphere.vs.bin", // CUBESPHERE_SHADER
  "clear.vs.bin",             // CLEAR_SHADER
};
const int vertex_shader_paths_length =
  sizeof vertex_shader_paths / sizeof vertex_shader_paths[0];

// Fragment shader binaries, one per slot (same ordering as the vertex table).
const char * fragment_shader_paths[] = {
  "matrix_cubesphere.fs.bin", // CUBESPHERE_SHADER
  "clear.fs.bin",             // CLEAR_SHADER
};
const int fragment_shader_paths_length =
  sizeof fragment_shader_paths / sizeof fragment_shader_paths[0];
// Shader tables handed to the draw routines.
// Both arrays are indexed with CUBESPHERE_SHADER / CLEAR_SHADER.
// Field order matters: main() fills this struct with designated initializers
// in declaration order.
struct shaders {
// vertex shader entries, one per slot (produced by load_shaders() in main)
struct shader_offset * vertex;
// fragment shader entries, one per slot (produced by load_shaders() in main)
struct shader_offset * fragment;
// number of entries in `vertex`
int vertex_length;
// number of entries in `fragment`
int fragment_length;
};
// Appends the clear pass to the indirect buffer: selects the CLEAR_SHADER
// vertex/fragment pair, sets up the vertex pipeline for a 2-dword vertex and
// draws a single large point.
//
// shaders: shader tables; only the CLEAR_SHADER slot of each is used here.
void _3d_clear(struct shaders& shaders)
{
  // depth / rasterizer / texture / vertex stream state for this pass
  ib_zbuffer(ZBUFFER_RELOC_INDEX, 7); // always
  ib_rs_instructions(0);
  ib_texture__0();
  ib_vap_stream_cntl__2();

  // shaders
  T0V(US_PIXSIZE, US_PIXSIZE__PIX_SIZE(1));
  ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
  ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);

  // VAP index range
  T0V(VAP_INDEX_OFFSET, 0);
  T0V(VAP_VF_MAX_VTX_INDX, VAP_VF_MAX_VTX_INDX__MAX_INDX(0));
  T0V(VAP_VF_MIN_VTX_INDX, VAP_VF_MIN_VTX_INDX__MIN_INDX(0));

  // VAP: clipping disabled, no viewport scale/offset bits set
  T0V(VAP_CLIP_CNTL, VAP_CLIP_CNTL__CLIP_DISABLE(1));
  T0V(VAP_VTE_CNTL,
      VAP_VTE_CNTL__VTX_XY_FMT(1) | VAP_VTE_CNTL__VTX_Z_FMT(1));
  T0V(VAP_CNTL_STATUS, VAP_CNTL_STATUS__PVS_BYPASS(0));
  T0V(VAP_VTX_SIZE, VAP_VTX_SIZE__DWORDS_PER_VTX(2));
  T0V(VAP_OUT_VTX_FMT_0, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
  T0V(VAP_OUT_VTX_FMT_1, 0x0);

  // Point size.
  // NOTE(review): presumably chosen so that one point centred at (800, 600)
  // covers the whole render target -- confirm the register's units.
  T0V(GA_POINT_SIZE,
      GA_POINT_SIZE__HEIGHT(600 * 12) | GA_POINT_SIZE__WIDTH(800 * 12));

  // Immediate-mode draw of one vertex; the packet count is
  // (1 control dword + 2 vertex dwords) - 1.
  const float point_x = 800.0f;
  const float point_y = 600.0f;
  T3(_3D_DRAW_IMMD_2, (1 + 2) - 1);
  TU(VAP_VF_CNTL__PRIM_TYPE(1) // point list
     | VAP_VF_CNTL__PRIM_WALK(3)
     | VAP_VF_CNTL__INDEX_SIZE(0)
     | VAP_VF_CNTL__VTX_REUSE_DIS(0)
     | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
     | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
     | VAP_VF_CNTL__NUM_VERTICES(1));
  TF(point_x);
  TF(point_y);
}
// Builds a matrix that leaves x and y untouched and rewrites z and w as affine
// functions of the input z:
//   z' = (z - low1) * (high2 - low2) / (high1 - low1) + low2
//   w' = (z - low1) * (high3 - low3) / (high1 - low1) + low3
// i.e. z in [low1, high1] is remapped to [low2, high2] for depth and to
// [low3, high3] for w.
// NOTE(review): the formulas above assume the mat4x4 constructor takes its
// arguments row by row and that vectors are multiplied on the right --
// confirm against math/transform.hpp.
//
// low1 and high1 must differ; no check is made for a zero-width input range.
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  const float span = high1 - low1;
  const float z_scale = (high2 - low2) / span;
  const float w_scale = (high3 - low3) / span;

  // first step: slide z so that low1 lands on zero
  mat4x4 shift(1, 0, 0, 0,
               0, 1, 0, 0,
               0, 0, 1, -low1,
               0, 0, 0, 1);

  // second step: derive both the z row and the w row from the shifted z
  mat4x4 remap(1, 0, 0, 0,
               0, 1, 0, 0,
               0, 0, z_scale, low2,
               0, 0, w_scale, low3);

  return remap * shift;
}
// Appends one draw of the first object of cubesphere_model to the indirect
// buffer.
//
// trans:       matrix uploaded as vertex shader constants 0-3
// world_trans: matrix uploaded as vertex shader constants 4-7
// light_pos:   vector uploaded as vertex shader constant 8
//
// Vertices are sent inline, 8 floats each: position xyz, texture xy,
// normal xyz.
void _3d_cube_inner(mat4x4 trans,
                    mat4x4 world_trans,
                    vec4 light_pos)
{
  T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);

  // Vertex shader constants: two 4x4 matrices followed by one vec4.
  float pvs_consts[(4 + 4 + 1) * 4];
  int slot = 0;
  for (int row = 0; row < 4; row++) {
    for (int col = 0; col < 4; col++) {
      pvs_consts[slot++] = trans[row][col];
    }
  }
  for (int row = 0; row < 4; row++) {
    for (int col = 0; col < 4; col++) {
      pvs_consts[slot++] = world_trans[row][col];
    }
  }
  pvs_consts[slot++] = light_pos.x;
  pvs_consts[slot++] = light_pos.y;
  pvs_consts[slot++] = light_pos.z;
  pvs_consts[slot++] = light_pos.w;
  ib_vap_pvs_const_cntl(pvs_consts, (sizeof (pvs_consts)));

  // Immediate-mode draw: one control dword plus 8 dwords per vertex.
  const model * mesh = &cubesphere_model;
  const object * part = mesh->object[0];
  const int tri_total = part->triangle_count;
  const int vtx_total = tri_total * 3;

  T3(_3D_DRAW_IMMD_2, (1 + vtx_total * 8) - 1);
  // NOTE(review): PRIM_TYPE(4) is presumably a triangle list -- three
  // vertices are emitted per triangle below.
  TU(VAP_VF_CNTL__PRIM_TYPE(4)
     | VAP_VF_CNTL__PRIM_WALK(3)
     | VAP_VF_CNTL__INDEX_SIZE(0)
     | VAP_VF_CNTL__VTX_REUSE_DIS(0)
     | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
     | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
     | VAP_VF_CNTL__NUM_VERTICES(vtx_total));

  // Walk every corner of every triangle in order, resolving the per-corner
  // indices into the model's attribute arrays.
  for (int v = 0; v < vtx_total; v++) {
    const auto& corner = part->triangle[v / 3][v % 3];
    vec3 pos = mesh->position[corner.position];
    vec2 uv = mesh->texture[corner.texture];
    vec3 nrm = mesh->normal[corner.normal];

    TF(pos.x);
    TF(pos.y);
    TF(pos.z);
    TF(uv.x);
    TF(uv.y);
    TF(nrm.x);
    TF(nrm.y);
    TF(nrm.z);
  }
}
void _3d_cube(struct shaders& shaders,
float theta)
{
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1); // less
ib_rs_instructions(4);
ib_texture__1(TEXTURE_RELOC_INDEX);
ib_vap_stream_cntl__323();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[CUBESPHERE_SHADER]);
ib_vap_pvs(&shaders.vertex[CUBESPHERE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(8)
);
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0)
| VAP_VTE_CNTL__VTX_XY_FMT(0)
| VAP_VTE_CNTL__VTX_Z_FMT(1)
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_1_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_2_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_3_COMP_CNT(4));
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
float theta1 = theta;
float theta2 = theta;
mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
mat4x4 p = perspective(0.01f, 5.0f,
0.001f, 0.999f,
0.5f, 2.0f);
vec4 light_pos = vec4(0, 0, 0, 1.0f);
// light
if (1) {
mat4x4 t = translate(vec3(0, 0, 3));
mat4x4 t1 = translate(vec3(1, 0, 0));
mat4x4 s = scale(0.1f);
mat4x4 rz = rotate_y(theta * 2.f);
mat4x4 world_trans = rz * t1 * s;
//mat3x3 normal_trans = transpose(inverse(submatrix(world_trans, 3, 3)));
mat4x4 trans = aspect * p * t * world_trans;
light_pos = world_trans * light_pos;
_3d_cube_inner(trans, world_trans, light_pos);
}
// object
if (1) {
mat4x4 t = translate(vec3(0, 0, 3));
mat4x4 rx = rotate_x(1 * theta1 * 0.5f);
mat4x4 ry = rotate_y(0 * theta2 * 0.8f + 1.4f);
mat4x4 s = scale(0.9f);
mat4x4 world_trans = rx * ry * s;
//mat3x3 normal_trans = transpose(inverse(submatrix(world_trans, 3, 3)));
mat4x4 trans = aspect * p * t * world_trans;
_3d_cube_inner(trans, world_trans, light_pos);
}
}
// Rebuilds the whole indirect buffer for one frame, starting from index 0:
// generic initialization, output formats, colour buffer binding, shader
// upload, the clear pass and the cubesphere pass, then padding.
//
// shaders: shader tables to upload and draw with.
// theta:   animation parameter forwarded to _3d_cube.
//
// Returns ib_ix after padding, i.e. the buffer length in dwords (always a
// multiple of 8).
int indirect_buffer(shaders& shaders,
                    float theta)
{
  ib_ix = 0;
  ib_generic_initialization();

  // Output 0 is the only render target in use.
  T0V(US_OUT_FMT_0
      , US_OUT_FMT__OUT_FMT(0) // C4_8
      | US_OUT_FMT__C0_SEL(3) // Blue
      | US_OUT_FMT__C1_SEL(2) // Green
      | US_OUT_FMT__C2_SEL(1) // Red
      | US_OUT_FMT__C3_SEL(0) // Alpha
      | US_OUT_FMT__OUT_SIGN(0)
      );
  T0V(US_OUT_FMT_1
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  T0V(US_OUT_FMT_2
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  // Previously US_OUT_FMT_2 was written a second time here, which left
  // output 3 unprogrammed.
  T0V(US_OUT_FMT_3
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );

  ib_colorbuffer(COLORBUFFER_RELOC_INDEX);
  load_pvs_shaders(shaders.vertex, shaders.vertex_length);
  load_us_shaders(shaders.fragment, shaders.fragment_length);

  //////////////////////////////////////////////////////////////////////////////
  // DRAW
  //////////////////////////////////////////////////////////////////////////////
  _3d_clear(shaders);
  _3d_cube(shaders, theta);

  //////////////////////////////////////////////////////////////////////////////
  // padding
  //////////////////////////////////////////////////////////////////////////////
  // Pad with filler dwords until the length is a multiple of 8.
  // NOTE(review): 0x80000000 looks like a type-2 (no-op) packet header --
  // confirm against the packet format documentation.
  while ((ib_ix % 8) != 0) {
    TU(0x80000000);
  }
  return ib_ix;
}
// Program entry point.
// Loads the shader tables, opens the DRM device, creates two colour buffers,
// a z buffer, a flush buffer and a texture buffer, then loops forever:
// build the indirect buffer for the current theta, submit it with
// DRM_RADEON_CS, call primary_surface_address() for the buffer just rendered,
// advance theta and switch to the other colour buffer.
int main()
{
// Field order here must follow the declaration order of struct shaders.
struct shaders shaders = {
.vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
.fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
.vertex_length = vertex_shader_paths_length,
.fragment_length = fragment_shader_paths_length,
};
// Handle later passed to primary_surface_address().
void * rmmio = map_pci_resource2();
//////////////////////////////////////////////////////////////////////////////
// DRI card0
//////////////////////////////////////////////////////////////////////////////
int ret;
int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
assert(fd != -1);
// NOTE(review): presumably 1600x1200 pixels at 4 bytes each -- confirm.
const int colorbuffer_size = 1600 * 1200 * 4;
int colorbuffer_handle[2];
int zbuffer_handle;
int texturebuffer_handle;
int flush_handle;
void * colorbuffer_ptr[2];
void * zbuffer_ptr;
// two colorbuffers (alternated every frame), a zbuffer of the same byte size,
// and the flush buffer
// NOTE(review): the handles returned by create_buffer()/create_flush_buffer()
// are not checked for failure here.
colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
flush_handle = create_flush_buffer(fd);
fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);
// texture: copy the image file into a freshly created buffer, then drop both
// the file copy and the mapping (only the handle is needed afterwards)
{
const int texture_size = 1024 * 1024 * 4;
void * texturebuffer_ptr;
texturebuffer_handle = create_buffer(fd, texture_size, &texturebuffer_ptr);
// NOTE(review): file_read() is given NULL for its second argument, so the
// file's length is never compared with texture_size; the copy below assumes
// the file holds at least texture_size bytes.
void * texture_buf = file_read("../texture/butterfly_1024x1024_argb8888.data", NULL);
assert(texture_buf != NULL);
// word-by-word copy of texture_size bytes
for (int i = 0; i < texture_size / 4; i++) {
((uint32_t*)texturebuffer_ptr)[i] = ((uint32_t*)texture_buf)[i];
}
// compiler barrier: keeps the stores above from being reordered past the
// free/munmap below by the compiler
asm volatile ("" ::: "memory");
free(texture_buf);
munmap(texturebuffer_ptr, texture_size);
}
// Payload of the FLAGS chunk, identical for every submission.
uint32_t flags[2] = {
5, // RADEON_CS_KEEP_TILING_FLAGS | RADEON_CS_END_OF_FRAME
0, // RADEON_CS_RING_GFX
};
// index of the colour buffer rendered to this frame (0 or 1)
int colorbuffer_ix = 0;
float theta = 0;
// Frame loop; there is no exit condition.
while (true) {
int ib_dwords = indirect_buffer(shaders, theta);
// Relocation table. The first three entries sit at COLORBUFFER_RELOC_INDEX,
// ZBUFFER_RELOC_INDEX and TEXTURE_RELOC_INDEX respectively; the flush buffer
// follows as a fourth entry.
// NOTE(review): the meaning of `.flags = 8` is not documented in this file.
struct drm_radeon_cs_reloc relocs[] = {
{
.handle = colorbuffer_handle[colorbuffer_ix],
.read_domains = 4, // RADEON_GEM_DOMAIN_VRAM
.write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
.flags = 8,
},
{
.handle = zbuffer_handle,
.read_domains = 4, // RADEON_GEM_DOMAIN_VRAM
.write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
.flags = 8,
},
{
.handle = texturebuffer_handle,
.read_domains = 4, // RADEON_GEM_DOMAIN_VRAM
.write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
.flags = 8,
},
{
.handle = flush_handle,
.read_domains = 2, // RADEON_GEM_DOMAIN_GTT
.write_domain = 2, // RADEON_GEM_DOMAIN_GTT
.flags = 0,
}
};
// Three chunks per submission: the indirect buffer, the relocations and the
// flags. Each length is expressed in 32-bit words.
struct drm_radeon_cs_chunk chunks[3] = {
{
.chunk_id = RADEON_CHUNK_ID_IB,
.length_dw = ib_dwords,
.chunk_data = (uint64_t)(uintptr_t)ib,
},
{
.chunk_id = RADEON_CHUNK_ID_RELOCS,
.length_dw = (sizeof (relocs)) / (sizeof (uint32_t)),
.chunk_data = (uint64_t)(uintptr_t)relocs,
},
{
.chunk_id = RADEON_CHUNK_ID_FLAGS,
.length_dw = (sizeof (flags)) / (sizeof (uint32_t)),
.chunk_data = (uint64_t)(uintptr_t)&flags,
},
};
// The submission refers to the chunks through an array of 64-bit addresses.
uint64_t chunks_array[3] = {
(uint64_t)(uintptr_t)&chunks[0],
(uint64_t)(uintptr_t)&chunks[1],
(uint64_t)(uintptr_t)&chunks[2],
};
struct drm_radeon_cs cs = {
.num_chunks = 3,
.cs_id = 0,
.chunks = (uint64_t)(uintptr_t)chunks_array,
.gart_limit = 0,
.vram_limit = 0,
};
// A failed submission is reported but does not stop the loop.
ret = drmCommandWriteRead(fd, DRM_RADEON_CS, &cs, (sizeof (struct drm_radeon_cs)));
if (ret != 0) {
perror("drmCommandWriteRead(DRM_RADEON_CS)");
}
// NOTE(review): presumably points scanout at the buffer just rendered --
// confirm against r500/display_controller.h.
primary_surface_address(rmmio, colorbuffer_ix);
// next state
theta += 0.01f;
// toggle between colour buffer 0 and 1
colorbuffer_ix = (colorbuffer_ix + 1) & 1;
}
// NOTE(review): unreachable -- the loop above never terminates.
close(fd);
}