diff --git a/drm/math/constants.hpp b/drm/math/constants.hpp
new file mode 100644
index 0000000..59f4482
--- /dev/null
+++ b/drm/math/constants.hpp
@@ -0,0 +1,5 @@
+#pragma once
+
+#define PI (3.14159274101257324219f)
+#define PI_2 (PI * 2.0f)
+#define I_PI_2 (1.0f / (PI_2))
diff --git a/drm2/Makefile b/drm2/Makefile
new file mode 100644
index 0000000..3f1c9a3
--- /dev/null
+++ b/drm2/Makefile
@@ -0,0 +1,64 @@
+OPT = -O0
+
+CFLAGS += -g
+CFLAGS += -Wall -Werror -Wfatal-errors
+CFLAGS += -Wno-error=unused-variable
+CFLAGS += -Wno-error=unused-but-set-variable
+CFLAGS += -Wno-narrowing
+CFLAGS += $(shell pkg-config --cflags libdrm)
+
+# libraries are kept in LDLIBS so they follow the objects on the link line
+LDLIBS += $(shell pkg-config --libs libdrm) -lm
+
+VERTEX_SHADERS = $(patsubst %.asm,%.bin,$(wildcard *.vs.asm))
+FRAGMENT_SHADERS = $(patsubst %.asm,%.bin,$(wildcard *.fs.asm))
+SHADER_BIN = $(VERTEX_SHADERS) $(FRAGMENT_SHADERS)
+
+R500_COMMON = \
+ r500/buffer.o \
+ r500/display_controller.o \
+ r500/indirect_buffer.o \
+ r500/shader.o \
+ file.o
+
+matrix_cubesphere: $(R500_COMMON) matrix_cubesphere.o | shaders
+	$(CXX) $(LDFLAGS) $^ $(LDLIBS) -o $@
+
+matrix_cubesphere2: $(R500_COMMON) matrix_cubesphere2.o | shaders
+	$(CXX) $(LDFLAGS) $^ $(LDLIBS) -o $@
+
+%.o: %.c
+	$(CC) $(ARCH) $(CFLAGS) $(OPT) -c $< -o $@
+
+%.o: %.cpp
+	$(CXX) $(ARCH) $(CFLAGS) $(OPT) -c $< -o $@
+
+%.vs.bin: %.vs.asm
+	PYTHONPATH=../regs/ python -m assembler.vs $< $@
+
+%.fs.bin: %.fs.asm
+	PYTHONPATH=../regs/ python -m assembler.fs $< $@
+
+%.vs.inc: %.vs.asm
+	PYTHONPATH=../regs/ python -m assembler.vs $< > $@
+
+%.fs.inc: %.fs.asm
+	PYTHONPATH=../regs/ python -m assembler.fs $< > $@
+
+shaders: $(SHADER_BIN)
+	@true
+
+#find . -type f ! -name "*.*" -delete
+clean:
+	find . 
-type f -name "*.o" -delete
+
+.SUFFIXES:
+.INTERMEDIATE:
+.SECONDARY:
+.PHONY: all clean phony
+
+%: RCS/%,v
+%: RCS/%
+%: %,v
+%: s.%
+%: SCCS/s.%
diff --git a/drm2/clear.fs.asm b/drm2/clear.fs.asm
new file mode 100644
index 0000000..f5d1003
--- /dev/null
+++ b/drm2/clear.fs.asm
@@ -0,0 +1,4 @@
+OUT TEX_SEM_WAIT
+:
+ out[0].a = MAX src0.1 src0.1 ,
+ out[0].rgb = MAX src0.000 src0.000 ;
diff --git a/drm2/clear.fs.bin b/drm2/clear.fs.bin
new file mode 100644
index 0000000..0efda5d
Binary files /dev/null and b/drm2/clear.fs.bin differ
diff --git a/drm2/clear.vs.asm b/drm2/clear.vs.asm
new file mode 100644
index 0000000..80f7079
--- /dev/null
+++ b/drm2/clear.vs.asm
@@ -0,0 +1 @@
+out[0].xyzw = VE_ADD input[0].xyzw input[0].0000 ;
diff --git a/drm2/clear.vs.bin b/drm2/clear.vs.bin
new file mode 100644
index 0000000..69e1685
Binary files /dev/null and b/drm2/clear.vs.bin differ
diff --git a/drm2/file.c b/drm2/file.c
new file mode 100644
index 0000000..62e7b58
--- /dev/null
+++ b/drm2/file.c
@@ -0,0 +1,88 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "file.h"
+
+// Read the entire file at `path` into a buffer obtained from malloc().
+//
+// On success the buffer is returned (the caller releases it with free()) and,
+// when `size_out` is not NULL, the file size in bytes is stored through it.
+// On any failure a diagnostic is written to stderr, the descriptor and the
+// buffer are released, and NULL is returned.
+void * file_read(const char * path, int * size_out)
+{
+  void * buf = NULL;
+  off_t offset = 0;
+
+  int fd = open(path, O_RDONLY);
+  if (fd == -1) {
+    fprintf(stderr, "open(%s): %s\n", path, strerror(errno));
+    return NULL;
+  }
+
+  off_t size = lseek(fd, 0, SEEK_END);
+  if (size == (off_t)-1) {
+    fprintf(stderr, "lseek(%s, SEEK_END): %s\n", path, strerror(errno));
+    goto fail;
+  }
+  // the size is reported through an int; refuse files it cannot represent
+  if (size > INT_MAX) {
+    fprintf(stderr, "file_read(%s): file is too large\n", path);
+    goto fail;
+  }
+
+  if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
+    fprintf(stderr, "lseek(%s, SEEK_SET): %s\n", path, strerror(errno));
+    goto fail;
+  }
+
+  // malloc(0) is allowed to return NULL; request at least one byte so an
+  // empty file is not mistaken for an allocation failure
+  buf = malloc(size > 0 ? (size_t)size : 1);
+  if (buf == NULL) {
+    fprintf(stderr, "malloc(%s): %s\n", path, strerror(errno));
+    goto fail;
+  }
+
+  // read() may return fewer bytes than requested, so keep going until the
+  // whole file has been consumed
+  while (offset < size) {
+    ssize_t n = read(fd, (char *)buf + offset, (size_t)(size - offset));
+    if (n == -1) {
+      if (errno == EINTR) {
+        continue;
+      }
+      fprintf(stderr, "read(%s): %s\n", path, strerror(errno));
+      goto fail;
+    }
+    if (n == 0) {
+      fprintf(stderr, "read(%s): unexpected end of file\n", path);
+      goto fail;
+    }
+    offset += n;
+  }
+
+  if (close(fd) == -1) {
+    fprintf(stderr, "close(%s): %s\n", path, strerror(errno));
+    free(buf);
+    return NULL;
+  }
+
+  if (size_out != NULL) {
+    *size_out = (int)size;
+  }
+
+  return buf;
+
+fail:
+  free(buf);
+  close(fd);
+  return NULL;
+}
diff 
--git a/drm2/file.h b/drm2/file.h new file mode 100644 index 0000000..5306990 --- /dev/null +++ b/drm2/file.h @@ -0,0 +1,11 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +void * file_read(const char * path, int * size_out); + +#ifdef __cplusplus +} +#endif diff --git a/drm2/math b/drm2/math new file mode 120000 index 0000000..16432a3 --- /dev/null +++ b/drm2/math @@ -0,0 +1 @@ +../drm/math \ No newline at end of file diff --git a/drm2/matrix_cubesphere.cpp b/drm2/matrix_cubesphere.cpp new file mode 100644 index 0000000..9798a07 --- /dev/null +++ b/drm2/matrix_cubesphere.cpp @@ -0,0 +1,542 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include "r500/3d_registers.h" +#include "r500/3d_registers_undocumented.h" +#include "r500/3d_registers_bits.h" +#include "r500/indirect_buffer.h" +#include "r500/shader.h" +#include "r500/display_controller.h" +#include "r500/buffer.h" + +#include "file.h" + +#include "math/float_types.hpp" +#include "math/transform.hpp" +#include "math/constants.hpp" + +#include "../model/model2.h" +#include "../model/cubesphere.h" + +#define COLORBUFFER_RELOC_INDEX 0 +#define ZBUFFER_RELOC_INDEX 1 +#define TEXTURE_RELOC_INDEX 2 + +#define CUBESPHERE_SHADER 0 +#define CLEAR_SHADER 1 + +const char * vertex_shader_paths[] = { + "matrix_cubesphere.vs.bin", + "clear.vs.bin", +}; +const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0])); +const char * fragment_shader_paths[] = { + "matrix_cubesphere.fs.bin", + "clear.fs.bin", +}; +const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0])); + +struct shaders { + struct shader_offset * vertex; + struct shader_offset * fragment; + int vertex_length; + int fragment_length; +}; + +void _3d_clear(struct shaders& shaders) +{ + ib_zbuffer(ZBUFFER_RELOC_INDEX, 7); // always + + ib_rs_instructions(0); + + 
ib_texture__0(); + + ib_vap_stream_cntl__2(); + + // shaders + T0V(US_PIXSIZE + , US_PIXSIZE__PIX_SIZE(1) + ); + ib_ga_us(&shaders.fragment[CLEAR_SHADER]); + ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]); + + ////////////////////////////////////////////////////////////////////////////// + // VAP INDEX + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_INDEX_OFFSET, 0); + + T0V(VAP_VF_MAX_VTX_INDX + , VAP_VF_MAX_VTX_INDX__MAX_INDX(0) + ); + T0V(VAP_VF_MIN_VTX_INDX + , VAP_VF_MIN_VTX_INDX__MIN_INDX(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_CLIP_CNTL + , VAP_CLIP_CNTL__CLIP_DISABLE(1) + ); + + T0V(VAP_VTE_CNTL + , VAP_VTE_CNTL__VTX_XY_FMT(1) + | VAP_VTE_CNTL__VTX_Z_FMT(1) + ); + + T0V(VAP_CNTL_STATUS + , VAP_CNTL_STATUS__PVS_BYPASS(0) + ); + + T0V(VAP_VTX_SIZE + , VAP_VTX_SIZE__DWORDS_PER_VTX(2) + ); + + T0V(VAP_OUT_VTX_FMT_0 + , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)); + T0V(VAP_OUT_VTX_FMT_1 + , 0x0); + + ////////////////////////////////////////////////////////////////////////////// + // GA POINT SIZE + ////////////////////////////////////////////////////////////////////////////// + + T0V(GA_POINT_SIZE + , GA_POINT_SIZE__HEIGHT(600 * 12) + | GA_POINT_SIZE__WIDTH(800 * 12) + ); + + ////////////////////////////////////////////////////////////////////////////// + // 3D_DRAW + ////////////////////////////////////////////////////////////////////////////// + + const float center[] = { + 800.0f, 600.0f, + }; + T3(_3D_DRAW_IMMD_2, (1 + 2) - 1); + TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list + | VAP_VF_CNTL__PRIM_WALK(3) + | VAP_VF_CNTL__INDEX_SIZE(0) + | VAP_VF_CNTL__VTX_REUSE_DIS(0) + | VAP_VF_CNTL__DUAL_INDEX_MODE(0) + | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) + | VAP_VF_CNTL__NUM_VERTICES(1) + ); + for (int i = 0; i < 2; i++) { + TF(center[i]); + } +} + +mat4x4 perspective(float low1, float 
high1, + float low2, float high2, + float low3, float high3) +{ + float scale2 = (high2 - low2) / (high1 - low1); + float scale3 = (high3 - low3) / (high1 - low1); + + mat4x4 m1 = mat4x4(1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, -low1, + 0, 0, 0, 1 + ); + + mat4x4 m2 = mat4x4(1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, scale2, low2, + 0, 0, scale3, low3 + ); + + return m2 * m1; +} + +void _3d_cube_inner(mat4x4 trans, + mat4x4 world_trans, + vec4 light_pos) +{ + T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000); + + ////////////////////////////////////////////////////////////////////////////// + // VAP_PVS + ////////////////////////////////////////////////////////////////////////////// + + const float consts[] = { + // 0 + trans[0][0], trans[0][1], trans[0][2], trans[0][3], + trans[1][0], trans[1][1], trans[1][2], trans[1][3], + trans[2][0], trans[2][1], trans[2][2], trans[2][3], + trans[3][0], trans[3][1], trans[3][2], trans[3][3], + + // 4 + world_trans[0][0], world_trans[0][1], world_trans[0][2], world_trans[0][3], + world_trans[1][0], world_trans[1][1], world_trans[1][2], world_trans[1][3], + world_trans[2][0], world_trans[2][1], world_trans[2][2], world_trans[2][3], + world_trans[3][0], world_trans[3][1], world_trans[3][2], world_trans[3][3], + + // 8 + light_pos.x, light_pos.y, light_pos.z, light_pos.w, + }; + ib_vap_pvs_const_cntl(consts, (sizeof (consts))); + + ////////////////////////////////////////////////////////////////////////////// + // 3D_DRAW + ////////////////////////////////////////////////////////////////////////////// + + const model * model = &cubesphere_model; + const object * obj = model->object[0]; + const int triangle_count = obj->triangle_count; + const int vertex_count = triangle_count * 3; + + T3(_3D_DRAW_IMMD_2, (1 + vertex_count * 8) - 1); + TU( VAP_VF_CNTL__PRIM_TYPE(4) + | VAP_VF_CNTL__PRIM_WALK(3) + | VAP_VF_CNTL__INDEX_SIZE(0) + | VAP_VF_CNTL__VTX_REUSE_DIS(0) + | VAP_VF_CNTL__DUAL_INDEX_MODE(0) + | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) + | 
VAP_VF_CNTL__NUM_VERTICES(vertex_count) + ); + + for (int i = 0; i < triangle_count; i++) { + for (int j = 0; j < 3; j++) { + vec3 p = model->position[obj->triangle[i][j].position]; + vec2 t = model->texture[obj->triangle[i][j].texture]; + vec3 n = model->normal[obj->triangle[i][j].normal]; + + TF(p.x); + TF(p.y); + TF(p.z); + TF(t.x); + TF(t.y); + TF(n.x); + TF(n.y); + TF(n.z); + } + } +} + +void _3d_cube(struct shaders& shaders, + float theta) +{ + ib_zbuffer(ZBUFFER_RELOC_INDEX, 1); // less + + ib_rs_instructions(4); + + ib_texture__1(TEXTURE_RELOC_INDEX); + + ib_vap_stream_cntl__323(); + + // shaders + T0V(US_PIXSIZE + , US_PIXSIZE__PIX_SIZE(4) + ); + ib_ga_us(&shaders.fragment[CUBESPHERE_SHADER]); + ib_vap_pvs(&shaders.vertex[CUBESPHERE_SHADER]); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_VTX_SIZE + , VAP_VTX_SIZE__DWORDS_PER_VTX(8) + ); + + T0V(VAP_CLIP_CNTL + , VAP_CLIP_CNTL__PS_UCP_MODE(3) + ); + + T0V(VAP_VTE_CNTL + , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1) + | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1) + | VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0) + | VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0) + | VAP_VTE_CNTL__VTX_XY_FMT(0) + | VAP_VTE_CNTL__VTX_Z_FMT(1) + | VAP_VTE_CNTL__VTX_W0_FMT(1) + | VAP_VTE_CNTL__SERIAL_PROC_ENA(0) + ); + + T0V(VAP_CNTL_STATUS + , VAP_CNTL_STATUS__PVS_BYPASS(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // VAP INDEX + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_INDEX_OFFSET, 0); + + T0V(VAP_VF_MAX_VTX_INDX + , VAP_VF_MAX_VTX_INDX__MAX_INDX(0) + ); + T0V(VAP_VF_MIN_VTX_INDX + , VAP_VF_MIN_VTX_INDX__MIN_INDX(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // VAP OUT + 
////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_OUT_VTX_FMT_0 + , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)); + T0V(VAP_OUT_VTX_FMT_1 + , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4) + | VAP_OUT_VTX_FMT_1__TEX_1_COMP_CNT(4) + | VAP_OUT_VTX_FMT_1__TEX_2_COMP_CNT(4) + | VAP_OUT_VTX_FMT_1__TEX_3_COMP_CNT(4)); + + ////////////////////////////////////////////////////////////////////////////// + // matrix + ////////////////////////////////////////////////////////////////////////////// + + float theta1 = theta; + float theta2 = theta; + + mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1)); + + mat4x4 p = perspective(0.01f, 5.0f, + 0.001f, 0.999f, + 0.5f, 2.0f); + + vec4 light_pos = vec4(0, 0, 0, 1.0f); + + // light + if (1) { + mat4x4 t = translate(vec3(0, 0, 3)); + mat4x4 t1 = translate(vec3(1, 0, 0)); + mat4x4 s = scale(0.1f); + mat4x4 rz = rotate_y(theta * 2.f); + + mat4x4 world_trans = rz * t1 * s; + + //mat3x3 normal_trans = transpose(inverse(submatrix(world_trans, 3, 3))); + + mat4x4 trans = aspect * p * t * world_trans; + + light_pos = world_trans * light_pos; + + _3d_cube_inner(trans, world_trans, light_pos); + } + + + // object + if (1) { + mat4x4 t = translate(vec3(0, 0, 3)); + mat4x4 rx = rotate_x(1 * theta1 * 0.5f); + mat4x4 ry = rotate_y(0 * theta2 * 0.8f + 1.4f); + mat4x4 s = scale(0.9f); + + mat4x4 world_trans = rx * ry * s; + + //mat3x3 normal_trans = transpose(inverse(submatrix(world_trans, 3, 3))); + + mat4x4 trans = aspect * p * t * world_trans; + + _3d_cube_inner(trans, world_trans, light_pos); + } +} + +int indirect_buffer(shaders& shaders, + float theta) +{ + ib_ix = 0; + + ib_generic_initialization(); + + T0V(US_OUT_FMT_0 + , US_OUT_FMT__OUT_FMT(0) // C4_8 + | US_OUT_FMT__C0_SEL(3) // Blue + | US_OUT_FMT__C1_SEL(2) // Green + | US_OUT_FMT__C2_SEL(1) // Red + | US_OUT_FMT__C3_SEL(0) // Alpha + | US_OUT_FMT__OUT_SIGN(0) + ); + T0V(US_OUT_FMT_1 + , US_OUT_FMT__OUT_FMT(15) // render target is not used + ); + T0V(US_OUT_FMT_2 + , 
US_OUT_FMT__OUT_FMT(15) // render target is not used
+      );
+  T0V(US_OUT_FMT_3
+      , US_OUT_FMT__OUT_FMT(15) // render target is not used
+      );
+
+  ib_colorbuffer(COLORBUFFER_RELOC_INDEX);
+
+  load_pvs_shaders(shaders.vertex, shaders.vertex_length);
+  load_us_shaders(shaders.fragment, shaders.fragment_length);
+
+  //////////////////////////////////////////////////////////////////////////////
+  // DRAW
+  //////////////////////////////////////////////////////////////////////////////
+
+  _3d_clear(shaders);
+  _3d_cube(shaders, theta);
+
+  //////////////////////////////////////////////////////////////////////////////
+  // padding
+  //////////////////////////////////////////////////////////////////////////////
+
+  while ((ib_ix % 8) != 0) { // append 0x80000000 dwords until ib_ix is a multiple of 8
+    TU(0x80000000);
+  }
+
+  return ib_ix; // number of dwords written to the indirect buffer
+}
+
+int main()
+{
+  struct shaders shaders = {
+    .vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
+    .fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
+    .vertex_length = vertex_shader_paths_length,
+    .fragment_length = fragment_shader_paths_length,
+  };
+
+  void * rmmio = map_pci_resource2();
+
+  //////////////////////////////////////////////////////////////////////////////
+  // DRI card0
+  //////////////////////////////////////////////////////////////////////////////
+
+  int ret;
+  int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
+  assert(fd != -1);
+
+  const int colorbuffer_size = 1600 * 1200 * 4;
+  int colorbuffer_handle[2];
+  int zbuffer_handle;
+  int texturebuffer_handle;
+  int flush_handle;
+
+  void * colorbuffer_ptr[2];
+  void * zbuffer_ptr;
+
+  // colorbuffer
+  colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
+  colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
+  zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
+  flush_handle = create_flush_buffer(fd);
+
+  fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
+  fprintf(stderr, 
"colorbuffer handle[1] %d\n", colorbuffer_handle[1]); + fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle); + + // texture + { + const int texture_size = 1024 * 1024 * 4; + void * texturebuffer_ptr; + + texturebuffer_handle = create_buffer(fd, texture_size, &texturebuffer_ptr); + + void * texture_buf = file_read("../texture/butterfly_1024x1024_argb8888.data", NULL); + assert(texture_buf != NULL); + for (int i = 0; i < texture_size / 4; i++) { + ((uint32_t*)texturebuffer_ptr)[i] = ((uint32_t*)texture_buf)[i]; + } + asm volatile ("" ::: "memory"); + free(texture_buf); + munmap(texturebuffer_ptr, texture_size); + } + + uint32_t flags[2] = { + 5, // RADEON_CS_KEEP_TILING_FLAGS | RADEON_CS_END_OF_FRAME + 0, // RADEON_CS_RING_GFX + }; + + int colorbuffer_ix = 0; + + float theta = 0; + + while (true) { + int ib_dwords = indirect_buffer(shaders, theta); + + struct drm_radeon_cs_reloc relocs[] = { + { + .handle = colorbuffer_handle[colorbuffer_ix], + .read_domains = 4, // RADEON_GEM_DOMAIN_VRAM + .write_domain = 4, // RADEON_GEM_DOMAIN_VRAM + .flags = 8, + }, + { + .handle = zbuffer_handle, + .read_domains = 4, // RADEON_GEM_DOMAIN_VRAM + .write_domain = 4, // RADEON_GEM_DOMAIN_VRAM + .flags = 8, + }, + { + .handle = texturebuffer_handle, + .read_domains = 4, // RADEON_GEM_DOMAIN_VRAM + .write_domain = 4, // RADEON_GEM_DOMAIN_VRAM + .flags = 8, + }, + { + .handle = flush_handle, + .read_domains = 2, // RADEON_GEM_DOMAIN_GTT + .write_domain = 2, // RADEON_GEM_DOMAIN_GTT + .flags = 0, + } + }; + + struct drm_radeon_cs_chunk chunks[3] = { + { + .chunk_id = RADEON_CHUNK_ID_IB, + .length_dw = ib_dwords, + .chunk_data = (uint64_t)(uintptr_t)ib, + }, + { + .chunk_id = RADEON_CHUNK_ID_RELOCS, + .length_dw = (sizeof (relocs)) / (sizeof (uint32_t)), + .chunk_data = (uint64_t)(uintptr_t)relocs, + }, + { + .chunk_id = RADEON_CHUNK_ID_FLAGS, + .length_dw = (sizeof (flags)) / (sizeof (uint32_t)), + .chunk_data = (uint64_t)(uintptr_t)&flags, + }, + }; + + uint64_t chunks_array[3] = { 
+ (uint64_t)(uintptr_t)&chunks[0], + (uint64_t)(uintptr_t)&chunks[1], + (uint64_t)(uintptr_t)&chunks[2], + }; + + struct drm_radeon_cs cs = { + .num_chunks = 3, + .cs_id = 0, + .chunks = (uint64_t)(uintptr_t)chunks_array, + .gart_limit = 0, + .vram_limit = 0, + }; + + ret = drmCommandWriteRead(fd, DRM_RADEON_CS, &cs, (sizeof (struct drm_radeon_cs))); + if (ret != 0) { + perror("drmCommandWriteRead(DRM_RADEON_CS)"); + } + + primary_surface_address(rmmio, colorbuffer_ix); + + // next state + theta += 0.01f; + colorbuffer_ix = (colorbuffer_ix + 1) & 1; + } + + close(fd); +} diff --git a/drm2/matrix_cubesphere.fs.asm b/drm2/matrix_cubesphere.fs.asm new file mode 100644 index 0000000..7ed97b7 --- /dev/null +++ b/drm2/matrix_cubesphere.fs.asm @@ -0,0 +1,50 @@ +-- temp[0] -- position (world space) +-- temp[1] -- normal +-- temp[2] -- light pos (world space) +-- temp[3] -- texture + +-- PIXSIZE 4 + +TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE + temp[3].rgba = LD tex[0].rgba temp[3].rgaa ; + +-- normal = normalize(normal) +-- normal = (1.0 / sqrt(dot(normal, normal))) * normal +src0.rgb = temp[1] : + DP3 src0.rgb src0.rgb , + temp[1].a = DP ; +src0.a = temp[1] : + temp[1].a = RSQ |src0.a| ; +src0.a = temp[1], src0.rgb = temp[1] : + temp[1].rgb = MAD src0.rgb src0.aaa src0.000 ; + +-- light_dir = normalize((f_light_pos - f_world_pos)) +src1.rgb = temp[2] , -- f_light_pos +src0.rgb = temp[0] , -- f_world_pos +srcp.rgb = neg : -- (f_light_pos - f_world_pos) + DP3 srcp.rgb srcp.rgb , + temp[2].a = DP ; +src0.a = temp[2] : + temp[2].a = RSQ |src0.a| ; +src0.a = temp[2], src0.rgb = temp[2] : + temp[2].rgb = MAD src0.rgb src0.aaa src0.000 ; + +-- dot(normal, light_dir) +src0.rgb = temp[2] , +src1.rgb = temp[1] : + DP3 src0.rgb src1.rgb , + temp[4].a = DP ; + +src0.a = temp[4] : + temp[4].a = MAX src0.a src0.0 ; + +src0.a = temp[4] , +src1.a = float(32) : + temp[4].a = MAD src0.a src0.1 src1.a ; + +OUT TEX_SEM_WAIT +src0.a = temp[3], +src0.rgb = temp[3] , +src1.a = temp[4] : + out[0].a = MAX 
src0.a src0.a , + out[0].rgb = MAD src0.rgb src1.aaa src2.000 ; diff --git a/drm2/matrix_cubesphere.fs.bin b/drm2/matrix_cubesphere.fs.bin new file mode 100644 index 0000000..6605535 Binary files /dev/null and b/drm2/matrix_cubesphere.fs.bin differ diff --git a/drm2/matrix_cubesphere.vs.asm b/drm2/matrix_cubesphere.vs.asm new file mode 100644 index 0000000..45d6fd4 --- /dev/null +++ b/drm2/matrix_cubesphere.vs.asm @@ -0,0 +1,45 @@ +-- +-- dot(m[0], v), dot(m[1], v), dot(m[2], v), dot(m[3], v) +-- + +-- input[0] -- position +-- input[1] -- texture +-- input[2] -- normal + +-- consts[0] -- trans +-- consts[4] -- world_trans +-- consts[8] -- light position (world space) + +-- out[0] -- position clip space +-- out[1] -- object position world space +-- out[2] -- normal +-- out[3] -- light position world space +-- out[4] -- texture + +-- position clip space +temp[1].x = VE_DOT const[0].xyzw input[0].xyzw ; +temp[1].y = VE_DOT const[1].xyzw input[0].xyzw ; +temp[1].z = VE_DOT const[2].xyzw input[0].xyzw ; +temp[1].w = VE_DOT const[3].xyzw input[0].xyzw ; + +-- position world space +temp[2].x = VE_DOT const[4].xyzw input[0].xyzw ; +temp[2].y = VE_DOT const[5].xyzw input[0].xyzw ; +temp[2].z = VE_DOT const[6].xyzw input[0].xyzw ; +temp[2].w = VE_DOT const[7].xyzw input[0].xyzw ; + +-- normal world space +temp[3].x = VE_DOT const[4].xyz0 input[2].xyz0 ; +temp[3].y = VE_DOT const[5].xyz0 input[2].xyz0 ; +temp[3].z = VE_DOT const[6].xyz0 input[2].xyz0 ; + +-- position (clip space) +out[0].xyzw = VE_ADD temp[1].xyzw const[0].0000 ; +-- position (world space) +out[1].xyzw = VE_ADD temp[2].xyzw const[0].0000 ; +-- normal +out[2].xyzw = VE_ADD temp[3].xyz0 const[0].0000 ; +-- light pos (world space) +out[3].xyzw = VE_ADD const[8].xyzw const[8].0000 ; +-- texture +out[4].xyzw = VE_ADD input[1].xy00 const[0].0000 ; diff --git a/drm2/matrix_cubesphere.vs.bin b/drm2/matrix_cubesphere.vs.bin new file mode 100644 index 0000000..2bbbcd3 Binary files /dev/null and b/drm2/matrix_cubesphere.vs.bin differ 
diff --git a/drm2/r500/3d_registers.h b/drm2/r500/3d_registers.h new file mode 120000 index 0000000..599facf --- /dev/null +++ b/drm2/r500/3d_registers.h @@ -0,0 +1 @@ +../../drm/3d_registers.h \ No newline at end of file diff --git a/drm2/r500/3d_registers_bits.h b/drm2/r500/3d_registers_bits.h new file mode 120000 index 0000000..56d8a33 --- /dev/null +++ b/drm2/r500/3d_registers_bits.h @@ -0,0 +1 @@ +../../drm/3d_registers_bits.h \ No newline at end of file diff --git a/drm2/r500/3d_registers_undocumented.h b/drm2/r500/3d_registers_undocumented.h new file mode 120000 index 0000000..5366590 --- /dev/null +++ b/drm2/r500/3d_registers_undocumented.h @@ -0,0 +1 @@ +../../drm/3d_registers_undocumented.h \ No newline at end of file diff --git a/drm2/r500/buffer.c b/drm2/r500/buffer.c new file mode 100644 index 0000000..1bcb368 --- /dev/null +++ b/drm2/r500/buffer.c @@ -0,0 +1,72 @@ +#include +#include +#include + +#include +#include + +#include "buffer.h" + +int create_buffer(int fd, int buffer_size, void ** out_ptr) +{ + int ret; + + struct drm_radeon_gem_create args = { + .size = buffer_size, + .alignment = 4096, + .handle = 0, + .initial_domain = 4, // RADEON_GEM_DOMAIN_VRAM + .flags = 4 + }; + + ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_CREATE, &args, (sizeof (struct drm_radeon_gem_create))); + if (ret != 0) { + perror("drmCommandWriteRead(DRM_RADEON_GEM_CREATE)"); + } + assert(args.handle != 0); + + struct drm_radeon_gem_mmap mmap_args = { + .handle = args.handle, + .offset = 0, + .size = buffer_size, + }; + ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_MMAP, &mmap_args, (sizeof (struct drm_radeon_gem_mmap))); + if (ret != 0) { + perror("drmCommandWriteRead(DRM_RADEON_GEM_MMAP)"); + } + + if (out_ptr != NULL) { + void * ptr = mmap(0, + buffer_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + fd, + mmap_args.addr_ptr); + assert(ptr != MAP_FAILED); + + *out_ptr = ptr; + } + + return args.handle; +} + +int create_flush_buffer(int fd) +{ + int ret; + + struct 
drm_radeon_gem_create args = { + .size = 4096, + .alignment = 4096, + .handle = 0, + .initial_domain = 2, // GTT + .flags = 0 + }; + + ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_CREATE, + &args, (sizeof (args))); + if (ret != 0) { + perror("drmCommandWriteRead(DRM_RADEON_GEM_CREATE)"); + } + assert(args.handle != 0); + return args.handle; +} diff --git a/drm2/r500/buffer.h b/drm2/r500/buffer.h new file mode 100644 index 0000000..4405a87 --- /dev/null +++ b/drm2/r500/buffer.h @@ -0,0 +1,12 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +int create_buffer(int fd, int buffer_size, void ** out_ptr); +int create_flush_buffer(int fd); + +#ifdef __cplusplus +} +#endif diff --git a/drm2/r500/command_processor.h b/drm2/r500/command_processor.h new file mode 100644 index 0000000..25eb030 --- /dev/null +++ b/drm2/r500/command_processor.h @@ -0,0 +1,13 @@ +#pragma once + +#define _NOP 0x10 +#define _3D_LOAD_VBPNTR 0x2f +#define _3D_DRAW_VBUF_2 0x34 +#define _3D_DRAW_IMMD_2 0x35 + +#define TYPE_0_COUNT(c) (((c) & 0x3fff) << 16) +#define TYPE_0_ONE_REG (1 << 15) +#define TYPE_0_BASE_INDEX(i) (((i) & 0x1fff) << 0) + +#define TYPE_3_COUNT(c) (((c) & 0x3fff) << 16) +#define TYPE_3_OPCODE(o) (((o) & 0xff) << 8) diff --git a/drm2/r500/display_controller.c b/drm2/r500/display_controller.c new file mode 100644 index 0000000..aba2dae --- /dev/null +++ b/drm2/r500/display_controller.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include + +#include "registers.h" + +void * map_pci_resource2() +{ + ////////////////////////////////////////////////////////////////////////////// + // PCI resource0 + ////////////////////////////////////////////////////////////////////////////// + const char * resource2_path = "/sys/bus/pci/devices/0000:01:00.0/resource2"; + int resource2_fd = open(resource2_path, O_RDWR | O_SYNC); + assert(resource2_fd >= 0); + + uint32_t resource2_size = 0x10000; + void * resource2_base = mmap(0, resource2_size, PROT_READ | PROT_WRITE, MAP_SHARED, 
resource2_fd, 0); + assert(resource2_base != MAP_FAILED); + + void * rmmio = resource2_base; + + return rmmio; +} + +void primary_surface_address(void * rmmio, int colorbuffer_ix) +{ +#define D1CRTC_DOUBLE_BUFFER_CONTROL 0x60ec +#define D1GRPH_PRIMARY_SURFACE_ADDRESS 0x6110 +#define D1GRPH_UPDATE 0x6144 +#define D1GRPH_UPDATE__D1GRPH_SURFACE_UPDATE_PENDING (1 << 2) + + uint32_t d1crtc_double_buffer_control = rreg(rmmio, D1CRTC_DOUBLE_BUFFER_CONTROL); + //printf("D1CRTC_DOUBLE_BUFFER_CONTROL: %08x\n", d1crtc_double_buffer_control); + assert(d1crtc_double_buffer_control == (1 << 8)); + + // addresses were retrieved from /sys/kernel/debug/radeon_vram_mm + // + // This assumes GEM buffer allocation always starts from the lowest + // unallocated address. + const uint32_t colorbuffer_addresses[2] = { + 0x813000, + 0xf66000, + }; + + wreg(rmmio, D1GRPH_PRIMARY_SURFACE_ADDRESS, colorbuffer_addresses[colorbuffer_ix]); + while ((rreg(rmmio, D1GRPH_UPDATE) & D1GRPH_UPDATE__D1GRPH_SURFACE_UPDATE_PENDING) != 0); +} diff --git a/drm2/r500/display_controller.h b/drm2/r500/display_controller.h new file mode 100644 index 0000000..fe2d0bf --- /dev/null +++ b/drm2/r500/display_controller.h @@ -0,0 +1,12 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +void * map_pci_resource2(); +void primary_surface_address(void * rmmio, int colorbuffer_ix); + +#ifdef __cplusplus +} +#endif diff --git a/drm2/r500/indirect_buffer.c b/drm2/r500/indirect_buffer.c new file mode 100644 index 0000000..f691a78 --- /dev/null +++ b/drm2/r500/indirect_buffer.c @@ -0,0 +1,695 @@ +#include +#include + +#include "command_processor.h" +#include "shader.h" +#include "indirect_buffer.h" + +#include "3d_registers.h" +#include "3d_registers_undocumented.h" +#include "3d_registers_bits.h" + +union u32_f32 ib[16384]; +volatile int ib_ix; + +void ib_generic_initialization() +{ + T0V(RB3D_DSTCACHE_CTLSTAT + , RB3D_DSTCACHE_CTLSTAT__DC_FLUSH(0x2) // Flush dirty 3D data + | 
RB3D_DSTCACHE_CTLSTAT__DC_FREE(0x2) // Free 3D tags + ); + + T0V(ZB_ZCACHE_CTLSTAT + , ZB_ZCACHE_CTLSTAT__ZC_FLUSH(1) + | ZB_ZCACHE_CTLSTAT__ZC_FREE(1) + ); + + T0V(WAIT_UNTIL, 0x00020000); + + // anti-aliasing + + T0V(GB_AA_CONFIG, 0x00000000); + T0V(RB3D_AARESOLVE_CTL, 0x00000000); + + // z buffer + + T0V(ZB_BW_CNTL, 0x00000000); + T0V(ZB_DEPTHCLEARVALUE, 0x00000000); + T0V(ZB_ZTOP + , ZB_ZTOP__ZTOP(1) + ); + T0V(ZB_STENCILREFMASK, 0x00000000); + T0V(ZB_STENCILREFMASK_BF, 0x00000000); + + // fog + + T0V(FG_ALPHA_FUNC, 0x00000000); + T0V(FG_ALPHA_VALUE, 0x00000000); + T0V(FG_FOG_BLEND, 0x00000000); + T0V(FG_DEPTH_SRC, 0x00000000); + + // render backend + + T0V(RB3D_CCTL + , RB3D_CCTL__INDEPENDENT_COLORFORMAT_ENABLE(1) + ); + T0V(RB3D_ROPCNTL, 0x00000000); + T0V(RB3D_BLENDCNTL, 0x00000000); + T0V(RB3D_ABLENDCNTL, 0x00000000); + T0V(RB3D_COLOR_CHANNEL_MASK + , RB3D_COLOR_CHANNEL_MASK__BLUE_MASK(1) + | RB3D_COLOR_CHANNEL_MASK__GREEN_MASK(1) + | RB3D_COLOR_CHANNEL_MASK__RED_MASK(1) + | RB3D_COLOR_CHANNEL_MASK__ALPHA_MASK(1) + ); + T0V(RB3D_DITHER_CTL, 0x00000000); + T0V(RB3D_CONSTANT_COLOR_AR + , RB3D_CONSTANT_COLOR_AR__RED(0) + | RB3D_CONSTANT_COLOR_AR__ALPHA(0) + ); + T0V(RB3D_CONSTANT_COLOR_GB + , RB3D_CONSTANT_COLOR_GB__BLUE(0) + | RB3D_CONSTANT_COLOR_GB__GREEN(0) + ); + + T0V(RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD + , RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD__BLUE(1) + | RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD__GREEN(1) + | RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD__RED(1) + | RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD__ALPHA(1) + ); + T0V(RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD + , RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD__BLUE(254) + | RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD__GREEN(254) + | RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD__RED(254) + | RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD__ALPHA(254) + ); + + // clip + + T0V(SC_HYPERZ_EN, 0x00000000); + + T0V(SC_CLIP_RULE + , SC_CLIP_RULE__CLIP_RULE(0xffff)); + + T0V(SC_CLIP_0_A, 0x00000000); + T0V(SC_CLIP_0_B, 0xffffffff); + T0V(SC_SCREENDOOR, 
0x00ffffff); + T0V(SC_EDGERULE + , SC_EDGERULE__ER_TRI(5) // L-in,R-out,HT-in,HB-in + | SC_EDGERULE__ER_POINT(9) // L-out,R-in,HT-in,HB-out + | SC_EDGERULE__ER_LINE_LR(5) // L-in,R-out,HT-in,HB-out + | SC_EDGERULE__ER_LINE_RL(9) // L-out,R-in,HT-in,HB-out + | SC_EDGERULE__ER_LINE_TB(26) // T-in,B-out,VL-out,VR-in + | SC_EDGERULE__ER_LINE_BT(22) // T-out,B-in,VL-out,VR-in + ); + + // ga + + T0V(GA_OFFSET + , GA_OFFSET__X_OFFSET(0) + | GA_OFFSET__Y_OFFSET(0) + ); + + T0V(GA_COLOR_CONTROL_PS3, 0x00000000); + + T0V(GA_POINT_MINMAX + , GA_POINT_MINMAX__MIN_SIZE(60) + | GA_POINT_MINMAX__MAX_SIZE(60) + ); + + T0V(GA_LINE_CNTL + , GA_LINE_CNTL__WIDTH(6) + | GA_LINE_CNTL__END_TYPE(2) + | GA_LINE_CNTL__SORT(0) + ); + + T0V(GA_LINE_STIPPLE_CONFIG, 0x00000000); + T0V(GA_LINE_STIPPLE_VALUE, 0x00000000); + T0V(GA_POLY_MODE + , GA_POLY_MODE__POLY_MODE(0)); + T0V(GA_ROUND_MODE + , GA_ROUND_MODE__GEOMETRY_ROUND(1) + | GA_ROUND_MODE__COLOR_ROUND(0) + | GA_ROUND_MODE__RGB_CLAMP(1) + | GA_ROUND_MODE__ALPHA_CLAMP(1) + | GA_ROUND_MODE__GEOMETRY_MASK(0) + ); + + T0Vf(GA_POINT_S0, 0.0f); + T0Vf(GA_POINT_T0, 1.0f); + T0Vf(GA_POINT_S1, 1.0f); + T0Vf(GA_POINT_T1, 0.0f); + + T0V(GA_COLOR_CONTROL + , GA_COLOR_CONTROL__RGB0_SHADING(2) + | GA_COLOR_CONTROL__ALPHA0_SHADING(2) + | GA_COLOR_CONTROL__RGB1_SHADING(2) + | GA_COLOR_CONTROL__ALPHA1_SHADING(2) + | GA_COLOR_CONTROL__RGB2_SHADING(2) + | GA_COLOR_CONTROL__ALPHA2_SHADING(2) + | GA_COLOR_CONTROL__RGB3_SHADING(2) + | GA_COLOR_CONTROL__ALPHA3_SHADING(2) + | GA_COLOR_CONTROL__PROVOKING_VERTEX(3) + ); + + // gb + + T0V(GB_Z_PEQ_CONFIG + , GB_Z_PEQ_CONFIG__Z_PEQ_SIZE(0) // 4x4 z plane equations + ); + + T0V(GB_SELECT, 0x00000000); + T0V(GB_ENABLE, 0x00000000); + + T0V(GB_MSPOS0 + , GB_MSPOS0__MS_X0(6) + | GB_MSPOS0__MS_Y0(6) + | GB_MSPOS0__MS_X1(6) + | GB_MSPOS0__MS_Y1(6) + | GB_MSPOS0__MS_X2(6) + | GB_MSPOS0__MS_Y2(6) + | GB_MSPOS0__MSBD0_Y(6) + | GB_MSPOS0__MSBD0_X(6) + ); + T0V(GB_MSPOS1 + , GB_MSPOS1__MS_X3(6) + | GB_MSPOS1__MS_Y3(6) + | 
GB_MSPOS1__MS_X4(6) + | GB_MSPOS1__MS_Y4(6) + | GB_MSPOS1__MS_X5(6) + | GB_MSPOS1__MS_Y5(6) + | GB_MSPOS1__MSBD1(6) + ); + + // setup unit + + T0V(SU_TEX_WRAP, 0x00000000); + T0V(SU_TEX_WRAP_PS3, 0x00000000); + + T0Vf(SU_DEPTH_SCALE, 16777215.0f); + T0Vf(SU_DEPTH_OFFSET, 0.0f); + + T0V(SU_CULL_MODE + , SU_CULL_MODE__CULL_FRONT(0) + | SU_CULL_MODE__CULL_BACK(0) + | SU_CULL_MODE__FACE(0) + ); + + T0V(SU_POLY_OFFSET_ENABLE, 0x00000000); + + // VAP + + T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000); + T0V(VAP_PVS_VTX_TIMEOUT_REG + , VAP_PVS_VTX_TIMEOUT_REG__CLK_COUNT(0xffff) + ); + + T0Vf(VAP_GB_VERT_CLIP_ADJ, 1.0f); + T0Vf(VAP_GB_VERT_DISC_ADJ, 1.0f); + T0Vf(VAP_GB_HORZ_CLIP_ADJ, 1.0f); + T0Vf(VAP_GB_HORZ_DISC_ADJ, 1.0f); + T0V(VAP_PSC_SGN_NORM_CNTL + , VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_0(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_1(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_2(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_3(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_4(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_5(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_6(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_7(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_8(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_9(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_10(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_11(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_12(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_13(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_14(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_15(2) + ); + T0V(VAP_TEX_TO_COLOR_CNTL, 0x00000000); + + T0V(VAP_CNTL + , VAP_CNTL__PVS_NUM_SLOTS(10) + | VAP_CNTL__PVS_NUM_CNTLRS(5) + | VAP_CNTL__PVS_NUM_FPUS(5) + | VAP_CNTL__VAP_NO_RENDER(0) + | VAP_CNTL__VF_MAX_VTX_NUM(12) + | VAP_CNTL__DX_CLIP_SPACE_DEF(0) + | VAP_CNTL__TCL_STATE_OPTIMIZATION(1) + ); + T0V(VAP_PVS_FLOW_CNTL_OPC, 0x00000000); + + T0V(VAP_VTX_STATE_CNTL + , VAP_VTX_STATE_CNTL__COLOR_0_ASSEMBLY_CNTL(1) + | VAP_VTX_STATE_CNTL__COLOR_1_ASSEMBLY_CNTL(1) + | 
VAP_VTX_STATE_CNTL__COLOR_2_ASSEMBLY_CNTL(1)
+      | VAP_VTX_STATE_CNTL__COLOR_3_ASSEMBLY_CNTL(1)
+      | VAP_VTX_STATE_CNTL__COLOR_4_ASSEMBLY_CNTL(1)
+      | VAP_VTX_STATE_CNTL__COLOR_5_ASSEMBLY_CNTL(1)
+      | VAP_VTX_STATE_CNTL__COLOR_6_ASSEMBLY_CNTL(1)
+      | VAP_VTX_STATE_CNTL__COLOR_7_ASSEMBLY_CNTL(1)
+      | VAP_VTX_STATE_CNTL__UPDATE_USER_COLOR_0_ENA(0)
+      );
+
+  // vap constants
+
+  T0(VAP_PVS_FLOW_CNTL_ADDRS_LW_0, 31); // header for the 32 zero dwords below
+  for (int i = 0; i < 32; i++)
+    TU(0x00000000);
+
+  T0(VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, 15); // header for the 16 zero dwords below
+  for (int i = 0; i < 16; i++)
+    TU(0x00000000);
+
+  T0V(VAP_PVS_VECTOR_INDX_REG
+      , VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(1536));
+  T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, 23); // header for the 24 zero dwords below
+  for (int i = 0; i < 24; i++)
+    TU(0x00000000);
+
+  // us
+
+  T0V(US_CONFIG
+      , US_CONFIG__ZERO_TIMES_ANYTHING_EQUALS_ZERO(1)
+      );
+  T0V(US_FC_CTRL, 0);
+
+  T0V(US_W_FMT
+      , US_W_FMT__W_FMT(0) // W is always zero
+      );
+
+  //////////////////////////////////////////////////////////////////////////////
+  // SC
+  //////////////////////////////////////////////////////////////////////////////
+
+  T0V(SC_SCISSOR0
+      , SC_SCISSOR0__XS0(0)
+      | SC_SCISSOR0__YS0(0)
+      );
+  T0V(SC_SCISSOR1
+      , SC_SCISSOR1__XS1(1600 - 1)
+      | SC_SCISSOR1__YS1(1200 - 1)
+      );
+
+  //////////////////////////////////////////////////////////////////////////////
+  // VAP
+  //////////////////////////////////////////////////////////////////////////////
+
+  T0Vf(VAP_VPORT_XSCALE, 800.0f);
+  T0Vf(VAP_VPORT_XOFFSET, 800.0f);
+  T0Vf(VAP_VPORT_YSCALE, -600.0f);
+  T0Vf(VAP_VPORT_YOFFSET, 600.0f);
+  T0Vf(VAP_VPORT_ZSCALE, 0.5f);
+  T0Vf(VAP_VPORT_ZOFFSET, 0.5f);
+}
+
+void ib_colorbuffer(int reloc_index)
+{
+
+  // CB: appends colour buffer 0 setup to the indirect buffer (ib / ib_ix).
+  // Each relocated register write is followed by a type-3 NOP packet whose
+  // single payload dword is reloc_index * 4.
+
+  T0V(RB3D_COLOROFFSET0
+      , 0x00000000 // value replaced by kernel from relocs
+      );
+  T3(_NOP, 0);
+  TU(reloc_index * 4); // index into relocs array
+
+ 
T0V(RB3D_COLORPITCH0
+      , RB3D_COLORPITCH__COLORPITCH(1600 >> 1) // 1600 is hard-coded; presumably the surface width -- TODO confirm pitch units
+      | RB3D_COLORPITCH__COLORFORMAT(6) // ARGB8888
+      );
+  // The COLORPITCH NOP is ignored/not applied due to
+  // RADEON_CS_KEEP_TILING_FLAGS, but is still required.
+  T3(_NOP, 0);
+  TU(reloc_index * 4); // index into relocs array
+}
+
+void ib_zbuffer(int reloc_index, int zfunc)
+{
+  // ZB: enables depth test and depth write, sets the depth compare function
+  // to zfunc, and writes depth offset and pitch; each of those two writes is
+  // followed by a NOP packet carrying reloc_index * 4.
+  T0V(ZB_CNTL
+      , ZB_CNTL__Z_ENABLE__ENABLED // 1
+      | ZB_CNTL__ZWRITEENABLE__ENABLE // 1
+      );
+  T0V(ZB_ZSTENCILCNTL
+      , ZB_ZSTENCILCNTL__ZFUNC(zfunc)
+      );
+
+  T0V(ZB_FORMAT
+      , ZB_FORMAT__DEPTHFORMAT(2) // 24-bit integer Z, 8 bit stencil
+      );
+
+  T0V(ZB_DEPTHOFFSET, 0);
+  T3(_NOP, 0);
+  TU(reloc_index * 4); // index into relocs array
+
+  T0V(ZB_DEPTHPITCH
+      , ZB_DEPTHPITCH__DEPTHPITCH(1600 >> 2) // same hard-coded 1600 as the colour pitch, different shift
+      | ZB_DEPTHPITCH__DEPTHMACROTILE(1)
+      | ZB_DEPTHPITCH__DEPTHMICROTILE(1)
+      );
+  T3(_NOP, 0);
+  TU(reloc_index * 4); // index into relocs array
+}
+
+void ib_rs_instructions(int count)
+{
+  // RS: programs `count` (0..8) RS_IP_n / RS_INST_n pairs, each selecting
+  // four consecutive S,T,R,Q pointers. With count == 0 a single colour
+  // entry (IC_COUNT 1, handled by `case 0` of the switch) is set up instead.
+
+  assert(count <= 8 && count >= 0); // the only range check; compiled out under NDEBUG
+
+  if (count == 0) {
+    T0V(RS_COUNT
+        , RS_COUNT__IT_COUNT(0)
+        | RS_COUNT__IC_COUNT(1)
+        | RS_COUNT__W_ADDR(0)
+        | RS_COUNT__HIRES_EN(1)
+        );
+    T0V(RS_INST_COUNT
+        , RS_INST_COUNT__INST_COUNT(0));
+  } else {
+    T0V(RS_COUNT
+        , RS_COUNT__IT_COUNT(count * 4) // four components (S,T,R,Q) per entry
+        | RS_COUNT__IC_COUNT(0)
+        | RS_COUNT__W_ADDR(0)
+        | RS_COUNT__HIRES_EN(1)
+        );
+    T0V(RS_INST_COUNT
+        , RS_INST_COUNT__INST_COUNT(count - 1));
+  }
+
+  switch (count) {
+  case 8: // each case emits entry count-1, then falls through to the lower ones
+    T0V(RS_IP_7
+        , RS_IP__TEX_PTR_S(28)
+        | RS_IP__TEX_PTR_T(29)
+        | RS_IP__TEX_PTR_R(30)
+        | RS_IP__TEX_PTR_Q(31)
+        | RS_IP__OFFSET_EN(0)
+        );
+    T0V(RS_INST_7
+        , RS_INST__TEX_ID(7)
+        | RS_INST__TEX_CN(1)
+        | RS_INST__TEX_ADDR(7)
+        );
+    
[[fallthrough]];
+  case 7: // entry 6: pointers 24..27
+    T0V(RS_IP_6
+        , RS_IP__TEX_PTR_S(24)
+        | RS_IP__TEX_PTR_T(25)
+        | RS_IP__TEX_PTR_R(26)
+        | RS_IP__TEX_PTR_Q(27)
+        | RS_IP__OFFSET_EN(0)
+        );
+    T0V(RS_INST_6
+        , RS_INST__TEX_ID(6)
+        | RS_INST__TEX_CN(1)
+        | RS_INST__TEX_ADDR(6)
+        );
+    [[fallthrough]];
+  case 6: // entry 5: pointers 20..23
+    T0V(RS_IP_5
+        , RS_IP__TEX_PTR_S(20)
+        | RS_IP__TEX_PTR_T(21)
+        | RS_IP__TEX_PTR_R(22)
+        | RS_IP__TEX_PTR_Q(23)
+        | RS_IP__OFFSET_EN(0)
+        );
+    T0V(RS_INST_5
+        , RS_INST__TEX_ID(5)
+        | RS_INST__TEX_CN(1)
+        | RS_INST__TEX_ADDR(5)
+        );
+    [[fallthrough]];
+  case 5: // entry 4: pointers 16..19
+    T0V(RS_IP_4
+        , RS_IP__TEX_PTR_S(16)
+        | RS_IP__TEX_PTR_T(17)
+        | RS_IP__TEX_PTR_R(18)
+        | RS_IP__TEX_PTR_Q(19)
+        | RS_IP__OFFSET_EN(0)
+        );
+    T0V(RS_INST_4
+        , RS_INST__TEX_ID(4)
+        | RS_INST__TEX_CN(1)
+        | RS_INST__TEX_ADDR(4)
+        );
+    [[fallthrough]];
+  case 4: // entry 3: pointers 12..15
+    T0V(RS_IP_3
+        , RS_IP__TEX_PTR_S(12)
+        | RS_IP__TEX_PTR_T(13)
+        | RS_IP__TEX_PTR_R(14)
+        | RS_IP__TEX_PTR_Q(15)
+        | RS_IP__OFFSET_EN(0)
+        );
+    T0V(RS_INST_3
+        , RS_INST__TEX_ID(3)
+        | RS_INST__TEX_CN(1)
+        | RS_INST__TEX_ADDR(3)
+        );
+    [[fallthrough]];
+  case 3: // entry 2: pointers 8..11
+    T0V(RS_IP_2
+        , RS_IP__TEX_PTR_S(8)
+        | RS_IP__TEX_PTR_T(9)
+        | RS_IP__TEX_PTR_R(10)
+        | RS_IP__TEX_PTR_Q(11)
+        | RS_IP__OFFSET_EN(0)
+        );
+    T0V(RS_INST_2
+        , RS_INST__TEX_ID(2)
+        | RS_INST__TEX_CN(1)
+        | RS_INST__TEX_ADDR(2)
+        );
+    [[fallthrough]];
+  case 2: // entry 1: pointers 4..7
+    T0V(RS_IP_1
+        , RS_IP__TEX_PTR_S(4)
+        | RS_IP__TEX_PTR_T(5)
+        | RS_IP__TEX_PTR_R(6)
+        | RS_IP__TEX_PTR_Q(7)
+        | RS_IP__OFFSET_EN(0)
+        );
+    T0V(RS_INST_1
+        , RS_INST__TEX_ID(1)
+        | RS_INST__TEX_CN(1)
+        | RS_INST__TEX_ADDR(1)
+        );
+    [[fallthrough]];
+  case 1: // entry 0: pointers 0..3; end of the fallthrough chain
+    T0V(RS_IP_0
+        , RS_IP__TEX_PTR_S(0)
+        | RS_IP__TEX_PTR_T(1)
+        | RS_IP__TEX_PTR_R(2)
+        | RS_IP__TEX_PTR_Q(3)
+        | RS_IP__OFFSET_EN(0)
+        );
+    T0V(RS_INST_0
+        , RS_INST__TEX_ID(0)
+        | RS_INST__TEX_CN(1)
+        | RS_INST__TEX_ADDR(0)
+        );
+    break;
+  case 0: // no texture entries: RS_IP_0 describes one colour instead
+    T0V(RS_IP_0
+        , RS_IP__COL_PTR(0)
+        | RS_IP__COL_FMT(6) // Zero components (0,0,0,1)
+        );
+    break;
+  } // no default arm: any other count emits nothing here
+}
+
+void ib_texture__0() // texture state with no unit enabled (TX_ENABLE written as 0)
+{
+  
// TX: writes 0 to TX_INVALTAGS and 0 to TX_ENABLE, so no TEX_n_ENABLE bit
+  // is set. (TX_INVALTAGS presumably invalidates cached texture tags --
+  // TODO confirm against the register documentation.)
+
+  T0V(TX_INVALTAGS, 0x00000000);
+
+  T0V(TX_ENABLE, 0x00000000);
+}
+
+void ib_texture__1(int reloc_index)
+{
+  // TX: enables texture unit 0 only, as a 1024x1024 TX_FMT_8_8_8_8 2D
+  // texture with linear min/mag filtering; TX_OFFSET_0 is written as 0 and
+  // followed by a NOP packet carrying reloc_index * 4.
+
+  T0V(TX_INVALTAGS, 0x00000000);
+
+  T0V(TX_ENABLE
+      , TX_ENABLE__TEX_0_ENABLE__ENABLE);
+  T0V(TX_FILTER0_0
+      , TX_FILTER0__MAG_FILTER__LINEAR
+      | TX_FILTER0__MIN_FILTER__LINEAR
+      );
+  T0V(TX_FILTER1_0
+      , TX_FILTER1__LOD_BIAS(1)
+      );
+  T0V(TX_BORDER_COLOR_0, 0);
+  T0V(TX_FORMAT0_0
+      , TX_FORMAT0__TXWIDTH(1024 - 1) // width and height fields are written as size - 1
+      | TX_FORMAT0__TXHEIGHT(1024 - 1)
+      );
+
+  T0V(TX_FORMAT1_0
+      , TX_FORMAT1__TXFORMAT__TX_FMT_8_8_8_8
+      | TX_FORMAT1__SEL_ALPHA(5)
+      | TX_FORMAT1__SEL_RED(0)
+      | TX_FORMAT1__SEL_GREEN(1)
+      | TX_FORMAT1__SEL_BLUE(2)
+      | TX_FORMAT1__TEX_COORD_TYPE__2D
+      );
+  T0V(TX_FORMAT2_0, 0);
+
+  T0V(TX_OFFSET_0
+      //, TX_OFFSET__MACRO_TILE(1)
+      //| TX_OFFSET__MICRO_TILE(1)
+      , 0
+      );
+
+  T3(_NOP, 0);
+  TU(reloc_index * 4); // index into relocs array
+}
+
+void ib_vap_pvs(struct shader_offset * offset)
+{
+  const int instruction_size = 4 * 4; // bytes
+  int first_inst = offset->start / instruction_size;
+  int last_inst = ((offset->start + offset->size) / instruction_size) - 1;
+  assert(last_inst >= first_inst); // i.e. offset covers at least one whole instruction
+
+  // VAP_PVS: selects the instruction range [first_inst, last_inst] computed
+  // above from offset->start / offset->size (bytes, 16 per instruction);
+  // last_inst is also used as XYZW_VALID_INST and LAST_VTX_SRC_INST.
+
+  T0V(VAP_PVS_CODE_CNTL_0
+      , VAP_PVS_CODE_CNTL_0__PVS_FIRST_INST(first_inst)
+      | VAP_PVS_CODE_CNTL_0__PVS_XYZW_VALID_INST(last_inst)
+      | VAP_PVS_CODE_CNTL_0__PVS_LAST_INST(last_inst)
+      );
+  T0V(VAP_PVS_CODE_CNTL_1
+      , VAP_PVS_CODE_CNTL_1__PVS_LAST_VTX_SRC_INST(last_inst)
+      );
+}
+
+void ib_ga_us(struct shader_offset * offset)
+{
+  const int 
instruction_size = 4 * 6; // bytes
+  int code_addr = offset->start / instruction_size;
+  int code_size = offset->size / instruction_size;
+
+  // GA_US: points US_CODE_RANGE and US_CODE_OFFSET at instruction code_addr
+  // and sets the start/end addresses to 0 and code_size - 1 relative to it.
+  // NOTE(review): offset->size < 24 makes code_size - 1 negative; unchecked.
+
+  T0V(US_CODE_RANGE
+      , US_CODE_RANGE__CODE_ADDR(code_addr)
+      | US_CODE_RANGE__CODE_SIZE(code_size - 1) // relative to CODE_ADDR
+      );
+  T0V(US_CODE_OFFSET
+      , US_CODE_OFFSET__OFFSET_ADDR(code_addr)
+      );
+  T0V(US_CODE_ADDR
+      , US_CODE_ADDR__START_ADDR(0) // relative to OFFSET_ADDR
+      | US_CODE_ADDR__END_ADDR(code_size - 1) // relative to OFFSET_ADDR
+      );
+}
+
+void ib_vap_pvs_const_cntl(const float * consts, int size)
+{
+  assert(size % 16 == 0); // size is in bytes: a whole number of 4-float groups
+
+  // VAP_PVS_CONST_CNTL: uploads size bytes of float constants from consts,
+  // one dword per float, after selecting OCTWORD_OFFSET(1024).
+  // NOTE(review): size == 0 passes the assert and yields a packet count of -1.
+
+  const int consts_length = size / 4; // number of floats (dwords)
+
+  T0V(VAP_PVS_CONST_CNTL
+      , VAP_PVS_CONST_CNTL__PVS_CONST_BASE_OFFSET(0)
+      | VAP_PVS_CONST_CNTL__PVS_MAX_CONST_ADDR((size / 4) - 1) // NOTE(review): float count - 1; confirm the field is not counted in 4-float vectors
+      );
+
+  T0V(VAP_PVS_VECTOR_INDX_REG
+      , VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(1024)
+      );
+
+  T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, (consts_length - 1)); // header for the consts_length dwords below
+  for (int i = 0; i < consts_length; i++)
+    TF(consts[i]);
+}
+
+void ib_vap_stream_cntl__2()
+{
+  // VAP_PROG_STREAM_CNTL: one vertex attribute, FLOAT_2, written to vector
+  // location 0 with swizzle (X, Y, FP_ONE, FP_ONE) and all four components
+  // write-enabled.
+
+  T0V(VAP_PROG_STREAM_CNTL_0
+      , VAP_PROG_STREAM_CNTL__DATA_TYPE_0__FLOAT_2
+      | VAP_PROG_STREAM_CNTL__SKIP_DWORDS_0(0)
+      | VAP_PROG_STREAM_CNTL__DST_VEC_LOC_0(0)
+      | VAP_PROG_STREAM_CNTL__LAST_VEC_0(1)
+      );
+  T0V(VAP_PROG_STREAM_CNTL_EXT_0
+      , VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X
+      | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0__SELECT_Y
+      | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_FP_ONE
+      | 
VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0__SELECT_FP_ONE
+      | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111)
+      );
+}
+
+void ib_vap_stream_cntl__323()
+{
+  // VAP_PROG_STREAM_CNTL: three vertex attributes -- FLOAT_3 -> location 0,
+  // FLOAT_2 -> location 1 (Z = FP_ZERO), FLOAT_3 -> location 2 (LAST_VEC);
+  // W is FP_ONE for all three.
+
+  T0V(VAP_PROG_STREAM_CNTL_0
+      , VAP_PROG_STREAM_CNTL__DATA_TYPE_0__FLOAT_3
+      | VAP_PROG_STREAM_CNTL__SKIP_DWORDS_0(0)
+      | VAP_PROG_STREAM_CNTL__DST_VEC_LOC_0(0)
+      | VAP_PROG_STREAM_CNTL__LAST_VEC_0(0)
+      | VAP_PROG_STREAM_CNTL__DATA_TYPE_1__FLOAT_2
+      | VAP_PROG_STREAM_CNTL__SKIP_DWORDS_1(0)
+      | VAP_PROG_STREAM_CNTL__DST_VEC_LOC_1(1)
+      | VAP_PROG_STREAM_CNTL__LAST_VEC_1(0)
+      );
+  T0V(VAP_PROG_STREAM_CNTL_EXT_0
+      , VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X
+      | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0__SELECT_Y
+      | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_Z
+      | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0__SELECT_FP_ONE
+      | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111) // XYZW
+      | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_1__SELECT_X
+      | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_1__SELECT_Y
+      | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_1__SELECT_FP_ZERO
+      | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_1__SELECT_FP_ONE
+      | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_1(0b1111) // XYZW
+      );
+
+  T0V(VAP_PROG_STREAM_CNTL_1 // third attribute: register _1 reuses the _0 field macros
+      , VAP_PROG_STREAM_CNTL__DATA_TYPE_0__FLOAT_3
+      | VAP_PROG_STREAM_CNTL__SKIP_DWORDS_0(0)
+      | VAP_PROG_STREAM_CNTL__DST_VEC_LOC_0(2)
+      | VAP_PROG_STREAM_CNTL__LAST_VEC_0(1)
+      );
+  T0V(VAP_PROG_STREAM_CNTL_EXT_1
+      , VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X
+      | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0__SELECT_Y
+      | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_Z
+      | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0__SELECT_FP_ONE
+      | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111) // XYZW
+      );
+}
diff --git a/drm2/r500/indirect_buffer.h b/drm2/r500/indirect_buffer.h
new file 
mode 100644
index 0000000..44c68c1
--- /dev/null
+++ b/drm2/r500/indirect_buffer.h
@@ -0,0 +1,89 @@
+#pragma once
+
+#include <stdint.h>
+
+#include "command_processor.h"
+#include "shader.h"
+
+// Command-stream emit helpers: each macro appends dwords to the global
+// indirect buffer `ib` at index `ib_ix` and advances `ib_ix`. None of them
+// checks `ib_ix` against the size of `ib`.
+//
+// NOTE(review): every expansion already ends in ';' (after `while (0)`), so
+// an unbraced `if (c) T0V(r, v); else ...` does not compile. The ';' is kept
+// so existing call sites are unaffected.
+
+// Type-0 packet header only; the caller then emits count + 1 data dwords.
+#define T0(address, count) \
+  do { \
+    ib[ib_ix++].u32 = TYPE_0_COUNT(count) | TYPE_0_BASE_INDEX((address) >> 2); \
+  } while (0);
+
+// Like T0, with TYPE_0_ONE_REG set in the header.
+#define T0_ONE_REG(address, count) \
+  do { \
+    ib[ib_ix++].u32 = TYPE_0_COUNT(count) | TYPE_0_ONE_REG | TYPE_0_BASE_INDEX((address) >> 2); \
+  } while (0);
+
+// Single register write: type-0 header (count 0) plus one integer dword.
+#define T0V(address, value) \
+  do { \
+    ib[ib_ix++].u32 = TYPE_0_COUNT(0) | TYPE_0_BASE_INDEX((address) >> 2); \
+    ib[ib_ix++].u32 = (value); \
+  } while (0);
+
+// Single register write of a float value (stored through the f32 member).
+#define T0Vf(address, value) \
+  do { \
+    ib[ib_ix++].u32 = TYPE_0_COUNT(0) | TYPE_0_BASE_INDEX((address) >> 2); \
+    ib[ib_ix++].f32 = (value); \
+  } while (0);
+
+// Type-3 packet header. The packet-type bits are built from an unsigned
+// constant: shifting the int 0b11 left by 30 overflows a 32-bit int.
+#define T3(opcode, count) \
+  do { \
+    ib[ib_ix++].u32 = (0x3u << 30) | TYPE_3_COUNT(count) | TYPE_3_OPCODE(opcode); \
+  } while (0);
+
+// One raw integer dword.
+#define TU(data) \
+  do { \
+    ib[ib_ix++].u32 = (data); \
+  } while (0);
+
+// One raw float dword.
+#define TF(data) \
+  do { \
+    ib[ib_ix++].f32 = (data); \
+  } while (0);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// One command-stream dword, viewed as either an integer or a float.
+union u32_f32 {
+  uint32_t u32;
+  float f32;
+};
+
+extern union u32_f32 ib[16384]; // indirect buffer being built
+extern volatile int ib_ix;      // index of the next free dword in ib
+
+// Each ib_* function appends one group of register writes at ib_ix.
+void ib_generic_initialization(void);
+void ib_colorbuffer(int reloc_index);
+void ib_zbuffer(int reloc_index, int zfunc);
+void ib_rs_instructions(int count);
+void ib_texture__0(void);
+void ib_texture__1(int reloc_index);
+void ib_vap_pvs(struct shader_offset * offset);
+void ib_ga_us(struct shader_offset * offset);
+void ib_vap_pvs_const_cntl(const float * consts, int size);
+void ib_vap_stream_cntl__2(void);
+void ib_vap_stream_cntl__323(void);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/drm2/r500/registers.h b/drm2/r500/registers.h
new file mode 100644
index 0000000..9c8454c
--- /dev/null
+++ b/drm2/r500/registers.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <stdint.h>
+
+// Volatile 32-bit read at byte `offset` from the base address `rmmio`,
+// followed by a compiler-level memory barrier.
+static inline uint32_t rreg(void * rmmio, uint32_t offset)
+{
+  uint32_t value = *((volatile uint32_t 
*)(((uintptr_t)rmmio) + offset));
+  asm volatile ("" ::: "memory");
+  return value;
+}
+
+static inline void wreg(void * rmmio, uint32_t offset, uint32_t value)
+{
+  *((volatile uint32_t *)(((uintptr_t)rmmio) + offset)) = value; // volatile 32-bit store at rmmio + offset
+  asm volatile ("" ::: "memory"); // compiler barrier after the store
+}
diff --git a/drm2/r500/shader.c b/drm2/r500/shader.c
new file mode 100644
index 0000000..a2c26a4
--- /dev/null
+++ b/drm2/r500/shader.c
@@ -0,0 +1,99 @@
+#include <assert.h>
+#include <stdlib.h>
+
+#include "../file.h"
+
+#include "3d_registers.h"
+#include "3d_registers_undocumented.h"
+#include "3d_registers_bits.h"
+
+#include "command_processor.h"
+#include "indirect_buffer.h"
+#include "shader.h"
+
+// Code size in dwords, derived from the end of the last entry. This matches
+// what emit_shader_code() writes when the entries are contiguous from byte 0,
+// which is how load_shaders() lays them out.
+static int shader_code_dwords(const struct shader_offset * offsets, int offsets_length)
+{
+  // offsets[offsets_length - 1] below needs at least one entry.
+  assert(offsets != NULL && offsets_length > 0);
+
+  const struct shader_offset * last_offset = &offsets[offsets_length - 1];
+  return (last_offset->start + last_offset->size) / 4;
+}
+
+// Appends the code of every entry, in array order, to the indirect buffer.
+static void emit_shader_code(const struct shader_offset * offsets, int offsets_length)
+{
+  for (int i = 0; i < offsets_length; i++) {
+    const uint32_t * code = offsets[i].buf;
+    for (int j = 0; j < offsets[i].size / 4; j++) {
+      TU(code[j]);
+    }
+  }
+}
+
+// Uploads PVS code: writes OCTWORD_OFFSET(0) to VAP_PVS_VECTOR_INDX_REG, then
+// streams all entries into VAP_PVS_VECTOR_DATA_REG_128 as one packet.
+// offsets must hold at least one entry (asserted).
+void load_pvs_shaders(struct shader_offset * offsets, int offsets_length)
+{
+  int instruction_dwords = shader_code_dwords(offsets, offsets_length);
+
+  T0V(VAP_PVS_VECTOR_INDX_REG
+      , VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(0)
+      );
+
+  T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, instruction_dwords - 1);
+  emit_shader_code(offsets, offsets_length);
+}
+
+// Uploads US code: selects index 0 / type 0 in GA_US_VECTOR_INDEX, then
+// streams all entries into GA_US_VECTOR_DATA as one packet.
+// offsets must hold at least one entry (asserted).
+void load_us_shaders(struct shader_offset * offsets, int offsets_length)
+{
+  int instruction_dwords = shader_code_dwords(offsets, offsets_length);
+
+  T0V(GA_US_VECTOR_INDEX
+      , GA_US_VECTOR_INDEX__INDEX(0) // starting from index 0
+      | GA_US_VECTOR_INDEX__TYPE(0) // load instructions
+      );
+
+  T0_ONE_REG(GA_US_VECTOR_DATA, instruction_dwords - 1);
+  emit_shader_code(offsets, offsets_length);
+}
+
+// Reads each file in paths and records it as one entry; entries are laid out
+// back to back starting at byte 0. Aborts (assert) if a file cannot be read,
+// its size is not a multiple of 4, or the running total exceeds 1024 bytes.
+// The returned array and every entry's buf are heap allocations that nothing
+// in this file frees.
+struct shader_offset * load_shaders(const char ** paths, int paths_length)
+{
+  int offset = 0;
+
+  struct shader_offset * offsets = malloc((sizeof *offsets) * paths_length);
+  // malloc(0) may legitimately return NULL, so only a non-empty request
+  // is treated as an allocation failure.
+  assert(offsets != NULL || paths_length == 0);
+
+  for (int i = 0; i < paths_length; i++) {
+    const char * path = paths[i];
+
+    int size = 0;
+    void * buf = file_read(path, &size);
+    assert(buf != NULL);
+    assert(size % 4 == 0);
+    assert(offset + size <= 1024);
+
+    offsets[i].start = offset;
+    offsets[i].size = size;
+    offsets[i].buf = buf;
+
+    offset += size;
+  }
+
+  return offsets;
+}
diff --git a/drm2/r500/shader.h b/drm2/r500/shader.h
new file mode 100644
index 0000000..8d7b960
--- /dev/null
+++ b/drm2/r500/shader.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct shader_offset {
+  int start; // in bytes, from the start of the uploaded code
+  int size; // in bytes
+  void * buf; // the shader code itself, size bytes, as returned by file_read()
+};
+
+void load_pvs_shaders(struct shader_offset * offsets, int offsets_length);
+
+void load_us_shaders(struct shader_offset * offsets, int offsets_length);
+
+struct shader_offset * load_shaders(const char ** paths, int paths_length);
+
+#ifdef __cplusplus
+}
+#endif