add drm triangle example

This commit is contained in:
Zack Buhman 2025-10-11 19:44:32 -05:00
parent 25c004dbe7
commit 62d3aa2b29
6 changed files with 7022 additions and 46 deletions

6580
drm/3d_registers.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
/* Register byte offsets that main.c writes through T0V().
 * NOTE(review): presumably the contents of 3d_registers_undocumented.h, i.e.
 * registers missing from the generated 3d_registers.h -- confirm. */
#define RADEON_WAIT_UNTIL 0x1720
#define VAP_VSM_VTX_ASSM 0x2184

1
drm/build.sh Normal file
View File

@ -0,0 +1 @@
# Build the DRM triangle example into ./a.out.
# -ldrm is listed after main.c on purpose: the linker processes its inputs in
# command-line order, so a library named before the object that needs it can
# be dropped (e.g. with --as-needed) and leave undefined references.
gcc -Werror -Wfatal-errors -g -O0 -I/usr/include/drm main.c -ldrm

367
drm/main.c Normal file
View File

@ -0,0 +1,367 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <xf86drm.h>
#include <libdrm/radeon_drm.h>
#include "3d_registers.h"
#include "3d_registers_undocumented.h"
/* Command stream assembled by indirect_buffer(), one 32-bit dword per entry. */
static uint32_t ib[16384];

/* Bit-field encoders for packet header dwords. */
#define TYPE_0_COUNT(c) (((c) & 0x3fff) << 16)
#define TYPE_0_ONE_REG (1 << 15)
#define TYPE_0_BASE_INDEX(i) (((i) & 0x1fff) << 0)
#define TYPE_3_COUNT(c) (((c) & 0x3fff) << 16)
#define TYPE_3_OPCODE(o) (((o) & 0xff) << 8)

/*
 * Packet emitters.  Each one appends to `ib` and advances an `int ix` that
 * must be in scope at the point of use.
 *
 * `address` is a register byte offset (it is shifted right by 2 to form the
 * base index).  Callers in this file pass `count` as the number of data
 * dwords that follow the header, minus one.
 *
 * The macros expand to a single statement WITHOUT a trailing semicolon, so
 * `if (x) T0V(a, b); else ...` parses as expected; arguments are
 * parenthesised so expressions such as `a | b` are encoded as a whole.
 */

/* Header only: type-0 packet header for `count` + 1 dwords starting at `address`. */
#define T0(address, count)                                              \
  do {                                                                  \
    ib[ix++] = TYPE_0_COUNT(count) | TYPE_0_BASE_INDEX((address) >> 2); \
  } while (0)

/* Header only: as T0, with the ONE_REG bit (bit 15) set. */
#define T0_ONE_REG(address, count)                                      \
  do {                                                                  \
    ib[ix++] = TYPE_0_COUNT(count) | TYPE_0_ONE_REG                     \
             | TYPE_0_BASE_INDEX((address) >> 2);                       \
  } while (0)

/* Header plus one data dword: write `value` to the register at `address`. */
#define T0V(address, value)                                             \
  do {                                                                  \
    ib[ix++] = TYPE_0_COUNT(0) | TYPE_0_BASE_INDEX((address) >> 2);     \
    ib[ix++] = (value);                                                 \
  } while (0)

/* Header only: type-3 packet header (top two bits set) with opcode and count.
 * The type field is built from an unsigned constant: shifting a signed 3 left
 * by 30 would overflow int. */
#define T3(opcode, count)                                               \
  do {                                                                  \
    ib[ix++] = (0x3u << 30) | TYPE_3_COUNT(count) | TYPE_3_OPCODE(opcode); \
  } while (0)
/*
 * Fill the file-scope `ib` array with the command stream for drawing one
 * triangle and return the number of dwords written.
 *
 * The stream is a fixed sequence of register writes (T0V / T0 / T0_ONE_REG),
 * a few raw dwords, and one type-3 packet carrying three vertices inline.
 * It is padded with 0x80000000 dwords until its length is a multiple of 8.
 */
int indirect_buffer(void)
{
  int ix = 0;

  T0V(SC_SCISSOR0, 0x0);
  T0V(SC_SCISSOR1, ((1200 - 1) << 13) | ((1600 - 1) << 0));
  T0V(RB3D_DSTCACHE_CTLSTAT, 0x0000000a);
  T0V(ZB_ZCACHE_CTLSTAT, 0x00000003);
  /* The next values carry an explicit 0x prefix.  A bare leading 0 makes a C
   * constant octal, so `00020000` was 0x2000 and `00004000` was 0x800.
   * NOTE(review): taken to be hex like every neighbouring value -- confirm
   * against the reference command stream. */
  T0V(RADEON_WAIT_UNTIL, 0x00020000);
  T0V(GB_AA_CONFIG, 0x00000000);
  T0V(RB3D_AARESOLVE_CTL, 0x00000000);
  T0V(RB3D_CCTL, 0x00004000);
  T0V(RB3D_COLOROFFSET0, 0x00000000);
  /* Raw two-dword packet following the colour offset write.
   * NOTE(review): presumably a relocation marker selecting relocs[0] in
   * main() -- confirm. */
  ib[ix++] = 0xc0001000;
  ib[ix++] = 0x0;
  T0V(RB3D_COLORPITCH0, (6 << 21) | (1600 << 0));
  ib[ix++] = 0xc0001000;
  ib[ix++] = 0x0;
  T0V(ZB_BW_CNTL, 0x00000000);
  T0V(ZB_DEPTHCLEARVALUE, 0x00000000);
  T0V(SC_HYPERZ_EN, 0x00000000);
  T0V(GB_Z_PEQ_CONFIG, 0x00000000);
  T0V(ZB_ZTOP, 0x00000001);
  T0V(FG_ALPHA_FUNC, 0x00000000);
  T0V(ZB_CNTL, 0x00000000);
  T0V(ZB_ZSTENCILCNTL, 0x00000000);
  T0V(ZB_STENCILREFMASK, 0x00000000);
  T0V(ZB_STENCILREFMASK_BF, 0x00000000);
  T0V(FG_ALPHA_VALUE, 0x00000000);
  T0V(RB3D_ROPCNTL, 0x00000000);
  T0V(RB3D_BLENDCNTL, 0x00000000);
  T0V(RB3D_ABLENDCNTL, 0x00000000);
  T0V(RB3D_COLOR_CHANNEL_MASK, 0x0000000f);
  T0V(RB3D_DITHER_CTL, 0x00000000);
  T0V(RB3D_CONSTANT_COLOR_AR, 0x00000000);
  T0V(RB3D_CONSTANT_COLOR_GB, 0x00000000);
  T0V(SC_CLIP_0_A, 0x00000000);
  T0V(SC_CLIP_0_B, 0xffffffff);
  T0V(SC_SCREENDOOR, 0x00ffffff);
  T0V(GB_SELECT, 0x00000000);
  T0V(FG_FOG_BLEND, 0x00000000);
  T0V(GA_OFFSET, 0x00000000);
  T0V(SU_TEX_WRAP, 0x00000000);
  T0V(SU_DEPTH_SCALE, 0x4b7fffff);
  T0V(SU_DEPTH_OFFSET, 0x00000000);
  T0V(SC_EDGERULE, 0x2da49525);
  T0V(RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
  T0V(RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xfefefefe);
  T0V(GA_COLOR_CONTROL_PS3, 0x00000000);
  T0V(SU_TEX_WRAP_PS3, 0x00000000);
  /* Viewport values are IEEE-754 bit patterns: 0x44480000 = 800.0,
   * 0x44160000 = 600.0, 0xc4160000 = -600.0, 0x3f000000 = 0.5. */
  T0V(VAP_VPORT_XSCALE, 0x44480000);
  T0V(VAP_VPORT_XOFFSET, 0x44480000);
  T0V(VAP_VPORT_YSCALE, 0xc4160000);
  T0V(VAP_VPORT_YOFFSET, 0x44160000);
  T0V(VAP_VPORT_ZSCALE, 0x3f000000);
  T0V(VAP_VPORT_ZOFFSET, 0x3f000000);
  T0V(VAP_VTE_CNTL, 0x0000043f);
  T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
  T0V(VAP_PVS_VTX_TIMEOUT_REG, 0x0000ffff);
  T0V(VAP_GB_VERT_CLIP_ADJ, 0x3f800000);
  T0V(VAP_GB_VERT_DISC_ADJ, 0x3f800000);
  T0V(VAP_GB_HORZ_CLIP_ADJ, 0x3f800000);
  T0V(VAP_GB_HORZ_DISC_ADJ, 0x3f800000);
  T0V(VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa);
  T0V(VAP_TEX_TO_COLOR_CNTL, 0x00000000);
  T0V(VAP_PROG_STREAM_CNTL_0, 0x00002002);
  T0V(VAP_PROG_STREAM_CNTL_EXT_0, 0x0000fa88);
  T0V(VAP_PVS_CODE_CNTL_0, 0x00000000);
  T0V(VAP_PVS_CODE_CNTL_1, 0x00000000);
  T0V(VAP_PVS_VECTOR_INDX_REG, 0x00000000);
  /* Four dwords pushed through the same data register (count = 4 - 1). */
  T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, 3);
  ib[ix++] = 0x00f00203;
  ib[ix++] = 0x00d10001;
  ib[ix++] = 0x01248001;
  ib[ix++] = 0x01248001;
  T0V(VAP_CNTL, 0x00b0055a);
  T0V(VAP_PVS_FLOW_CNTL_OPC, 0x00000000);
  /* Zero 32 consecutive registers (count = 32 - 1). */
  T0(VAP_PVS_FLOW_CNTL_ADDRS_LW_0, 31);
  for (int i = 0; i < 32; i++) {
    ib[ix++] = 0x00000000;
  }
  /* Zero 16 consecutive registers (count = 16 - 1). */
  T0(VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, 15);
  for (int i = 0; i < 16; i++) {
    ib[ix++] = 0x00000000;
  }
  T0V(VAP_PVS_VECTOR_INDX_REG, 0x00000600);
  /* 24 zero dwords through the data register (count = 24 - 1). */
  T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, 23);
  for (int i = 0; i < 24; i++) {
    ib[ix++] = 0x00000000;
  }
  T0V(VAP_VTX_STATE_CNTL, 0x00005555);
  T0V(VAP_VSM_VTX_ASSM, 0x00000001);
  T0V(VAP_OUT_VTX_FMT_0, 0x00000001);
  T0V(VAP_OUT_VTX_FMT_1, 0x00000000);
  T0V(GB_ENABLE, 0x00000000);
  T0V(RS_IP_0, 0x30000000);
  T0V(RS_COUNT, 0x00040080);
  T0V(RS_INST_COUNT, 0x00000000);
  T0V(RS_INST_0, 0x00000000);
  T0V(VAP_CNTL_STATUS, 0x00000000);
  T0V(VAP_CLIP_CNTL, 0x0000c000);
  T0V(GA_POINT_SIZE, 0x00060006);
  T0V(GA_POINT_MINMAX, 0x00060006);
  T0V(GA_LINE_CNTL, 0x00020006);
  T0V(SU_POLY_OFFSET_ENABLE, 0x00000000);
  T0V(SU_CULL_MODE, 0x00000000);
  T0V(GA_LINE_STIPPLE_CONFIG, 0x00000000);
  T0V(GA_LINE_STIPPLE_VALUE, 0x00000000);
  T0V(GA_POLY_MODE, 0x00000000);
  T0V(GA_ROUND_MODE, 0x00000031);
  T0V(SC_CLIP_RULE, 0x0000ffff);
  T0V(GA_POINT_S0, 0x00000000);
  T0V(GA_POINT_T0, 0x3f800000);
  T0V(GA_POINT_S1, 0x3f800000);
  T0V(GA_POINT_T1, 0x00000000);
  T0V(US_OUT_FMT_0, 0x00001b00);
  T0V(US_OUT_FMT_1, 0x0000000f);
  T0V(US_OUT_FMT_2, 0x0000000f);
  T0V(US_OUT_FMT_3, 0x0000000f);
  T0V(GB_MSPOS0, 0x66666666);
  T0V(GB_MSPOS1, 0x06666666);
  T0V(US_CONFIG, 0x00000002);
  T0V(US_PIXSIZE, 0x00000001);
  T0V(US_FC_CTRL, 0x00000000);
  T0V(US_CODE_RANGE, 0x00000000);
  T0V(US_CODE_OFFSET, 0x00000000);
  T0V(US_CODE_ADDR, 0x00000000);
  T0V(GA_US_VECTOR_INDEX, 0x00000000);
  /* Six dwords through the data register (count = 6 - 1). */
  T0_ONE_REG(GA_US_VECTOR_DATA, 5);
  ib[ix++] = 0x00078005;
  ib[ix++] = 0x08020080;
  ib[ix++] = 0x08020080;
  ib[ix++] = 0x1c9b04d8;
  ib[ix++] = 0x1c810003;
  ib[ix++] = 0x00000005;
  T0V(FG_DEPTH_SRC, 0x00000000);
  T0V(US_W_FMT, 0x00000000);
  T0V(VAP_PVS_CONST_CNTL, 0x00000000);
  T0V(TX_INVALTAGS, 0x00000000);
  T0V(TX_ENABLE, 0x00000000);
  T0V(VAP_INDEX_OFFSET, 0x00000000);
  T0V(GA_COLOR_CONTROL, 0x0003aaaa);
  T0V(VAP_VF_MAX_VTX_INDX, 0x00000002);
  T0V(VAP_VF_MIN_VTX_INDX, 0x00000000);
  T0V(VAP_VTX_SIZE, 0x00000003);
  /* Type-3 packet, opcode 0x35, followed by 10 dwords (count = 10 - 1): one
   * control dword, then nine dwords of vertex data.  Read as IEEE-754 floats
   * in groups of three they are (0.5, -1, 0), (-1, -1, 0), (0, 0.5, 0).
   * NOTE(review): presumably an immediate-mode draw of one triangle --
   * confirm the opcode and control dword against the packet reference. */
  T3(0x35, 9);
  ib[ix++] = 0x00030034;
  ib[ix++] = 0x3f000000;
  ib[ix++] = 0xbf800000; //0xbf000000;
  ib[ix++] = 0x00000000;
  ib[ix++] = 0xbf800000; //0xbf000000
  ib[ix++] = 0xbf800000; //0xbf000000
  ib[ix++] = 0x00000000;
  ib[ix++] = 0x00000000;
  ib[ix++] = 0x3f000000;
  ib[ix++] = 0x00000000;

  /* Pad the stream to a multiple of 8 dwords. */
  while ((ix % 8) != 0) {
    ib[ix++] = 0x80000000;
  }
  return ix;
}
/*
 * Submit the command stream built by indirect_buffer() to /dev/dri/card0 and
 * dump the 1600 * 1200 * 4 byte colour buffer to "colorbuffer.data".
 *
 * Steps: create two GEM buffers (colour buffer and a small "flush" buffer),
 * submit a three-chunk command stream (IB, relocs, flags), wait for the flush
 * buffer to go idle, map the colour buffer and write it to disk.
 *
 * Returns 0 on success and 1 if any step fails; the failing call is reported
 * on stderr.
 */
int main(void)
{
  int status = 1;
  int ret;
  int out_fd = -1;
  void *ptr = MAP_FAILED;
  size_t map_length = 0;
  int colorbuffer_handle;
  int flush_handle;

  int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
  if (fd < 0) {
    perror("open(/dev/dri/card0)");
    return 1;
  }

  // colorbuffer
  {
    struct drm_radeon_gem_create args = {
      .size = 1600 * 1200 * 4,
      .alignment = 4096,
      .handle = 0,
      .initial_domain = 4, // RADEON_GEM_DOMAIN_VRAM
      .flags = 4
    };
    ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_CREATE, &args, (sizeof (args)));
    if (ret != 0) {
      perror("drmCommandWriteRead(DRM_RADEON_GEM_CREATE)");
      goto out;
    }
    assert(args.handle != 0);
    colorbuffer_handle = args.handle;
  }

  // flush
  {
    struct drm_radeon_gem_create args = {
      .size = 4096,
      .alignment = 4096,
      .handle = 0,
      .initial_domain = 2, // GTT
      .flags = 0
    };
    ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_CREATE, &args, (sizeof (args)));
    if (ret != 0) {
      perror("drmCommandWriteRead(DRM_RADEON_GEM_CREATE)");
      goto out;
    }
    assert(args.handle != 0);
    flush_handle = args.handle;
  }

  fprintf(stderr, "colorbuffer handle %d\n", colorbuffer_handle);

  struct drm_radeon_cs_reloc relocs[] = {
    {
      .handle = colorbuffer_handle,
      .read_domains = 4, // RADEON_GEM_DOMAIN_VRAM
      .write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
      .flags = 8,
    },
    {
      .handle = flush_handle,
      .read_domains = 2, // RADEON_GEM_DOMAIN_GTT
      .write_domain = 2, // RADEON_GEM_DOMAIN_GTT
      .flags = 0,
    }
  };

  uint32_t flags[2] = {
    5, // RADEON_CS_KEEP_TILING_FLAGS | RADEON_CS_END_OF_FRAME
    0, // RADEON_CS_RING_GFX
  };

  int ib_dwords = indirect_buffer();

  struct drm_radeon_cs_chunk chunks[3] = {
    {
      .chunk_id = RADEON_CHUNK_ID_IB,
      .length_dw = ib_dwords,
      .chunk_data = (uint64_t)(uintptr_t)ib,
    },
    {
      .chunk_id = RADEON_CHUNK_ID_RELOCS,
      .length_dw = (sizeof (relocs)) / (sizeof (uint32_t)),
      .chunk_data = (uint64_t)(uintptr_t)relocs,
    },
    {
      .chunk_id = RADEON_CHUNK_ID_FLAGS,
      .length_dw = (sizeof (flags)) / (sizeof (uint32_t)),
      .chunk_data = (uint64_t)(uintptr_t)&flags,
    },
  };

  // the CS ioctl takes a pointer to an array of pointers to chunks
  uint64_t chunks_array[3] = {
    (uint64_t)(uintptr_t)&chunks[0],
    (uint64_t)(uintptr_t)&chunks[1],
    (uint64_t)(uintptr_t)&chunks[2],
  };

  struct drm_radeon_cs cs = {
    .num_chunks = 3,
    .cs_id = 0,
    .chunks = (uint64_t)(uintptr_t)chunks_array,
    .gart_limit = 0,
    .vram_limit = 0,
  };
  ret = drmCommandWriteRead(fd, DRM_RADEON_CS, &cs, (sizeof (cs)));
  if (ret != 0) {
    perror("drmCommandWriteRead(DRM_RADEON_CS)");
    goto out;
  }

  // retry for as long as the kernel reports the flush buffer as busy
  struct drm_radeon_gem_wait_idle args = {
    .handle = flush_handle
  };
  while (drmCommandWrite(fd, DRM_RADEON_GEM_WAIT_IDLE, &args, (sizeof (args))) == -EBUSY) {
  }

  struct drm_radeon_gem_mmap mmap_args = {
    .handle = colorbuffer_handle,
    .offset = 0,
    .size = 1600 * 1200 * 4,
  };
  ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_MMAP, &mmap_args, (sizeof (mmap_args)));
  if (ret != 0) {
    perror("drmCommandWriteRead(DRM_RADEON_GEM_MMAP)");
    goto out;
  }

  ptr = mmap(0, mmap_args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
             fd, mmap_args.addr_ptr);
  if (ptr == MAP_FAILED) {
    perror("mmap(colorbuffer)");
    goto out;
  }
  map_length = mmap_args.size;

  // O_CREAT requires the third (mode) argument; without it the permission
  // bits of a newly created file are indeterminate
  out_fd = open("colorbuffer.data", O_RDWR|O_CREAT, 0644);
  if (out_fd < 0) {
    perror("open(colorbuffer.data)");
    goto out;
  }

  // write() may transfer fewer bytes than requested; loop until done
  const unsigned char *cursor = ptr;
  size_t remaining = map_length;
  while (remaining > 0) {
    ssize_t written = write(out_fd, cursor, remaining);
    if (written < 0) {
      if (errno == EINTR) {
        continue;
      }
      perror("write(colorbuffer.data)");
      goto out;
    }
    if (written == 0) {
      fprintf(stderr, "write(colorbuffer.data): no progress\n");
      goto out;
    }
    cursor += written;
    remaining -= (size_t)written;
  }

  status = 0;

out:
  if (out_fd >= 0 && close(out_fd) != 0) {
    perror("close(colorbuffer.data)");
    status = 1;
  }
  if (ptr != MAP_FAILED) {
    munmap(ptr, map_length);
  }
  close(fd);
  return status;
}

View File

@ -1,8 +1,5 @@
import sys
with open(sys.argv[1]) as f:
lines = f.read().split('\n')
def parse_reg_value(value):
if '-' in value:
start, end = value.split('-')
@ -19,9 +16,6 @@ def reg_name_value(lines):
for value in reg_value:
yield reg_name, parse_reg_value(value.removesuffix(','))
#for reg_name, reg_value in reg_name_value(lines):
# print("#define", reg_name, reg_value)
aos_order_table = [
"VTX_AOS_ATTR01",
"VTX_AOS_ADDR0",
@ -37,7 +31,7 @@ aos_order_table = [
"VTX_AOS_ADDR7",
"VTX_AOS_ATTR89",
"VTX_AOS_ADDR8",
"VTX_AOS_ADDR8",
"VTX_AOS_ADDR9",
"VTX_AOS_ATTR1011",
"VTX_AOS_ADDR10",
"VTX_AOS_ADDR11",
@ -49,45 +43,78 @@ aos_order_table = [
"VTX_AOS_ADDR15",
]
print("registers_lookup = {")
for reg_name, reg_value in reg_name_value(lines):
if type(reg_value) is int:
print(f' 0x{reg_value:04x}: "{reg_name}",')
else:
start, end = reg_value
if '[' not in reg_name:
offset = start
while offset <= end:
ix = (offset - start) // 4
print(f' 0x{offset:04x}: "{reg_name}[{ix}]",')
offset += 4
def generate(lines, callback, callback_array):
for reg_name, reg_value in reg_name_value(lines):
if type(reg_value) is int:
callback(reg_value, reg_name)
else:
reg_basename = reg_name.split('[')[0]
index_range = reg_name.split('[')[1].split(']')[0]
start_ix, end_ix = map(int, index_range.split('-'))
start, end = reg_value
if '[' not in reg_name:
offset = start
while offset <= end:
ix = (offset - start) // 4
callback_array(offset, reg_name, ix)
offset += 4
else:
reg_basename = reg_name.split('[')[0]
index_range = reg_name.split('[')[1].split(']')[0]
reg_tail = reg_name.split(']')[1]
start_ix, end_ix = map(int, index_range.split('-'))
offset_increment = 4
offset_increment = 4
if (end - start) // 4 != end_ix - start_ix:
# guess the offset increment
if start + end_ix * 8 == end:
offset_increment = 8
elif start + end_ix * 16 == end:
offset_increment = 16
elif reg_basename == 'VAP_VTX_AOS_ADDR':
pass
elif reg_basename == 'VAP_VTX_AOS_ATTR':
for i, name in enumerate(aos_order_table):
print(f' 0x{i * 4 + start}: "{name}",')
continue
else:
print(reg_name, reg_value)
assert False
if (end - start) // 4 != end_ix - start_ix:
# guess the offset increment
if start + end_ix * 8 == end:
offset_increment = 8
elif start + end_ix * 16 == end:
offset_increment = 16
elif reg_basename == 'VAP_VTX_AOS_ADDR':
pass
elif reg_basename == 'VAP_VTX_AOS_ATTR':
for i, name in enumerate(aos_order_table):
callback(i * 4 + start, name)
continue
else:
print(reg_name, reg_value)
assert False
offset = start
while offset <= end:
ix = (offset - start) // offset_increment
print(f' 0x{offset:04x}: "{reg_basename}{ix}",')
offset += offset_increment
offset = start
while offset <= end:
ix = (offset - start) // offset_increment
name = f"{reg_basename}{ix}{reg_tail}"
callback(offset, name)
offset += offset_increment
print('}')
def python_callback_array(offset, name, ix):
    """Print one `registers_lookup` entry for element `ix` of array register `name`."""
    entry = ' 0x{:04x}: "{}[{}]",'.format(offset, name, ix)
    print(entry)
def python_callback(offset, name):
    """Print one `registers_lookup` entry mapping `offset` to register `name`."""
    entry = ' 0x{:04x}: "{}",'.format(offset, name)
    print(entry)
def generate_python(lines):
    """Print the register table as a Python dict literal named `registers_lookup`."""
    print("registers_lookup = {")
    generate(
        lines,
        callback=python_callback,
        callback_array=python_callback_array,
    )
    print('}')
def c_callback(offset, name):
    """Print a C `#define` giving register `name` the value `offset`."""
    line = "#define {} 0x{:04x}".format(name, offset)
    print(line)
def c_callback_array(offset, name, ix):
    """Print a C `#define` for element `ix` of array register `name`, as `name_ix`."""
    line = "#define {}_{} 0x{:04x}".format(name, ix, offset)
    print(line)
def generate_c(lines):
    """Print the register table as a list of C `#define` lines."""
    generate(lines, callback=c_callback, callback_array=c_callback_array)
# Command-line entry point: <script> <python|c> <register-list-file>.
# Arguments are validated explicitly instead of with `assert`, which is
# removed under `python -O` and would let an unknown mode produce no output.
if len(sys.argv) < 3 or sys.argv[1] not in ("python", "c"):
    sys.exit(f"usage: {sys.argv[0]} <python|c> <filename>")

mode = sys.argv[1]
filename = sys.argv[2]

with open(filename) as f:
    lines = f.read().split('\n')

if mode == "python":
    generate_python(lines)
else:
    generate_c(lines)

View File

@ -3,5 +3,4 @@ set -eux
python parse_pvs.py PVS_DST pvs_opcode_and_destination_operand.txt > pvs_dst.py
python parse_pvs.py PVS_SRC pvs_source_operand.txt > pvs_src.py
python parse_pvs_bits.py pvs_opcode_and_destination_operand_bits.txt > pvs_dst_bits.py
python 3d_registers.py 3d_registers.txt > registers_lookup.py
python 3d_registers.py python 3d_registers.txt > registers_lookup.py