#include #include #include #include #include #include #include #include #include #include #include "3d_registers.h" #include "3d_registers_undocumented.h" #include "3d_registers_bits.h" #include "command_processor.h" static inline uint32_t rreg(void * rmmio, uint32_t offset) { uint32_t value = *((volatile uint32_t *)(((uintptr_t)rmmio) + offset)); asm volatile ("" ::: "memory"); return value; } static inline void wreg(void * rmmio, uint32_t offset, uint32_t value) { *((volatile uint32_t *)(((uintptr_t)rmmio) + offset)) = value; asm volatile ("" ::: "memory"); } union u32_f32 { uint32_t u32; float f32; }; static union u32_f32 ib[16384]; int indirect_buffer(float time) { int ix = 0; T0V(RB3D_DSTCACHE_CTLSTAT , RB3D_DSTCACHE_CTLSTAT__DC_FLUSH(0x2) // Flush dirty 3D data | RB3D_DSTCACHE_CTLSTAT__DC_FREE(0x2) // Free 3D tags ); T0V(ZB_ZCACHE_CTLSTAT , ZB_ZCACHE_CTLSTAT__ZC_FLUSH(1) | ZB_ZCACHE_CTLSTAT__ZC_FREE(1) ); T0V(WAIT_UNTIL, 0x00020000); T0V(GB_AA_CONFIG, 0x00000000); T0V(RB3D_AARESOLVE_CTL, 0x00000000); T0V(RB3D_CCTL , RB3D_CCTL__INDEPENDENT_COLORFORMAT_ENABLE(1) ); T0V(ZB_BW_CNTL, 0x00000000); T0V(ZB_DEPTHCLEARVALUE, 0x00000000); T0V(SC_HYPERZ_EN, 0x00000000); T0V(GB_Z_PEQ_CONFIG, 0x00000000); T0V(ZB_ZTOP , ZB_ZTOP__ZTOP(1) ); T0V(FG_ALPHA_FUNC, 0x00000000); T0V(ZB_CNTL, 0x00000000); T0V(ZB_ZSTENCILCNTL, 0x00000000); T0V(ZB_STENCILREFMASK, 0x00000000); T0V(ZB_STENCILREFMASK_BF, 0x00000000); T0V(FG_ALPHA_VALUE, 0x00000000); T0V(RB3D_ROPCNTL, 0x00000000); T0V(RB3D_BLENDCNTL, 0x00000000); T0V(RB3D_ABLENDCNTL, 0x00000000); T0V(RB3D_COLOR_CHANNEL_MASK , RB3D_COLOR_CHANNEL_MASK__BLUE_MASK(1) | RB3D_COLOR_CHANNEL_MASK__GREEN_MASK(1) | RB3D_COLOR_CHANNEL_MASK__RED_MASK(1) | RB3D_COLOR_CHANNEL_MASK__ALPHA_MASK(1) ); T0V(RB3D_DITHER_CTL, 0x00000000); T0V(RB3D_CONSTANT_COLOR_AR, 0x00000000); T0V(RB3D_CONSTANT_COLOR_GB, 0x00000000); T0V(SC_CLIP_0_A, 0x00000000); T0V(SC_CLIP_0_B, 0xffffffff); T0V(SC_SCREENDOOR, 0x00ffffff); T0V(GB_SELECT, 0x00000000); T0V(FG_FOG_BLEND, 0x00000000); T0V(GA_OFFSET, 0x00000000); T0V(SU_TEX_WRAP, 0x00000000); T0Vf(SU_DEPTH_SCALE, 16777215.0f); T0V(SU_DEPTH_OFFSET, 0x00000000); T0V(SC_EDGERULE , SC_EDGERULE__ER_TRI(5) // L-in,R-out,HT-in,HB-in | SC_EDGERULE__ER_POINT(9) // L-out,R-in,HT-in,HB-out | SC_EDGERULE__ER_LINE_LR(5) // L-in,R-out,HT-in,HB-out | SC_EDGERULE__ER_LINE_RL(9) // L-out,R-in,HT-in,HB-out | SC_EDGERULE__ER_LINE_TB(26) // T-in,B-out,VL-out,VR-in | SC_EDGERULE__ER_LINE_BT(22) // T-out,B-in,VL-out,VR-in ); T0V(RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD , RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD__BLUE(1) | RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD__GREEN(1) | RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD__RED(1) | RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD__ALPHA(1) ); T0V(RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD , RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD__BLUE(254) | RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD__GREEN(254) | RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD__RED(254) | RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD__ALPHA(254) ); T0V(GA_COLOR_CONTROL_PS3, 0x00000000); T0V(SU_TEX_WRAP_PS3, 0x00000000); T0V(VAP_PVS_VTX_TIMEOUT_REG , VAP_PVS_VTX_TIMEOUT_REG__CLK_COUNT(0xffff) ); T0Vf(VAP_GB_VERT_CLIP_ADJ, 1.0f); T0Vf(VAP_GB_VERT_DISC_ADJ, 1.0f); T0Vf(VAP_GB_HORZ_CLIP_ADJ, 1.0f); T0Vf(VAP_GB_HORZ_DISC_ADJ, 1.0f); T0V(VAP_PSC_SGN_NORM_CNTL , VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_0(2) | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_1(2) | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_2(2) | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_3(2) | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_4(2) | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_5(2) | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_6(2) | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_7(2) | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_8(2) | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_9(2) | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_10(2) | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_11(2) | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_12(2) | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_13(2) | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_14(2) | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_15(2) ); T0V(VAP_TEX_TO_COLOR_CNTL, 0x00000000); T0V(VAP_CNTL , VAP_CNTL__PVS_NUM_SLOTS(10) | VAP_CNTL__PVS_NUM_CNTLRS(5) | VAP_CNTL__PVS_NUM_FPUS(5) | VAP_CNTL__VAP_NO_RENDER(0) | VAP_CNTL__VF_MAX_VTX_NUM(12) | VAP_CNTL__DX_CLIP_SPACE_DEF(0) | VAP_CNTL__TCL_STATE_OPTIMIZATION(1) ); T0V(VAP_PVS_FLOW_CNTL_OPC, 0x00000000); T0(VAP_PVS_FLOW_CNTL_ADDRS_LW_0, 31); for (int i = 0; i < 32; i++) ib[ix++].u32 = 0x00000000; T0(VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, 15); for (int i = 0; i < 16; i++) ib[ix++].u32 = 0x00000000; T0V(VAP_PVS_VECTOR_INDX_REG, 0x00000600); T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, 23); for (int i = 0; i < 24; i++) ib[ix++].u32 = 0x00000000; T0V(VAP_VTX_STATE_CNTL , VAP_VTX_STATE_CNTL__COLOR_0_ASSEMBLY_CNTL(1) | VAP_VTX_STATE_CNTL__COLOR_1_ASSEMBLY_CNTL(1) | VAP_VTX_STATE_CNTL__COLOR_2_ASSEMBLY_CNTL(1) | VAP_VTX_STATE_CNTL__COLOR_3_ASSEMBLY_CNTL(1) | VAP_VTX_STATE_CNTL__COLOR_4_ASSEMBLY_CNTL(1) | VAP_VTX_STATE_CNTL__COLOR_5_ASSEMBLY_CNTL(1) | VAP_VTX_STATE_CNTL__COLOR_6_ASSEMBLY_CNTL(1) | VAP_VTX_STATE_CNTL__COLOR_7_ASSEMBLY_CNTL(1) | VAP_VTX_STATE_CNTL__UPDATE_USER_COLOR_0_ENA(0) ); T0V(GB_ENABLE, 0x00000000); T0V(VAP_CNTL_STATUS, 0x00000000); T0V(VAP_CLIP_CNTL , VAP_CLIP_CNTL__PS_UCP_MODE(3) ); T0V(GA_POINT_SIZE , GA_POINT_SIZE__HEIGHT(6) | GA_POINT_SIZE__WIDTH(6) ); T0V(GA_POINT_MINMAX , GA_POINT_MINMAX__MIN_SIZE(6) | GA_POINT_MINMAX__MAX_SIZE(6) ); T0V(GA_LINE_CNTL , GA_LINE_CNTL__WIDTH(6) | GA_LINE_CNTL__END_TYPE(2) | GA_LINE_CNTL__SORT(0) ); T0V(SU_POLY_OFFSET_ENABLE, 0x00000000); T0V(SU_CULL_MODE, 0x00000000); T0V(GA_LINE_STIPPLE_CONFIG, 0x00000000); T0V(GA_LINE_STIPPLE_VALUE, 0x00000000); T0V(GA_POLY_MODE, 0x00000000); T0V(GA_ROUND_MODE , GA_ROUND_MODE__GEOMETRY_ROUND(1) | GA_ROUND_MODE__COLOR_ROUND(0) | GA_ROUND_MODE__RGB_CLAMP(1) | GA_ROUND_MODE__ALPHA_CLAMP(1) | GA_ROUND_MODE__GEOMETRY_MASK(0) ); T0V(SC_CLIP_RULE , SC_CLIP_RULE__CLIP_RULE(0xffff)); T0Vf(GA_POINT_S0, 0.0f); T0Vf(GA_POINT_T0, 1.0f); T0Vf(GA_POINT_S1, 1.0f); T0Vf(GA_POINT_T1, 0.0f); T0V(US_OUT_FMT_0 , US_OUT_FMT__OUT_FMT(0) // C4_8 | US_OUT_FMT__C0_SEL(3) // Blue | US_OUT_FMT__C1_SEL(2) // Green | US_OUT_FMT__C2_SEL(1) // Red | US_OUT_FMT__C3_SEL(0) // Alpha | US_OUT_FMT__OUT_SIGN(0) ); T0V(US_OUT_FMT_1 , US_OUT_FMT__OUT_FMT(15) // render target is not used ); T0V(US_OUT_FMT_2 , US_OUT_FMT__OUT_FMT(15) // render target is not used ); T0V(US_OUT_FMT_2 , US_OUT_FMT__OUT_FMT(15) // render target is not used ); T0V(GB_MSPOS0 , GB_MSPOS0__MS_X0(6) | GB_MSPOS0__MS_Y0(6) | GB_MSPOS0__MS_X1(6) | GB_MSPOS0__MS_Y1(6) | GB_MSPOS0__MS_X2(6) | GB_MSPOS0__MS_Y2(6) | GB_MSPOS0__MSBD0_Y(6) | GB_MSPOS0__MSBD0_X(6) ); T0V(GB_MSPOS1 , GB_MSPOS1__MS_X3(6) | GB_MSPOS1__MS_Y3(6) | GB_MSPOS1__MS_X4(6) | GB_MSPOS1__MS_Y4(6) | GB_MSPOS1__MS_X5(6) | GB_MSPOS1__MS_Y5(6) | GB_MSPOS1__MSBD1(6) ); T0V(US_CONFIG , US_CONFIG__ZERO_TIMES_ANYTHING_EQUALS_ZERO(1) ); T0V(US_FC_CTRL, 0); T0V(FG_DEPTH_SRC, 0x00000000); T0V(US_W_FMT, 0x00000000); T0V(TX_INVALTAGS, 0x00000000); T0V(TX_ENABLE, 0x00000000); T0V(VAP_INDEX_OFFSET, 0x00000000); T0V(GA_COLOR_CONTROL , GA_COLOR_CONTROL__RGB0_SHADING(2) | GA_COLOR_CONTROL__ALPHA0_SHADING(2) | GA_COLOR_CONTROL__RGB1_SHADING(2) | GA_COLOR_CONTROL__ALPHA1_SHADING(2) | GA_COLOR_CONTROL__RGB2_SHADING(2) | GA_COLOR_CONTROL__ALPHA2_SHADING(2) | GA_COLOR_CONTROL__RGB3_SHADING(2) | GA_COLOR_CONTROL__ALPHA3_SHADING(2) | GA_COLOR_CONTROL__PROVOKING_VERTEX(3) ); ////////////////////////////////////////////////////////////////////////////// // CB ////////////////////////////////////////////////////////////////////////////// T0V(RB3D_COLOROFFSET0 , 0x00000000 // value replaced by kernel from relocs ); T3(_NOP, 0); ib[ix++].u32 = 0 * 4; // index into relocs array T0V(RB3D_COLORPITCH0 , RB3D_COLORPITCH__COLORPITCH(1600 >> 1) | RB3D_COLORPITCH__COLORFORMAT(6) // ARGB8888 ); // The COLORPITCH NOP is ignored/not applied due to // RADEON_CS_KEEP_TILING_FLAGS, but is still required. T3(_NOP, 0); ib[ix++].u32 = 0 * 4; // index into relocs array ////////////////////////////////////////////////////////////////////////////// // SC ////////////////////////////////////////////////////////////////////////////// T0V(SC_SCISSOR0 , SC_SCISSOR0__XS0(0) | SC_SCISSOR0__YS0(0) ); T0V(SC_SCISSOR1 , SC_SCISSOR1__XS1(800 - 1) | SC_SCISSOR1__YS1(600 - 1) ); ////////////////////////////////////////////////////////////////////////////// // VAP ////////////////////////////////////////////////////////////////////////////// T0Vf(VAP_VPORT_XSCALE, 400.0f); T0Vf(VAP_VPORT_XOFFSET, 400.0f); T0Vf(VAP_VPORT_YSCALE, -300.0f); T0Vf(VAP_VPORT_YOFFSET, 300.0f); T0Vf(VAP_VPORT_ZSCALE, 0.5f); T0Vf(VAP_VPORT_ZOFFSET, 0.5f); T0V(VAP_VTE_CNTL , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1) | VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1) | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1) | VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1) | VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1) | VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1) | VAP_VTE_CNTL__VTX_XY_FMT(0) | VAP_VTE_CNTL__VTX_Z_FMT(0) | VAP_VTE_CNTL__VTX_W0_FMT(1) | VAP_VTE_CNTL__SERIAL_PROC_ENA(0) ); T0V(VAP_VF_MAX_VTX_INDX , VAP_VF_MAX_VTX_INDX__MAX_INDX(5) ); T0V(VAP_VF_MIN_VTX_INDX , VAP_VF_MIN_VTX_INDX__MIN_INDX(0) ); T0V(VAP_VTX_SIZE , VAP_VTX_SIZE__DWORDS_PER_VTX(3) ); T0V(VAP_PROG_STREAM_CNTL_0 , VAP_PROG_STREAM_CNTL__DATA_TYPE_0__FLOAT_3 | VAP_PROG_STREAM_CNTL__SKIP_DWORDS_0(0) | VAP_PROG_STREAM_CNTL__DST_VEC_LOC_0(0) | VAP_PROG_STREAM_CNTL__LAST_VEC_0(1) ); T0V(VAP_PROG_STREAM_CNTL_EXT_0 , VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0(0) | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0(1) | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0(2) | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0(5) | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(15) ); T0V(VAP_VSM_VTX_ASSM, 0x00000005); // undocumented T0V(VAP_OUT_VTX_FMT_0 , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1) | VAP_OUT_VTX_FMT_0__VTX_COLOR_0_PRESENT(1) ); T0V(VAP_OUT_VTX_FMT_1 , 0 ); ////////////////////////////////////////////////////////////////////////////// // VAP_PVS ////////////////////////////////////////////////////////////////////////////// // vertex constants T0V(VAP_PVS_CONST_CNTL , VAP_PVS_CONST_CNTL__PVS_CONST_BASE_OFFSET(0) | VAP_PVS_CONST_CNTL__PVS_MAX_CONST_ADDR(1) ); T0V(VAP_PVS_VECTOR_INDX_REG , VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(1024) ); const float vertex_consts[] = { 4.0f / 3.0f, 0.0f, 0.0f, 0.0f, }; int vertex_consts_length = (sizeof (vertex_consts)) / (sizeof (vertex_consts[0])); T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, (vertex_consts_length - 1)); for (int i = 0; i < vertex_consts_length; i++) ib[ix++].f32 = vertex_consts[i]; // vertex code T0V(VAP_PVS_CODE_CNTL_0 , VAP_PVS_CODE_CNTL_0__PVS_FIRST_INST(0) | VAP_PVS_CODE_CNTL_0__PVS_XYZW_VALID_INST(1) | VAP_PVS_CODE_CNTL_0__PVS_LAST_INST(1) ); T0V(VAP_PVS_CODE_CNTL_1 , VAP_PVS_CODE_CNTL_1__PVS_LAST_VTX_SRC_INST(1) ); T0V(VAP_PVS_VECTOR_INDX_REG , VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(0) ); const uint32_t vertex_shader[] = { #include "shadertoy.vs.inc" }; const int vertex_shader_length = (sizeof (vertex_shader)) / (sizeof (vertex_shader[0])); assert(vertex_shader_length % 4 == 0); //printf("vs length %d\n", vertex_shader_length); T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, vertex_shader_length - 1); for (int i = 0; i < vertex_shader_length; i++) { ib[ix++].u32 = vertex_shader[i]; } T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000); ////////////////////////////////////////////////////////////////////////////// // RS ////////////////////////////////////////////////////////////////////////////// T0V(RS_IP_0, 0); T0V(RS_COUNT , RS_COUNT__IT_COUNT(0) | RS_COUNT__IC_COUNT(1) | RS_COUNT__W_ADDR(0) | RS_COUNT__HIRES_EN(1) ); T0V(RS_INST_0 , RS_INST__TEX_ID(0) | RS_INST__TEX_CN(0) | RS_INST__TEX_ADDR(0) | RS_INST__COL_ID(0) | RS_INST__COL_CN(1) | RS_INST__COL_ADDR(0) | RS_INST__TEX_ADJ(0) | RS_INST__W_CN(0) ); T0V(RS_INST_COUNT, 0x00000000); ////////////////////////////////////////////////////////////////////////////// // GA_US ////////////////////////////////////////////////////////////////////////////// // fragment constants #define PI (3.14159274101257324219f) #define PI_2 (PI * 2.0f) #define I_PI_2 (1.0f / (PI_2)) const float fragment_consts[] = { time, 1.2, 0.01, 0.4, PI_2, I_PI_2, 0, 0, 0.25, 0.40625, 0.5625, 0, }; int fragment_consts_length = (sizeof (fragment_consts)) / (sizeof (fragment_consts[0])); assert(fragment_consts_length % 4 == 0); T0V(GA_US_VECTOR_INDEX , GA_US_VECTOR_INDEX__INDEX(0) | GA_US_VECTOR_INDEX__TYPE(1) ); T0_ONE_REG(GA_US_VECTOR_DATA, (fragment_consts_length - 1)); for (int i = 0; i < fragment_consts_length; i++) ib[ix++].f32 = fragment_consts[i]; // fragment code const uint32_t fragment_shader[] = { #include "shadertoy_palette_fractal.fs.inc" }; const int fragment_shader_length = (sizeof (fragment_shader)) / (sizeof (fragment_shader[0])); assert(fragment_shader_length % 6 == 0); //printf("fs length %d\n", fragment_shader_length); const int fragment_shader_instructions = fragment_shader_length / 6; //printf("fs instructions %d\n", fragment_shader_instructions); T0V(US_PIXSIZE , US_PIXSIZE__PIX_SIZE(3) // pixel shader stack frame size ); T0V(US_CODE_RANGE , US_CODE_RANGE__CODE_ADDR(0) | US_CODE_RANGE__CODE_SIZE(fragment_shader_instructions - 1) ); T0V(US_CODE_OFFSET , US_CODE_OFFSET__OFFSET_ADDR(0) ); T0V(US_CODE_ADDR , US_CODE_ADDR__START_ADDR(0) | US_CODE_ADDR__END_ADDR(fragment_shader_instructions - 1) ); T0V(GA_US_VECTOR_INDEX , GA_US_VECTOR_INDEX__INDEX(0) | GA_US_VECTOR_INDEX__TYPE(0) ); T0_ONE_REG(GA_US_VECTOR_DATA, fragment_shader_length - 1); for (int i = 0; i < fragment_shader_length; i++) { ib[ix++].u32 = fragment_shader[i]; } ////////////////////////////////////////////////////////////////////////////// // 3D_DRAW ////////////////////////////////////////////////////////////////////////////// const float vertices[] = { // position -1.0f, -1.0f, 0.0f, 1.0f, -1.0f, 0.0f, 1.0f, 1.0f, 0.0f, -1.0f, -1.0f, 0.0f, 1.0f, 1.0f, 0.0f, -1.0f, 1.0f, 0.0f }; const int vertices_length = (sizeof (vertices)) / (sizeof (vertices[0])); //printf("vtx length %d\n", vertices_length); T3(_3D_DRAW_IMMD_2, (1 + vertices_length) - 1); ib[ix++].u32 = VAP_VF_CNTL__PRIM_TYPE(4) | VAP_VF_CNTL__PRIM_WALK(3) | VAP_VF_CNTL__INDEX_SIZE(0) | VAP_VF_CNTL__VTX_REUSE_DIS(0) | VAP_VF_CNTL__DUAL_INDEX_MODE(0) | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) | VAP_VF_CNTL__NUM_VERTICES(6) ; for (int i = 0; i < vertices_length; i++) { ib[ix++].f32 = vertices[i]; } ////////////////////////////////////////////////////////////////////////////// // padding ////////////////////////////////////////////////////////////////////////////// while ((ix % 8) != 0) { ib[ix++].u32 = 0x80000000; } return ix; } int create_colorbuffer(int fd, int colorbuffer_size) { int ret; struct drm_radeon_gem_create args = { .size = colorbuffer_size, .alignment = 4096, .handle = 0, .initial_domain = 4, // RADEON_GEM_DOMAIN_VRAM .flags = 4 }; ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_CREATE, &args, (sizeof (struct drm_radeon_gem_create))); if (ret != 0) { perror("drmCommandWriteRead(DRM_RADEON_GEM_CREATE)"); } assert(args.handle != 0); struct drm_radeon_gem_mmap mmap_args = { .handle = args.handle, .offset = 0, .size = colorbuffer_size, }; ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_MMAP, &mmap_args, (sizeof (struct drm_radeon_gem_mmap))); if (ret != 0) { perror("drmCommandWriteRead(DRM_RADEON_GEM_MMAP)"); } void * ptr = mmap(0, colorbuffer_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, mmap_args.addr_ptr); assert(ptr != MAP_FAILED); // clear colorbuffer for (int i = 0; i < colorbuffer_size / 4; i++) { ((uint32_t*)ptr)[i] = 0x00000000; } asm volatile ("" ::: "memory"); munmap(ptr, colorbuffer_size); return args.handle; } int main() { ////////////////////////////////////////////////////////////////////////////// // PCI resource0 ////////////////////////////////////////////////////////////////////////////// const char * resource2_path = "/sys/bus/pci/devices/0000:01:00.0/resource2"; int resource2_fd = open(resource2_path, O_RDWR | O_SYNC); assert(resource2_fd >= 0); uint32_t resource2_size = 0x10000; void * resource2_base = mmap(0, resource2_size, PROT_READ | PROT_WRITE, MAP_SHARED, resource2_fd, 0); assert(resource2_base != MAP_FAILED); void * rmmio = resource2_base; ////////////////////////////////////////////////////////////////////////////// // DRI card0 ////////////////////////////////////////////////////////////////////////////// int ret; int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC); const int colorbuffer_size = 1600 * 1200 * 4; int colorbuffer_handle[2]; int flush_handle; // colorbuffer colorbuffer_handle[0] = create_colorbuffer(fd, colorbuffer_size); colorbuffer_handle[1] = create_colorbuffer(fd, colorbuffer_size); fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]); fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]); // flush { struct drm_radeon_gem_create args = { .size = 4096, .alignment = 4096, .handle = 0, .initial_domain = 2, // GTT .flags = 0 }; ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_CREATE, &args, (sizeof (args))); if (ret != 0) { perror("drmCommandWriteRead(DRM_RADEON_GEM_CREATE)"); } assert(args.handle != 0); flush_handle = args.handle; } uint32_t flags[2] = { 5, // RADEON_CS_KEEP_TILING_FLAGS | RADEON_CS_END_OF_FRAME 0, // RADEON_CS_RING_GFX }; float time = 0; int ib_dwords = indirect_buffer(time); int colorbuffer_ix = 0; while (true) { struct drm_radeon_cs_reloc relocs[] = { { .handle = colorbuffer_handle[colorbuffer_ix], .read_domains = 4, // RADEON_GEM_DOMAIN_VRAM .write_domain = 4, // RADEON_GEM_DOMAIN_VRAM .flags = 8, }, { .handle = flush_handle, .read_domains = 2, // RADEON_GEM_DOMAIN_GTT .write_domain = 2, // RADEON_GEM_DOMAIN_GTT .flags = 0, } }; struct drm_radeon_cs_chunk chunks[3] = { { .chunk_id = RADEON_CHUNK_ID_IB, .length_dw = ib_dwords, .chunk_data = (uint64_t)(uintptr_t)ib, }, { .chunk_id = RADEON_CHUNK_ID_RELOCS, .length_dw = (sizeof (relocs)) / (sizeof (uint32_t)), .chunk_data = (uint64_t)(uintptr_t)relocs, }, { .chunk_id = RADEON_CHUNK_ID_FLAGS, .length_dw = (sizeof (flags)) / (sizeof (uint32_t)), .chunk_data = (uint64_t)(uintptr_t)&flags, }, }; uint64_t chunks_array[3] = { (uint64_t)(uintptr_t)&chunks[0], (uint64_t)(uintptr_t)&chunks[1], (uint64_t)(uintptr_t)&chunks[2], }; struct drm_radeon_cs cs = { .num_chunks = 3, .cs_id = 0, .chunks = (uint64_t)(uintptr_t)chunks_array, .gart_limit = 0, .vram_limit = 0, }; ret = drmCommandWriteRead(fd, DRM_RADEON_CS, &cs, (sizeof (struct drm_radeon_cs))); if (ret != 0) { perror("drmCommandWriteRead(DRM_RADEON_CS)"); } #define D1CRTC_DOUBLE_BUFFER_CONTROL 0x60ec #define D1GRPH_PRIMARY_SURFACE_ADDRESS 0x6110 #define D1GRPH_UPDATE 0x6144 #define D1GRPH_UPDATE__D1GRPH_SURFACE_UPDATE_PENDING (1 << 2) uint32_t d1crtc_double_buffer_control = rreg(rmmio, D1CRTC_DOUBLE_BUFFER_CONTROL); //printf("D1CRTC_DOUBLE_BUFFER_CONTROL: %08x\n", d1crtc_double_buffer_control); assert(d1crtc_double_buffer_control == (1 << 8)); // addresses were retrieved from /sys/kernel/debug/radeon_vram_mm // // This assumes GEM buffer allocation always starts from the lowest // unallocated address. const uint32_t colorbuffer_addresses[2] = { 0x813000, 0xf66000, }; wreg(rmmio, D1GRPH_PRIMARY_SURFACE_ADDRESS, colorbuffer_addresses[colorbuffer_ix]); while ((rreg(rmmio, D1GRPH_UPDATE) & D1GRPH_UPDATE__D1GRPH_SURFACE_UPDATE_PENDING) != 0); // next state time += 0.01f; colorbuffer_ix = (colorbuffer_ix + 1) & 1; // next indirect buffer ib_dwords = indirect_buffer(time); } /* int out_fd = open("colorbuffer.data", O_RDWR|O_CREAT); assert(out_fd >= 0); ssize_t write_length = write(out_fd, colorbuffer_ptr, colorbuffer_size); assert(write_length == colorbuffer_size); close(out_fd); int mm_fd = open("/sys/kernel/debug/radeon_vram_mm", O_RDONLY); assert(mm_fd >= 0); char buf[4096]; while (true) { ssize_t read_length = read(mm_fd, buf, 4096); assert(read_length >= 0); write(STDOUT_FILENO, buf, read_length); if (read_length < 4096) { break; } } close(mm_fd); */ close(fd); }