From 18772f097697a32fd202bd7de6b2346f3795240b Mon Sep 17 00:00:00 2001 From: Zack Buhman Date: Tue, 7 Oct 2025 21:01:43 -0500 Subject: [PATCH] add triangle.c --- pci/3d_registers.h | 12 +- pci/3d_registers2.h | 21 +++ pci/main.c | 30 +++- pci/triangle.c | 275 ++++++++++++++++++++++++++++++++ pci/undocumented_3d_registers.h | 2 + 5 files changed, 329 insertions(+), 11 deletions(-) create mode 100644 pci/3d_registers2.h create mode 100644 pci/triangle.c diff --git a/pci/3d_registers.h b/pci/3d_registers.h index b15c1c2..ad48192 100644 --- a/pci/3d_registers.h +++ b/pci/3d_registers.h @@ -170,12 +170,12 @@ #define VAP_VF_CNTL 0x2084 #define VAP_VF_MAX_VTX_INDX 0x2134 #define VAP_VF_MIN_VTX_INDX 0x2138 -#define VAP_VPORT_XOFFSET 0x1d9c, -#define VAP_VPORT_XSCALE 0x1d98, -#define VAP_VPORT_YOFFSET 0x1da4, -#define VAP_VPORT_YSCALE 0x1da0, -#define VAP_VPORT_ZOFFSET 0x1dac, -#define VAP_VPORT_ZSCALE 0x1da8, +#define VAP_VPORT_XOFFSET 0x1d9c +#define VAP_VPORT_XSCALE 0x1d98 +#define VAP_VPORT_YOFFSET 0x1da4 +#define VAP_VPORT_YSCALE 0x1da0 +#define VAP_VPORT_ZOFFSET 0x1dac +#define VAP_VPORT_ZSCALE 0x1da8 #define VAP_VTE_CNTL 0x20b0 #define VAP_VTX_NUM_ARRAYS 0x20c0 #define VAP_VTX_SIZE 0x204b diff --git a/pci/3d_registers2.h b/pci/3d_registers2.h new file mode 100644 index 0000000..8ecf182 --- /dev/null +++ b/pci/3d_registers2.h @@ -0,0 +1,21 @@ +#define VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500 +#define VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504 +#define VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290 + +#define RS_IP_0 0x4074 +#define RS_INST_0 0x4320 + +#define VAP_OUTPUT_VTX_FMT_0 0x2090 +#define VAP_OUTPUT_VTX_FMT_1 0x2094 + +#define VAP_PROG_STREAM_CNTL_0 0x2150 +#define VAP_PROG_STREAM_CNTL_EXT_0 0x21e0 + +#define US_OUT_FMT_0 0x46a4 +#define US_OUT_FMT_1 0x46a8 +#define US_OUT_FMT_2 0x46ac +#define US_OUT_FMT_3 0x46b0 + +#define RB3D_COLOROFFSET0 0x4e28 + +#define RB3D_COLORPITCH0 0x4e38 diff --git a/pci/main.c b/pci/main.c index 81c30d5..534e299 100644 --- a/pci/main.c +++ 
b/pci/main.c @@ -5,6 +5,7 @@ #include #include "3d_registers.h" +#include "3d_registers2.h" #include "undocumented_3d_registers.h" #define R500 "r500" @@ -64,6 +65,8 @@ static inline uint32_t rreg(void __iomem * rmmio, uint32_t reg) #define bswap32 __builtin_bswap32 +#include "triangle.c" + static const uint8_t _cp_data[] __attribute__((aligned (4))) = { 0x00,0x00,0x00,0x00,0x42,0x00,0xe0,0x00,0x00,0x00,0x00,0x00,0x40,0x00,0xe0,0x00, 0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x99,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x9d, @@ -370,11 +373,28 @@ static int r500_probe(struct pci_dev *pdev, const struct pci_device_id *ent) mb(); wreg(rmmio, 0x1000, 0xdeadbeef); mb(); + for (int i = 0; i < 3; i++) { + wreg(rmmio, 0x1000, 0xc0001000); // NOP + mb(); + wreg(rmmio, 0x1000, 0); + mb(); + } + mdelay(1); scratch = rreg(rmmio, SCRATCH_REG0); printk(KERN_INFO "[r500] SCRATCH_REG0 2 %08x\n", scratch); + mdelay(100); + + triangle(rmmio); + + mdelay(100); + + RM(GA_IDLE); + RM(VAP_CNTL_STATUS); + RM(CP_STAT); + if (0) { wreg(rmmio, CP_CSQ_CNTL, 1); // Primary PIO, Indirect Disabled wreg(rmmio, CP_CSQ_MODE, (1 << 30) | (1 << 31)); // Primary PIO, Indirect Disabled @@ -403,12 +423,12 @@ static int r500_probe(struct pci_dev *pdev, const struct pci_device_id *ent) (void)rreg(rmmio, CP_RB_WPTR); udelay(500); + + uint32_t scratch_reg0_2 = rreg(rmmio, SCRATCH_REG0); + printk(KERN_INFO "[r500] SCRATCH_REG0 2 %08x\n", scratch_reg0_2); } - uint32_t scratch_reg0_2 = rreg(rmmio, SCRATCH_REG0); - printk(KERN_INFO "[r500] SCRATCH_REG0 2 %08x\n", scratch_reg0_2); - - if (1) { + if (0) { // GB_PIPE_SELECT // GB_TILE_CONFIG // GA_SOFT_RESET @@ -455,7 +475,7 @@ static int r500_probe(struct pci_dev *pdev, const struct pci_device_id *ent) printk(KERN_INFO "[r500] SCRATCH_REG0 3 %08x\n", scratch_reg03); } - if (0) { + if (1) { uint32_t tmp; tmp = rreg(rmmio, 0xf8); printk(KERN_INFO "CONFIG_MEMSIZE 0x%08x\n", tmp); diff --git a/pci/triangle.c b/pci/triangle.c new file mode 100644 index 0000000..b26135d --- 
/dev/null
+++ b/pci/triangle.c
@@ -0,0 +1,288 @@
+/*
+ * WM: write `value` to MMIO register `offset` with wreg(), then issue mb(),
+ * wait 1 ms and log the register name and the written value with printk().
+ * Expects a `void __iomem *` named rmmio in the calling scope; `value` is
+ * evaluated exactly once.
+ */
+#define WM(offset, value) \
+  do { \
+    const uint32_t wm_value_ = (value); \
+    wreg(rmmio, (offset), wm_value_); \
+    mb(); \
+    mdelay(1); \
+    printk(KERN_INFO "[r500] %s %08x\n", #offset, wm_value_); \
+  } while (0)
+
+/* RM: read MMIO register `offset` with rreg() and log its name and value. */
+#define RM(offset) \
+  do { \
+    printk(KERN_INFO "[r500] %s %08x\n", #offset, rreg(rmmio, (offset))); \
+  } while (0)
+
+/*
+ * Write a fixed sequence of 3D state registers through rmmio, upload the
+ * vertex and fragment programs, then write one immediate-mode draw packet
+ * carrying three vertices to offset 0x1000. The "// r300_emit_*" and
+ * "// r500_emit_*" comments label each group of register writes.
+ */
+static void triangle(void __iomem * rmmio)
+{
+  // r300_emit_gpu_flush
+  WM(SC_SCISSOR0, 0);
+  uint32_t sc_scissor1 = ((1600 - 1) << 13) | ((1600 - 1) << 0);
+  WM(SC_SCISSOR1, sc_scissor1);
+  WM(RB3D_DSTCACHE_CTLSTAT, 0xa); // DC_FLUSH | DC_FREE
+  WM(ZB_ZCACHE_CTLSTAT, 0x3); // ZC_FLUSH | ZC_FREE
+  mb();
+  mdelay(100);
+
+  // r300_emit_aa_state
+  WM(GB_AA_CONFIG, 0);
+  WM(RB3D_AARESOLVE_CTL, 0);
+
+  // r300_emit_fb_state
+  WM(RB3D_CCTL, 0x4000); // INDEPENDENT_COLORFORMAT_ENABLE
+  WM(RB3D_COLOROFFSET0, 0);
+  mb();
+  uint32_t rb3d_colorpitch0
+    = (1600 << 0) // COLORPITCH
+    | (1 << 16) // COLORTILE (is macrotiled)
+    | (6 << 21) // COLORFORMAT (ARGB8888)
+    ;
+  WM(RB3D_COLORPITCH0, rb3d_colorpitch0);
+  mb();
+  WM(ZB_FORMAT, 2); // 24-bit integer, 8 bit stencil
+  WM(ZB_DEPTHOFFSET, 0);
+  mb();
+  uint32_t zb_depthpitch
+    = (1600 << 0)
+    | (1 << 16) // DEPTHMACROTILE
+    | (0b01 << 17) // DEPTHMICROTILE (32 byte cache line is tiled)
+    ;
+  WM(ZB_DEPTHPITCH, zb_depthpitch);
+  mb();
+
+  // r300_emit_hyperz_state
+  WM(ZB_BW_CNTL, 0);
+  WM(ZB_DEPTHCLEARVALUE, 0);
+  WM(SC_HYPERZ_EN, 0x1c); // HZ_ADJ (add or subtract 1/2)
+  WM(GB_Z_PEQ_CONFIG, 0);
+
+  // r300_emit_ztop_state
+  WM(ZB_ZTOP, 1); // Z is at top of pipe, after the scan unit
+
+  // r300_emit_dsa_state
+  WM(FG_ALPHA_FUNC, 0);
+  WM(ZB_CNTL, 0);
+  WM(ZB_ZSTENCILCNTL, 0);
+  WM(ZB_STENCILREFMASK, 0);
+  WM(ZB_STENCILREFMASK_BF, 0);
+  WM(FG_ALPHA_VALUE, 0);
+
+  // r300_emit_blend_state
+  WM(RB3D_ROPCNTL, 0);
+  WM(RB3D_BLENDCNTL, 0);
+  WM(RB3D_ABLENDCNTL, 0);
+  WM(RB3D_COLOR_CHANNEL_MASK, 0xf); // BLUE_MASK | GREEN_MASK | RED_MASK | ALPHA_MASK
+  WM(RB3D_DITHER_CTL, 0);
+
+  // r300_emit_blend_color_state
+  WM(RB3D_CONSTANT_COLOR_AR, 0);
+  WM(RB3D_CONSTANT_COLOR_GB, 0);
+
+  // r300_emit_scissor_state
+  WM(SC_CLIP_0_A, 0);
+  WM(SC_CLIP_0_B, 0x3ffffff);
+
+  // r300_emit_sample_mask
+  WM(SC_SCREENDOOR, 0xffffff);
+
+  // r300_emit_invariant_state
+  WM(GB_SELECT, 0);
+  WM(FG_FOG_BLEND, 0);
+  WM(GA_OFFSET, 0);
+  WM(SU_TEX_WRAP, 0);
+  WM(SU_DEPTH_SCALE, 0x4b7fffff); // 16777215.0f
+  WM(SU_DEPTH_OFFSET, 0);
+  WM(SC_EDGERULE, 0x2da49525);
+  WM(RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x1010101);
+  WM(RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xfefefefe);
+  WM(GA_COLOR_CONTROL_PS3, 0);
+  WM(SU_TEX_WRAP_PS3, 0);
+
+  // r300_emit_viewport_state
+  WM(VAP_VPORT_XSCALE, 0x44160000); // 600.0f
+  WM(VAP_VPORT_XOFFSET, 0x44480000); // 800.0f
+  WM(VAP_VPORT_YSCALE, 0xc4160000); // -600.0f
+  WM(VAP_VPORT_YOFFSET, 0x44160000); // 600.0f
+  WM(VAP_VPORT_ZSCALE, 0x3f000000); // 0.5f
+  WM(VAP_VPORT_ZOFFSET, 0x3f000000); // 0.5f
+
+  // r300_emit_pvs_flush
+  WM(VAP_PVS_STATE_FLUSH_REG, 0);
+
+  // r300_emit_vap_invariant_state
+  WM(VAP_PVS_VTX_TIMEOUT_REG, 0xffff);
+  WM(VAP_GB_VERT_CLIP_ADJ, 0x3f800000); // 1.0f
+  WM(VAP_GB_VERT_DISC_ADJ, 0x3f800000); // 1.0f
+  WM(VAP_GB_HORZ_CLIP_ADJ, 0x3f800000); // 1.0f
+  WM(VAP_GB_HORZ_DISC_ADJ, 0x3f800000); // 1.0f
+  WM(VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa); // SGN_NORM_NO_ZERO
+  WM(VAP_TEX_TO_COLOR_CNTL, 0);
+
+  // r300_emit_vertex_stream_state
+  WM(VAP_PROG_STREAM_CNTL_0, 0x2002); // DATA_TYPE_0(FLOAT_3) | LAST_VEC_0
+  /*
+    = SWIZZLE_SELECT_X_0(SELECT_X)
+    | SWIZZLE_SELECT_Y_0(SELECT_Y)
+    | SWIZZLE_SELECT_Z_0(SELECT_Z)
+    | SWIZZLE_SELECT_W_0(SELECT_FP_ONE)
+    | WRITE_ENA_0(XYZW)
+  */
+  WM(VAP_PROG_STREAM_CNTL_EXT_0, 0xfa88);
+
+  // r300_emit_vs_state
+  WM(VAP_PVS_CODE_CNTL_0, 0);
+  WM(VAP_PVS_CODE_CNTL_1, 0);
+  WM(VAP_PVS_VECTOR_INDX_REG, 0);
+  const uint32_t vertex_program[] = {0xf00203, 0xd10001, 0x1248001, 0x1248001};
+  for (size_t i = 0; i < (sizeof (vertex_program)) / (sizeof (vertex_program[0])); i++) {
+    wreg(rmmio, VAP_PVS_VECTOR_DATA_REG_128, vertex_program[i]);
+    mb();
+  }
+
+  /*
+    = PVS_NUM_SLOTS(10)
+    | PVS_NUM_CNTLRS(5)
+    | PVS_NUM_FPUS(5)
+    | VF_MAX_VTX_NUM(12)
+    | TCL_STATE_OPTIMIZATION
+  */
+  WM(VAP_CNTL, 0xb0055a);
+  WM(VAP_PVS_FLOW_CNTL_OPC, 0);
+  for (int i = 0; i < 16; i++) {
+    wreg(rmmio, VAP_PVS_FLOW_CNTL_ADDRS_LW_0 + i * 8, 0);
+    wreg(rmmio, VAP_PVS_FLOW_CNTL_ADDRS_UW_0 + i * 8, 0);
+    mb();
+  }
+  for (int i = 0; i < 16; i++) {
+    wreg(rmmio, VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 + i * 4, 0);
+    mb();
+  }
+
+  // r300_emit_clip_state
+  WM(VAP_PVS_VECTOR_INDX_REG, 0x600);
+  for (int i = 0; i < 128; i++) {
+    wreg(rmmio, VAP_PVS_VECTOR_DATA_REG_128, 0);
+    mb();
+  }
+
+  // r300_emit_rs_block_state
+  WM(VAP_VTX_STATE_CNTL, 0x5555); // Select User Color 0
+  WM(VAP_VSM_VTX_ASSM, 0x1);
+  WM(VAP_OUTPUT_VTX_FMT_0, 0x1); // output position vector
+  WM(VAP_OUTPUT_VTX_FMT_1, 0x4); // TEX_0_COMP_CNT(4)
+  WM(GB_ENABLE, 0);
+  WM(RS_IP_0, 0x30000000); // Zero components (0,0,0,1)
+  WM(RS_COUNT, 0x40080); // IC_COUNT(1) | HIRES_EN
+  WM(RS_INST_COUNT, 0);
+  WM(RS_INST_0, 0);
+
+  // r300_emit_rs_state
+  WM(VAP_CNTL_STATUS, 0);
+  WM(VAP_CLIP_CNTL, 0xc000); // PS_UCP_MODE(3)
+  WM(GA_POINT_SIZE, 0x60006);
+  WM(GA_POINT_MINMAX, 0x60006);
+  WM(GA_LINE_CNTL, 0x20006);
+  WM(SU_POLY_OFFSET_ENABLE, 0);
+  WM(SU_CULL_MODE, 0);
+  WM(GA_LINE_STIPPLE_CONFIG, 0);
+  WM(GA_LINE_STIPPLE_VALUE, 0);
+  WM(GA_POLY_MODE, 0);
+  WM(GA_ROUND_MODE, 0x31); // round to nearest | RGB_CLAMP | ALPHA_CLAMP
+  WM(SC_CLIP_RULE, 0xffff);
+  WM(GA_POINT_S0, 0); // 0.0f
+  WM(GA_POINT_T0, 0x3f800000); // 1.0f
+  WM(GA_POINT_S1, 0x3f800000); // 1.0f
+  WM(GA_POINT_T1, 0); // 0.0f
+
+  // r300_emit_fb_state_pipelined
+  WM(US_OUT_FMT_0, 0x1b00); // C4_8 | C0_SEL(Blue) | C1_SEL(Green) | C2_SEL(Red) | C3_SEL(Alpha)
+  WM(US_OUT_FMT_1, 0xf); // unused
+  WM(US_OUT_FMT_2, 0xf); // unused
+  WM(US_OUT_FMT_3, 0xf); // unused
+  WM(GB_MSPOS0, 0x66666666);
+  WM(GB_MSPOS1, 0x6666666); // second sample-position register, not GB_MSPOS0 again
+
+  // r500_emit_fs
+  WM(US_CONFIG, 0x2); // Legacy behavior for shader model 1
+  WM(US_PIXSIZE, 1);
+  WM(US_FC_CTRL, 0);
+  WM(US_CODE_RANGE, 0);
+  WM(US_CODE_OFFSET, 0);
+  WM(US_CODE_ADDR, 0);
+  WM(GA_US_VECTOR_INDEX, 0);
+  const uint32_t fragment_program[] = {0x78005, 0x8020080, 0x8020080, 0x1c9b04d8, 0x1c810003, 0x5};
+  for (size_t i = 0; i < (sizeof (fragment_program)) / (sizeof (fragment_program[0])); i++) {
+    wreg(rmmio, GA_US_VECTOR_DATA, fragment_program[i]);
+    mb();
+  }
+  WM(FG_DEPTH_SRC, 0);
+  WM(US_W_FMT, 0);
+
+  // r300_emit_vs_constants
+  WM(VAP_PVS_CONST_CNTL, 0);
+  // r300_emit_texture_cache_inval
+  WM(TX_INVALTAGS, 0);
+  // r300_emit_textures_state
+  WM(TX_ENABLE, 0);
+
+  // r500_emit_index_bias
+  WM(VAP_INDEX_OFFSET, 0);
+  // r300_emit_draw_init
+  WM(GA_COLOR_CONTROL, 0x3aaaa); // gouraud shading | provoking is always last vertex
+  WM(VAP_VF_MAX_VTX_INDX, 2);
+  WM(VAP_VF_MIN_VTX_INDX, 0);
+
+  // r300_draw_arrays_immediate
+  WM(VAP_VTX_SIZE, 3);
+
+  // IEEE-754 single-precision bit patterns for three (x, y, z) vertices;
+  // kept as integers so no float code or pointer type punning is needed
+  const uint32_t vertices[] = {
+    0x3f000000, 0xbf000000, 0x00000000, //  0.5f, -0.5f, 0.0f
+    0xbf000000, 0xbf000000, 0x00000000, // -0.5f, -0.5f, 0.0f
+    0x00000000, 0x3f000000, 0x00000000, //  0.0f,  0.5f, 0.0f
+  };
+
+#define _3D_DRAW_IMMD_2 (0x35)
+#define NOP (0x10)
+#define TYPE_3_PACKET(opcode, count) \
+  ((3u << 30) | ((uint32_t)(count) << 16) | ((uint32_t)(opcode) << 8))
+
+  printk(KERN_INFO "[r500] draw start\n");
+
+  wreg(rmmio, 0x1000, TYPE_3_PACKET(_3D_DRAW_IMMD_2, (3 * 3)));
+  mb();
+  mdelay(1);
+  wreg(rmmio, 0x1000, 0x30034); // VAP_VF_CNTL
+  mb();
+  mdelay(1);
+  for (size_t i = 0; i < (sizeof (vertices)) / (sizeof (vertices[0])); i++) {
+    wreg(rmmio, 0x1000, vertices[i]);
+    mb();
+    mdelay(1);
+  }
+  wreg(rmmio, 0x1000, TYPE_3_PACKET(NOP, 4 - 1));
+  mb();
+  mdelay(1);
+  for (int i = 0; i < 4; i++) {
+    wreg(rmmio, 0x1000, 0);
+    mb();
+    mdelay(1);
+  }
+
+  printk(KERN_INFO "[r500] draw end\n");
+
+  WM(0x6110, 0xe0000000);
+}
diff --git a/pci/undocumented_3d_registers.h b/pci/undocumented_3d_registers.h
index 2d7adb6..1c16fd2 100644
--- a/pci/undocumented_3d_registers.h
+++ b/pci/undocumented_3d_registers.h
@@ -16,3 +16,5 @@
 #define RBBM_SOFT_RESET_IDCT (1 << 14)
 
 #define SCRATCH_REG0 0x15e0
+
+#define VAP_VSM_VTX_ASSM 0x2184