Compare commits

...

28 Commits

Author SHA1 Message Date
614a1d4172 src: add matrix_cubesphere_indexed 2025-11-20 15:09:37 -06:00
443f0f4785 parse_packets: add support for printing the content of type 3 packets 2025-11-20 14:46:18 -06:00
687bc734d9 regs: add display registers, memory controller, pcie 2025-11-15 13:03:09 -06:00
bed4b640ad particle_oriented_animated: improve vertex shader constant comments 2025-11-12 23:19:15 -06:00
fdff78f1ad shadertoy: implement shadertoy_palette_fractal 2025-11-11 16:02:39 -06:00
9e281cba58 assembler/lexer: add support for #include directive 2025-11-11 15:06:34 -06:00
90b486e744 assembler.fs: add support for omod 2025-11-11 14:22:35 -06:00
399cd6aaf9 particle: add colored particle fragment shader 2025-11-10 18:27:46 -06:00
872f0c31a8 particle_oriented_animated_quad_vbuf*: increase particle count 2025-11-09 23:12:20 -06:00
3ebdfda196 particle_oriented_animated_quad_vbuf_pixel_shader: implement particle reset 2025-11-09 22:30:27 -06:00
314267afe1 particle_oriented_animated_quad_vbuf_pixel_shader: use vertex_buffer_copy shader 2025-11-09 21:33:35 -06:00
e622d769a4 particle_oriented_animated_quad_vbuf_pixel_shader: use drm_radeon_cs2 2025-11-09 21:02:37 -06:00
1ec48e6323 add tx_rt_float_4x 2025-11-09 20:39:19 -06:00
69904efe3f particle_oriented_animated_quad_vbuf*: correct vertex buffer size calculation 2025-11-09 19:44:28 -06:00
a4c6f29cb4 add particle_oriented_animated_quad_vbuf_pixel_shader (partially working) 2025-11-09 17:44:48 -06:00
57a62859f3 add particle_oriented_animated_quad_vbuf 2025-11-08 23:55:10 -06:00
e7d571fe6a particle_oriented_animated_fan: use AOS for particle drawing 2025-11-08 23:10:04 -06:00
36cd56a51a add particle_oriented_animated_fan 2025-11-08 21:04:48 -06:00
940b0cd43d add tx_rt_float 2025-11-08 17:45:35 -06:00
0272ee93d0 add tx_rt 2025-11-08 15:46:18 -06:00
c864717deb matrix_cubesphere_tiled: enable tiling 2025-11-08 14:43:10 -06:00
23cafcdb23 rename argb -> rgba 2025-11-08 14:28:49 -06:00
089b126523 matrix_cubesphere: check drm_radeon_cs return value 2025-11-08 14:18:39 -06:00
26800a6d40 src: add particle_oriented_animated 2025-11-06 19:52:10 -06:00
ef291567b8 src: add particle_oriented 2025-11-06 18:03:08 -06:00
6bfb5bdb63 src: add plane 2025-11-06 17:12:40 -06:00
38fa29ca10 matrix_cubesphere_tiled: draw macrotiled texture 2025-11-04 18:42:26 -06:00
0d4e80b03e add matrix_cubesphere_tiled (partially incomplete) 2025-11-04 18:40:05 -06:00
108 changed files with 16639 additions and 165 deletions

View File

@ -1018,7 +1018,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -1105,7 +1105,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -1126,7 +1126,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -1188,7 +1188,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -304,18 +304,18 @@ int indirect_buffer(float time)
| SC_SCISSOR0__YS0(0)
);
T0V(SC_SCISSOR1
, SC_SCISSOR1__XS1(1600 - 1)
| SC_SCISSOR1__YS1(1200 - 1)
, SC_SCISSOR1__XS1(800 - 1)
| SC_SCISSOR1__YS1(600 - 1)
);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0Vf(VAP_VPORT_XSCALE, 800.0f);
T0Vf(VAP_VPORT_XOFFSET, 800.0f);
T0Vf(VAP_VPORT_YSCALE, -600.0f);
T0Vf(VAP_VPORT_YOFFSET, 600.0f);
T0Vf(VAP_VPORT_XSCALE, 400.0f);
T0Vf(VAP_VPORT_XOFFSET, 400.0f);
T0Vf(VAP_VPORT_YSCALE, -300.0f);
T0Vf(VAP_VPORT_YOFFSET, 300.0f);
T0Vf(VAP_VPORT_ZSCALE, 0.5f);
T0Vf(VAP_VPORT_ZOFFSET, 0.5f);
@ -409,7 +409,7 @@ int indirect_buffer(float time)
};
const int vertex_shader_length = (sizeof (vertex_shader)) / (sizeof (vertex_shader[0]));
assert(vertex_shader_length % 4 == 0);
printf("vs length %d\n", vertex_shader_length);
//printf("vs length %d\n", vertex_shader_length);
T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, vertex_shader_length - 1);
for (int i = 0; i < vertex_shader_length; i++) {
@ -449,10 +449,17 @@ int indirect_buffer(float time)
// fragment constants
#define PI (3.14159274101257324219f)
#define PI_2 (PI * 2.0f)
#define I_PI_2 (1.0f / (PI_2))
const float fragment_consts[] = {
time, 0, 0, 0,
time, 1.2, 0.01, 0.4,
PI_2, I_PI_2, 0, 0,
0.25, 0.40625, 0.5625, 0,
};
int fragment_consts_length = (sizeof (fragment_consts)) / (sizeof (fragment_consts[0]));
assert(fragment_consts_length % 4 == 0);
T0V(GA_US_VECTOR_INDEX
, GA_US_VECTOR_INDEX__INDEX(0)
@ -465,16 +472,16 @@ int indirect_buffer(float time)
// fragment code
const uint32_t fragment_shader[] = {
#include "shadertoy_palette.fs.inc"
#include "shadertoy_palette_fractal.fs.inc"
};
const int fragment_shader_length = (sizeof (fragment_shader)) / (sizeof (fragment_shader[0]));
assert(fragment_shader_length % 6 == 0);
printf("fs length %d\n", fragment_shader_length);
//printf("fs length %d\n", fragment_shader_length);
const int fragment_shader_instructions = fragment_shader_length / 6;
printf("fs instructions %d\n", fragment_shader_instructions);
//printf("fs instructions %d\n", fragment_shader_instructions);
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(2) // pixel shader stack frame size
, US_PIXSIZE__PIX_SIZE(3) // pixel shader stack frame size
);
T0V(US_CODE_RANGE
@ -513,7 +520,7 @@ int indirect_buffer(float time)
-1.0f, 1.0f, 0.0f
};
const int vertices_length = (sizeof (vertices)) / (sizeof (vertices[0]));
printf("vtx length %d\n", vertices_length);
//printf("vtx length %d\n", vertices_length);
T3(_3D_DRAW_IMMD_2, (1 + vertices_length) - 1);
ib[ix++].u32
= VAP_VF_CNTL__PRIM_TYPE(4)
@ -708,7 +715,7 @@ int main()
#define D1GRPH_UPDATE__D1GRPH_SURFACE_UPDATE_PENDING (1 << 2)
uint32_t d1crtc_double_buffer_control = rreg(rmmio, D1CRTC_DOUBLE_BUFFER_CONTROL);
printf("D1CRTC_DOUBLE_BUFFER_CONTROL: %08x\n", d1crtc_double_buffer_control);
//printf("D1CRTC_DOUBLE_BUFFER_CONTROL: %08x\n", d1crtc_double_buffer_control);
assert(d1crtc_double_buffer_control == (1 << 8));
// addresses were retrieved from /sys/kernel/debug/radeon_vram_mm

View File

@ -0,0 +1,37 @@
-- Fragment shader entry point for the palette/fractal effect.
-- Initialises uv, final_color and the iteration counter i, runs four textually
-- unrolled copies of the loop body, then writes final_color.rgb to out[0].
--
-- Constant registers (these must match the fragment constants uploaded by the
-- host program):
-- CONST[0] = { time, 1.2, 0.01, 0.4 }
-- CONST[1] = { PI_2, I_PI_2, 0, 0 }
-- CONST[2] = { 0.25, 0.40625, 0.5625, 0 }
--
-- Temporary register allocation (shared with the included loop body):
-- temp[0] : { uv0.xy , _, l }
-- temp[1] : { uv.xy , _, d }
-- temp[2] : final_color.xyzw
-- temp[3] : {col.xyz , i }
-- vec2 uv = uv0; // temp[1]
-- (MAX of a value with itself is used throughout as a plain move)
src0.rgb = temp[0] : -- uv0
temp[1].rg = MAX src0.rg_ src0.rg_ ;
-- vec4 final_color = vec4(0, 0, 0, 1);
-- (no source registers are read; only the constant 0 / 1 swizzles are used)
:
temp[2].a = MAX src0.1 src0.1 ,
temp[2].rgb = MAX src0.000 src0.000 ;
-- i = 0;
:
temp[3].a = MAX src0.0 src0.0 ;
--------------------------------------------------------------------------------
-- loop start
-- The loop is unrolled by inclusion: each #include below is one iteration, and
-- each copy of the body ends by incrementing i (temp[3].a).
--------------------------------------------------------------------------------
#include "shadertoy_palette_fractal_loop_inner.fs.asm"
#include "shadertoy_palette_fractal_loop_inner.fs.asm"
#include "shadertoy_palette_fractal_loop_inner.fs.asm"
#include "shadertoy_palette_fractal_loop_inner.fs.asm"
--------------------------------------------------------------------------------
-- loop end
--------------------------------------------------------------------------------
-- Output instruction: out[0].rgb = final_color.rgb.
-- out[0].a is written from the constant-one swizzle, not from temp[2].a.
OUT TEX_SEM_WAIT
src0.rgb = temp[2] :
out[0].a = MAX src0.1 src0.1 ,
out[0].rgb = MAX src0.rgb src0.rgb ;

View File

@ -0,0 +1,924 @@
0x00001800,
0x08020000,
0x08020080,
0x00e40720,
0x00000000,
0x00000015,
0x00007800,
0x08020080,
0x08020080,
0x00920490,
0x00c18023,
0x00000025,
0x00004000,
0x08020080,
0x08020080,
0x00000000,
0x00810033,
0x00000000,
0x00001800,
0x08020001,
0x080200bc,
0x00ed8720,
0x00000000,
0x00790010,
0x00001800,
0x08020001,
0x08020080,
0x00000720,
0x00000000,
0x00000019,
0x00001800,
0x08020001,
0x0802c080,
0x00fb0720,
0x00000000,
0x00f6d010,
0x00004000,
0x08020000,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x00300100,
0x00000000,
0x0060e010,
0x1a000000,
0x00004000,
0x08020100,
0x08000500,
0x00000000,
0x00600010,
0x1a000000,
0x00003800,
0x8802c102,
0x08020001,
0x006d86d8,
0x00000000,
0x00223030,
0x00003800,
0x08020003,
0x08020080,
0x00000220,
0x00000000,
0x00000039,
0x00003800,
0x0802c003,
0x08020080,
0x004406d8,
0x00000000,
0x00a21030,
0x00000800,
0x08020003,
0x08020080,
0x00000000,
0x0000000d,
0x0000003a,
0x00001000,
0x08020003,
0x08020080,
0x00000000,
0x0000400d,
0x0000003a,
0x00002000,
0x08020003,
0x08020080,
0x00000000,
0x0000800d,
0x0000003a,
0x00003800,
0x0802c003,
0x08020080,
0x00440221,
0x00000000,
0x00221030,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0002c018,
0x00000000,
0x00004000,
0x08020001,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x08000400,
0x00000000,
0x0068c010,
0x20000000,
0x00004000,
0x10020080,
0x08034001,
0x00000000,
0x0068c010,
0x04000000,
0x00004000,
0x08040480,
0x0b020001,
0x00000000,
0x0028c010,
0x1c000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c017,
0x00000000,
0x00004000,
0x08020080,
0x0802c001,
0x00000000,
0x00618010,
0x5a000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x1800c01c,
0x00000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0004c01a,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0048c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c019,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0028c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c018,
0x00000000,
0x00003800,
0x00220003,
0x08000480,
0x006da220,
0x00000000,
0x00222020,
0x00004000,
0x08020080,
0x08020003,
0x00000000,
0x00618030,
0x30000000,
0x00001800,
0x08020001,
0x080200bc,
0x00ed8720,
0x00000000,
0x00790010,
0x00001800,
0x08020001,
0x08020080,
0x00000720,
0x00000000,
0x00000019,
0x00001800,
0x08020001,
0x0802c080,
0x00fb0720,
0x00000000,
0x00f6d010,
0x00004000,
0x08020000,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x00300100,
0x00000000,
0x0060e010,
0x1a000000,
0x00004000,
0x08020100,
0x08000500,
0x00000000,
0x00600010,
0x1a000000,
0x00003800,
0x8802c102,
0x08020001,
0x006d86d8,
0x00000000,
0x00223030,
0x00003800,
0x08020003,
0x08020080,
0x00000220,
0x00000000,
0x00000039,
0x00003800,
0x0802c003,
0x08020080,
0x004406d8,
0x00000000,
0x00a21030,
0x00000800,
0x08020003,
0x08020080,
0x00000000,
0x0000000d,
0x0000003a,
0x00001000,
0x08020003,
0x08020080,
0x00000000,
0x0000400d,
0x0000003a,
0x00002000,
0x08020003,
0x08020080,
0x00000000,
0x0000800d,
0x0000003a,
0x00003800,
0x0802c003,
0x08020080,
0x00440221,
0x00000000,
0x00221030,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0002c018,
0x00000000,
0x00004000,
0x08020001,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x08000400,
0x00000000,
0x0068c010,
0x20000000,
0x00004000,
0x10020080,
0x08034001,
0x00000000,
0x0068c010,
0x04000000,
0x00004000,
0x08040480,
0x0b020001,
0x00000000,
0x0028c010,
0x1c000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c017,
0x00000000,
0x00004000,
0x08020080,
0x0802c001,
0x00000000,
0x00618010,
0x5a000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x1800c01c,
0x00000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0004c01a,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0048c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c019,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0028c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c018,
0x00000000,
0x00003800,
0x00220003,
0x08000480,
0x006da220,
0x00000000,
0x00222020,
0x00004000,
0x08020080,
0x08020003,
0x00000000,
0x00618030,
0x30000000,
0x00001800,
0x08020001,
0x080200bc,
0x00ed8720,
0x00000000,
0x00790010,
0x00001800,
0x08020001,
0x08020080,
0x00000720,
0x00000000,
0x00000019,
0x00001800,
0x08020001,
0x0802c080,
0x00fb0720,
0x00000000,
0x00f6d010,
0x00004000,
0x08020000,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x00300100,
0x00000000,
0x0060e010,
0x1a000000,
0x00004000,
0x08020100,
0x08000500,
0x00000000,
0x00600010,
0x1a000000,
0x00003800,
0x8802c102,
0x08020001,
0x006d86d8,
0x00000000,
0x00223030,
0x00003800,
0x08020003,
0x08020080,
0x00000220,
0x00000000,
0x00000039,
0x00003800,
0x0802c003,
0x08020080,
0x004406d8,
0x00000000,
0x00a21030,
0x00000800,
0x08020003,
0x08020080,
0x00000000,
0x0000000d,
0x0000003a,
0x00001000,
0x08020003,
0x08020080,
0x00000000,
0x0000400d,
0x0000003a,
0x00002000,
0x08020003,
0x08020080,
0x00000000,
0x0000800d,
0x0000003a,
0x00003800,
0x0802c003,
0x08020080,
0x00440221,
0x00000000,
0x00221030,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0002c018,
0x00000000,
0x00004000,
0x08020001,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x08000400,
0x00000000,
0x0068c010,
0x20000000,
0x00004000,
0x10020080,
0x08034001,
0x00000000,
0x0068c010,
0x04000000,
0x00004000,
0x08040480,
0x0b020001,
0x00000000,
0x0028c010,
0x1c000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c017,
0x00000000,
0x00004000,
0x08020080,
0x0802c001,
0x00000000,
0x00618010,
0x5a000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x1800c01c,
0x00000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0004c01a,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0048c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c019,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0028c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c018,
0x00000000,
0x00003800,
0x00220003,
0x08000480,
0x006da220,
0x00000000,
0x00222020,
0x00004000,
0x08020080,
0x08020003,
0x00000000,
0x00618030,
0x30000000,
0x00001800,
0x08020001,
0x080200bc,
0x00ed8720,
0x00000000,
0x00790010,
0x00001800,
0x08020001,
0x08020080,
0x00000720,
0x00000000,
0x00000019,
0x00001800,
0x08020001,
0x0802c080,
0x00fb0720,
0x00000000,
0x00f6d010,
0x00004000,
0x08020000,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x00300100,
0x00000000,
0x0060e010,
0x1a000000,
0x00004000,
0x08020100,
0x08000500,
0x00000000,
0x00600010,
0x1a000000,
0x00003800,
0x8802c102,
0x08020001,
0x006d86d8,
0x00000000,
0x00223030,
0x00003800,
0x08020003,
0x08020080,
0x00000220,
0x00000000,
0x00000039,
0x00003800,
0x0802c003,
0x08020080,
0x004406d8,
0x00000000,
0x00a21030,
0x00000800,
0x08020003,
0x08020080,
0x00000000,
0x0000000d,
0x0000003a,
0x00001000,
0x08020003,
0x08020080,
0x00000000,
0x0000400d,
0x0000003a,
0x00002000,
0x08020003,
0x08020080,
0x00000000,
0x0000800d,
0x0000003a,
0x00003800,
0x0802c003,
0x08020080,
0x00440221,
0x00000000,
0x00221030,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0002c018,
0x00000000,
0x00004000,
0x08020001,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x08000400,
0x00000000,
0x0068c010,
0x20000000,
0x00004000,
0x10020080,
0x08034001,
0x00000000,
0x0068c010,
0x04000000,
0x00004000,
0x08040480,
0x0b020001,
0x00000000,
0x0028c010,
0x1c000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c017,
0x00000000,
0x00004000,
0x08020080,
0x0802c001,
0x00000000,
0x00618010,
0x5a000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x1800c01c,
0x00000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0004c01a,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0048c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c019,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0028c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c018,
0x00000000,
0x00003800,
0x00220003,
0x08000480,
0x006da220,
0x00000000,
0x00222020,
0x00004000,
0x08020080,
0x08020003,
0x00000000,
0x00618030,
0x30000000,
0x00078005,
0x08020002,
0x08020080,
0x00440220,
0x00c18003,
0x00000005,

View File

@ -0,0 +1,143 @@
-- One iteration of the fractal loop body; the including shader pastes this file
-- once per unrolled iteration.
-- Reads uv0 (temp[0].rg) and the constants, and updates uv (temp[1].rg),
-- l (temp[0].a), d (temp[1].a), col (temp[3].rgb), final_color (temp[2].rgb)
-- and i (temp[3].a).
--
-- Inline constants used below: float(48) = 0.5, float(60) = 1.5, float(80) = 8.0.
-- NOTE(review): these three values are consistent with a 7-bit float encoding
-- (4-bit exponent biased by 7, 3-bit mantissa) -- confirm against the assembler.
-- uv = uv * 1.5;
src0.rgb = temp[1] , -- uv
src0.a = float(60) : -- 1.5
temp[1].rg = MAD src0.rg_ src0.aa_ src0.00_ ;
-- uv = fract(uv);
src0.rgb = temp[1] : -- uv
temp[1].rg = FRC src0.rg_ ;
-- uv = uv - 0.5;
src0.rgb = temp[1] ,
src1.a = float(48) : -- 0.5
temp[1].rg = MAD src0.rg_ src0.11_ -src1.aa_ ;
-- l = length(uv0);
-- dot(uv0.xy, uv0.xy) with the third component forced to 0, then
-- sqrt(x) computed as RCP(RSQ(|x|))
src0.rgb = temp[0] : -- uv0
DP3 src0.rg0 src0.rg0 ,
temp[0].a = DP ;
src0.a = temp[0] :
temp[0].a = RSQ |src0.a| ;
src0.a = temp[0] :
temp[0].a = RCP src0.a ;
-- d = i * 0.4 + l;
src0.a = const[0] , -- 0.4
src1.a = temp[0] , -- l
src2.a = temp[3] : -- i
temp[1].a = MAD src2.a src0.a src1.a ;
-- d = time * 0.4 + d;
src0.a = const[0] , -- 0.4
src1.a = temp[1] , -- d
src0.rgb = const[0] : -- time (r)
temp[1].a = MAD src0.r src0.a src1.a ;
--------------------------------------------------------------------------------
-- start of 'palette' function
--------------------------------------------------------------------------------
-- v = d + (vec3(0.25, 0.40625, 0.5625) + 0.5)
-- (the + 0.5 / fract / - 0.5 sequence wraps v into [-0.5, 0.5) before the COS)
src0.a = temp[1] , -- d
src0.rgb = const[2] , -- vec3(0.25, 0.40625, 0.5625)
src1.rgb = float(48) , -- 0.5
srcp.rgb = add : -- (vec3(0.25, 0.40625, 0.5625) + 0.5)
temp[3].rgb = MAD src0.111 src0.aaa srcp.rgb ;
-- v = fract(v)
src0.rgb = temp[3] : -- v
temp[3].rgb = FRC src0.rgb ;
-- v = v - 0.5
src0.rgb = temp[3] , -- v
src1.rgb = float(48) : -- 0.5
temp[3].rgb = MAD src0.111 src0.rgb -src1.rgb ;
-- v = cos(v * PI_2), evaluated one component per instruction
-- NOTE(review): assumes COS takes the same normalized input as the SIN further
-- down (which is commented as sin(d * PI_2)) -- confirm.
src0.rgb = temp[3] : -- v
COS src0.r ,
temp[3].r = SOP ;
src0.rgb = temp[3] : -- v
COS src0.g ,
temp[3].g = SOP ;
src0.rgb = temp[3] : -- v
COS src0.b ,
temp[3].b = SOP ;
-- col = vec3(0.5, 0.5, 0.5) * v + vec3(0.5, 0.5, 0.5)
src0.rgb = temp[3] , -- v
src1.rgb = float(48) : -- 0.5
temp[3].rgb = MAD src1.rgb src0.rgb src1.rgb;
--------------------------------------------------------------------------------
-- end of 'palette' function
--------------------------------------------------------------------------------
-- d = ex2(-l);
-- (l is still length(uv0) here; the previous value of d is discarded)
src0.a = temp[0] : -- l
temp[1].a = EX2 -src0.a ;
-- l = length(uv);
-- same dot / RSQ / RCP sequence as for uv0 above
src0.rgb = temp[1] : -- uv
DP3 src0.rg0 src0.rg0 ,
temp[0].a = DP ;
src0.a = temp[0] :
temp[0].a = RSQ |src0.a| ;
src0.a = temp[0] :
temp[0].a = RCP src0.a ;
-- d = l * d;
src0.a = temp[0] , -- l
src1.a = temp[1] : -- d
temp[1].a = MAD src0.a src1.a src0.0 ;
-- d = d * 8.0 + time;
src0.a = temp[1] , -- d
src1.a = float(80) , -- 8.0
src2.rgb = const[0] : -- time (r)
temp[1].a = MAD src0.a src1.a src2.r ;
-- d = 0.125 * sin(d); <OMOD>
-- implemented as: scale by 1 / PI_2, wrap into [-0.5, 0.5), then SIN with the
-- 0.125 multiplier applied to the instruction result
-- d = d * 0.159154936671257019043 + 0.5; // 48
src0.a = temp[1] , -- d
src1.rgb = const[1] , -- I_PI_2 (g)
src2.a = float(48) : -- 0.5
temp[1].a = MAD src0.a src1.g src2.a ;
-- d = fract(d);
src0.a = temp[1] : -- d
temp[1].a = FRC src0.a ;
-- d = d - 0.5;
src0.a = temp[1] , -- d
src1.a = float(48) : -- 0.5
temp[1].a = MAD src0.1 src0.a -src1.a ;
-- d = 0.125 * sin(d * PI_2);
src0.a = temp[1] :
temp[1].a = 0.125 * SIN src0.a ;
-- d = 1.0 / abs(d);
src0.a = temp[1] : -- d
temp[1].a = RCP |src0.a|;
-- d = 0.01 * d;
src0.a = temp[1] , -- d
src1.rgb = const[0] : -- 0.01 (b)
temp[1].a = MAD src0.a src1.b src0.0 ;
-- d = pow(d, 1.2);
-- computed as EX2(1.2 * LN2(d))
src0.a = temp[1] : -- d
temp[1].a = LN2 src0.a ;
src0.a = temp[1] ,
src1.rgb = const[0] : -- 1.2 (g)
temp[1].a = MAD src0.a src1.g src0.0 ;
src0.a = temp[1] :
temp[1].a = EX2 src0.a ;
-- final_color = col * d + final_color
src0.rgb = temp[3] , -- col
src1.a = temp[1] , -- d
src2.rgb = temp[2] : -- final_color
temp[2].rgb = MAD src0.rgb src1.aaa src2.rgb ;
-- i = i + 1
-- (1 * i + 1, using the constant-one swizzle for both the factor and the addend)
src0.a = temp[3] :
temp[3].a = MAD src0.1 src0.a src0.1 ;

View File

@ -0,0 +1,224 @@
0x00001800,
0x08020001,
0x080200bc,
0x00ed8720,
0x00000000,
0x00790010,
0x00001800,
0x08020001,
0x08020080,
0x00000720,
0x00000000,
0x00000019,
0x00001800,
0x08020001,
0x0802c080,
0x00fb0720,
0x00000000,
0x00f6d010,
0x00004000,
0x08020000,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x00300100,
0x00000000,
0x0060e010,
0x1a000000,
0x00004000,
0x08020100,
0x08000500,
0x00000000,
0x00600010,
0x1a000000,
0x00003800,
0x8802c102,
0x08020001,
0x006d86d8,
0x00000000,
0x00223030,
0x00003800,
0x08020003,
0x08020080,
0x00000220,
0x00000000,
0x00000039,
0x00003800,
0x0802c003,
0x08020080,
0x004406d8,
0x00000000,
0x00a21030,
0x00000800,
0x08020003,
0x08020080,
0x00000000,
0x0000000d,
0x0000003a,
0x00001000,
0x08020003,
0x08020080,
0x00000000,
0x0000400d,
0x0000003a,
0x00002000,
0x08020003,
0x08020080,
0x00000000,
0x0000800d,
0x0000003a,
0x00003800,
0x0802c003,
0x08020080,
0x00440221,
0x00000000,
0x00221030,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0002c018,
0x00000000,
0x00004000,
0x08020001,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x08000400,
0x00000000,
0x0068c010,
0x20000000,
0x00004000,
0x10020080,
0x08034001,
0x00000000,
0x0068c010,
0x04000000,
0x00004000,
0x08040480,
0x0b020001,
0x00000000,
0x0028c010,
0x1c000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c017,
0x00000000,
0x00004000,
0x08020080,
0x0802c001,
0x00000000,
0x00618010,
0x5a000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x1800c01c,
0x00000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0004c01a,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0048c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c019,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0028c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c018,
0x00000000,
0x00003800,
0x00220003,
0x08000480,
0x006da220,
0x00000000,
0x00222020,
0x00004000,
0x08020080,
0x08020003,
0x00000000,
0x00618030,
0x30000000,

View File

@ -646,7 +646,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -663,7 +663,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_128x128_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_128x128_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -707,7 +707,7 @@ int main()
intermediate_handle[1] = create_colorbuffer(fd, texture_size, NULL);
{
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {
((uint32_t*)texturebuffer_ptr)[i] = ((uint32_t*)texture_buf)[i];

View File

@ -765,7 +765,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -935,7 +935,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -991,7 +991,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -981,7 +981,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -1040,7 +1040,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -981,7 +981,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -990,7 +990,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -996,7 +996,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -699,10 +699,10 @@ int main()
const int texture_size = 1024 * 1024 * 4;
texturebuffer_handle[0] = load_texture(fd,
"../texture/butterfly_1024x1024_argb8888.data",
"../texture/butterfly_1024x1024_rgba8888.data",
texture_size);
texturebuffer_handle[1] = load_texture(fd,
"../texture/bird_1024x1024_argb8888.data",
"../texture/bird_1024x1024_rgba8888.data",
texture_size);
{ // clear colorbuffer

View File

@ -972,7 +972,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

48
model/cube_indexed.h Normal file
View File

@ -0,0 +1,48 @@
#pragma once
// Index list for the cube mesh: 12 triangles, three vertex indices per row.
// Each index selects one 8-float record of cube_vertices. Rows 0-5 hold the
// first triangle of each face; rows 6-11 hold the second triangle of the same
// faces, reusing two of the face's vertices and adding its fourth corner
// (vertices 18-23). The face labels are the normals stored for those vertices.
const int cube_Cube_triangles[] = {
   0,  1,  2,   // +Y
   3,  4,  5,   // +Z
   6,  7,  8,   // -X
   9, 10, 11,   // -Y
  12, 13, 14,   // +X
  15, 16, 17,   // -Z
   0, 18,  1,   // +Y
   3, 19,  4,   // +Z
   6, 20,  7,   // -X
   9, 21, 10,   // -Y
  12, 22, 13,   // +X
  15, 23, 16,   // -Z
};

// Number of indices (not triangles) in cube_Cube_triangles.
const int cube_Cube_triangles_length =
  sizeof cube_Cube_triangles / sizeof cube_Cube_triangles[0];
// Vertex data for the cube mesh: 24 vertices, 8 floats per vertex.
// Judging by the values, each row is laid out as
//   position x, y, z | texture coordinate u, v | normal x, y, z
// NOTE(review): layout inferred from the data, confirm against the draw code.
// Vertices 0-17 come in groups of three per face; vertices 18-23 are the
// remaining fourth corner of each face, in the same face order.
const float cube_vertices[] = {
  /*  0 */ -1.000000f,  1.000000f, -1.000000f,   0.875000f, 0.500000f,   -0.0000f,  1.0000f, -0.0000f,
  /*  1 */  1.000000f,  1.000000f,  1.000000f,   0.625000f, 0.750000f,   -0.0000f,  1.0000f, -0.0000f,
  /*  2 */  1.000000f,  1.000000f, -1.000000f,   0.625000f, 0.500000f,   -0.0000f,  1.0000f, -0.0000f,

  /*  3 */  1.000000f,  1.000000f,  1.000000f,   0.625000f, 0.750000f,   -0.0000f, -0.0000f,  1.0000f,
  /*  4 */ -1.000000f, -1.000000f,  1.000000f,   0.375000f, 1.000000f,   -0.0000f, -0.0000f,  1.0000f,
  /*  5 */  1.000000f, -1.000000f,  1.000000f,   0.375000f, 0.750000f,   -0.0000f, -0.0000f,  1.0000f,

  /*  6 */ -1.000000f,  1.000000f,  1.000000f,   0.625000f, 0.000000f,   -1.0000f, -0.0000f, -0.0000f,
  /*  7 */ -1.000000f, -1.000000f, -1.000000f,   0.375000f, 0.250000f,   -1.0000f, -0.0000f, -0.0000f,
  /*  8 */ -1.000000f, -1.000000f,  1.000000f,   0.375000f, 0.000000f,   -1.0000f, -0.0000f, -0.0000f,

  /*  9 */  1.000000f, -1.000000f, -1.000000f,   0.375000f, 0.500000f,   -0.0000f, -1.0000f, -0.0000f,
  /* 10 */ -1.000000f, -1.000000f,  1.000000f,   0.125000f, 0.750000f,   -0.0000f, -1.0000f, -0.0000f,
  /* 11 */ -1.000000f, -1.000000f, -1.000000f,   0.125000f, 0.500000f,   -0.0000f, -1.0000f, -0.0000f,

  /* 12 */  1.000000f,  1.000000f, -1.000000f,   0.625000f, 0.500000f,    1.0000f, -0.0000f, -0.0000f,
  /* 13 */  1.000000f, -1.000000f,  1.000000f,   0.375000f, 0.750000f,    1.0000f, -0.0000f, -0.0000f,
  /* 14 */  1.000000f, -1.000000f, -1.000000f,   0.375000f, 0.500000f,    1.0000f, -0.0000f, -0.0000f,

  /* 15 */ -1.000000f,  1.000000f, -1.000000f,   0.625000f, 0.250000f,   -0.0000f, -0.0000f, -1.0000f,
  /* 16 */  1.000000f, -1.000000f, -1.000000f,   0.375000f, 0.500000f,   -0.0000f, -0.0000f, -1.0000f,
  /* 17 */ -1.000000f, -1.000000f, -1.000000f,   0.375000f, 0.250000f,   -0.0000f, -0.0000f, -1.0000f,

  /* 18 */ -1.000000f,  1.000000f,  1.000000f,   0.875000f, 0.750000f,   -0.0000f,  1.0000f, -0.0000f,
  /* 19 */ -1.000000f,  1.000000f,  1.000000f,   0.625000f, 1.000000f,   -0.0000f, -0.0000f,  1.0000f,
  /* 20 */ -1.000000f,  1.000000f, -1.000000f,   0.625000f, 0.250000f,   -1.0000f, -0.0000f, -0.0000f,
  /* 21 */  1.000000f, -1.000000f,  1.000000f,   0.375000f, 0.750000f,   -0.0000f, -1.0000f, -0.0000f,
  /* 22 */  1.000000f,  1.000000f,  1.000000f,   0.625000f, 0.750000f,    1.0000f, -0.0000f, -0.0000f,
  /* 23 */  1.000000f,  1.000000f, -1.000000f,   0.625000f, 0.500000f,   -0.0000f, -0.0000f, -1.0000f,
};

// Number of floats (not vertices) in cube_vertices.
const int cube_vertices_length =
  sizeof cube_vertices / sizeof cube_vertices[0];

329
model/cubesphere_indexed.h Normal file
View File

@ -0,0 +1,329 @@
#pragma once
const int cubesphere_Cube_triangles[] = {
0, 1, 2,
3, 1, 4,
1, 5, 2,
1, 6, 7,
8, 0, 9,
10, 4, 0,
11, 4, 12,
13, 3, 11,
14, 15, 3,
15, 16, 6,
6, 17, 18,
7, 18, 19,
7, 20, 5,
5, 21, 22,
2, 22, 23,
9, 2, 23,
24, 25, 26,
27, 25, 28,
25, 29, 26,
25, 30, 31,
32, 24, 33,
34, 28, 24,
35, 28, 36,
21, 27, 35,
37, 38, 27,
38, 39, 30,
30, 40, 41,
31, 41, 42,
31, 43, 29,
29, 44, 45,
26, 45, 46,
33, 26, 46,
47, 48, 49,
50, 48, 51,
48, 52, 49,
48, 53, 54,
55, 47, 56,
57, 51, 47,
58, 51, 59,
60, 50, 58,
61, 62, 50,
62, 63, 53,
53, 64, 65,
54, 65, 66,
54, 67, 52,
52, 68, 69,
49, 69, 70,
56, 49, 70,
71, 72, 73,
74, 72, 75,
72, 76, 73,
72, 77, 78,
79, 71, 80,
81, 75, 71,
82, 75, 83,
84, 74, 82,
85, 86, 74,
86, 87, 77,
77, 32, 88,
78, 88, 89,
78, 90, 76,
76, 91, 92,
73, 92, 93,
80, 73, 93,
94, 95, 96,
97, 95, 98,
95, 99, 96,
95, 100, 101,
84, 94, 85,
102, 98, 94,
103, 98, 104,
8, 97, 103,
9, 105, 97,
105, 22, 100,
100, 21, 35,
101, 35, 36,
101, 34, 99,
99, 32, 87,
96, 87, 106,
85, 96, 106,
107, 108, 109,
110, 108, 111,
108, 112, 109,
108, 113, 114,
68, 107, 115,
67, 111, 107,
65, 111, 66,
64, 110, 65,
116, 117, 110,
117, 118, 113,
113, 8, 103,
114, 103, 104,
114, 102, 112,
112, 84, 119,
109, 119, 120,
115, 109, 120,
0, 4, 1,
3, 15, 1,
1, 7, 5,
1, 15, 6,
8, 10, 0,
10, 12, 4,
11, 3, 4,
13, 14, 3,
14, 121, 15,
15, 121, 16,
6, 16, 17,
7, 6, 18,
7, 19, 20,
5, 20, 21,
2, 5, 22,
9, 0, 2,
24, 28, 25,
27, 38, 25,
25, 31, 29,
25, 38, 30,
32, 34, 24,
34, 36, 28,
35, 27, 28,
21, 37, 27,
37, 122, 38,
38, 122, 39,
30, 39, 40,
31, 30, 41,
31, 42, 43,
29, 43, 44,
26, 29, 45,
33, 24, 26,
47, 51, 48,
50, 62, 48,
48, 54, 52,
48, 62, 53,
55, 57, 47,
57, 59, 51,
58, 50, 51,
60, 61, 50,
61, 123, 62,
62, 123, 63,
53, 63, 64,
54, 53, 65,
54, 66, 67,
52, 67, 68,
49, 52, 69,
56, 47, 49,
71, 75, 72,
74, 86, 72,
72, 78, 76,
72, 86, 77,
79, 81, 71,
81, 83, 75,
82, 74, 75,
84, 85, 74,
85, 106, 86,
86, 106, 87,
77, 87, 32,
78, 77, 88,
78, 89, 90,
76, 90, 91,
73, 76, 92,
80, 71, 73,
94, 98, 95,
97, 105, 95,
95, 101, 99,
95, 105, 100,
84, 102, 94,
102, 104, 98,
103, 97, 98,
8, 9, 97,
9, 23, 105,
105, 23, 22,
100, 22, 21,
101, 100, 35,
101, 36, 34,
99, 34, 32,
96, 99, 87,
85, 94, 96,
107, 111, 108,
110, 117, 108,
108, 114, 112,
108, 117, 113,
68, 67, 107,
67, 66, 111,
65, 110, 111,
64, 116, 110,
116, 124, 117,
117, 124, 118,
113, 118, 8,
114, 113, 103,
114, 104, 102,
112, 102, 84,
109, 112, 119,
115, 107, 109,
};
// Number of elements in cubesphere_Cube_triangles, computed at compile time
// from the array's size.
// NOTE(review): the initializer lists three values per row, so this looks like
// an index count (3 per triangle) rather than a triangle count -- confirm
// against the array's element type.
const int cubesphere_Cube_triangles_length = (sizeof (cubesphere_Cube_triangles)) / (sizeof (cubesphere_Cube_triangles[0]));
const float cubesphere_vertices[] = {
0.316157f, 0.728990f, -0.316157f, 0.687500f, 0.562500f, 0.3362f, 0.8797f, -0.3362f,
-0.000000f, 0.839506f, -0.000000f, 0.750000f, 0.625000f, -0.0000f, 1.0000f, -0.0000f,
0.333140f, 0.781829f, 0.000000f, 0.687500f, 0.625000f, 0.3553f, 0.9348f, -0.0000f,
-0.316157f, 0.728990f, -0.316157f, 0.812500f, 0.562500f, -0.3362f, 0.8797f, -0.3362f,
0.000000f, 0.781829f, -0.333140f, 0.750000f, 0.562500f, -0.0000f, 0.9348f, -0.3553f,
0.316157f, 0.728990f, 0.316157f, 0.687500f, 0.687500f, 0.3362f, 0.8797f, 0.3362f,
-0.316157f, 0.728990f, 0.316157f, 0.812500f, 0.687500f, -0.3362f, 0.8797f, 0.3362f,
-0.000000f, 0.781829f, 0.333140f, 0.750000f, 0.687500f, -0.0000f, 0.9348f, 0.3553f,
0.500000f, 0.500000f, -0.500000f, 0.625000f, 0.500000f, 0.5774f, 0.5774f, -0.5774f,
0.572933f, 0.572933f, -0.296650f, 0.625000f, 0.562500f, 0.6737f, 0.6737f, -0.3038f,
0.296650f, 0.572933f, -0.572933f, 0.687500f, 0.500000f, 0.3038f, 0.6737f, -0.6737f,
-0.296650f, 0.572933f, -0.572933f, 0.812500f, 0.500000f, -0.3038f, 0.6737f, -0.6737f,
-0.000000f, 0.609568f, -0.609568f, 0.750000f, 0.500000f, -0.0000f, 0.7071f, -0.7071f,
-0.500000f, 0.500000f, -0.500000f, 0.875000f, 0.500000f, -0.5774f, 0.5774f, -0.5774f,
-0.572933f, 0.572933f, -0.296650f, 0.875000f, 0.562500f, -0.6737f, 0.6737f, -0.3038f,
-0.333140f, 0.781829f, -0.000000f, 0.812500f, 0.625000f, -0.3553f, 0.9348f, -0.0000f,
-0.572933f, 0.572933f, 0.296650f, 0.875000f, 0.687500f, -0.6737f, 0.6737f, 0.3038f,
-0.500000f, 0.500000f, 0.500000f, 0.875000f, 0.750000f, -0.5774f, 0.5774f, 0.5774f,
-0.296650f, 0.572933f, 0.572933f, 0.812500f, 0.750000f, -0.3038f, 0.6737f, 0.6737f,
0.000000f, 0.609568f, 0.609568f, 0.750000f, 0.750000f, -0.0000f, 0.7071f, 0.7071f,
0.296650f, 0.572933f, 0.572933f, 0.687500f, 0.750000f, 0.3038f, 0.6737f, 0.6737f,
0.500000f, 0.500000f, 0.500000f, 0.625000f, 0.750000f, 0.5774f, 0.5774f, 0.5774f,
0.572933f, 0.572933f, 0.296650f, 0.625000f, 0.687500f, 0.6737f, 0.6737f, 0.3038f,
0.609568f, 0.609568f, -0.000000f, 0.625000f, 0.625000f, 0.7071f, 0.7071f, -0.0000f,
0.316157f, -0.316157f, 0.728990f, 0.437500f, 0.812500f, 0.3362f, -0.3362f, 0.8797f,
0.000000f, 0.000000f, 0.839506f, 0.500000f, 0.875000f, -0.0000f, -0.0000f, 1.0000f,
-0.000000f, -0.333140f, 0.781829f, 0.437500f, 0.875000f, -0.0000f, -0.3553f, 0.9348f,
0.316157f, 0.316157f, 0.728990f, 0.562500f, 0.812500f, 0.3362f, 0.3362f, 0.8797f,
0.333140f, -0.000000f, 0.781829f, 0.500000f, 0.812500f, 0.3553f, -0.0000f, 0.9348f,
-0.316157f, -0.316157f, 0.728990f, 0.437500f, 0.937500f, -0.3362f, -0.3362f, 0.8797f,
-0.316157f, 0.316157f, 0.728990f, 0.562500f, 0.937500f, -0.3362f, 0.3362f, 0.8797f,
-0.333140f, 0.000000f, 0.781829f, 0.500000f, 0.937500f, -0.3553f, -0.0000f, 0.9348f,
0.500000f, -0.500000f, 0.500000f, 0.375000f, 0.750000f, 0.5774f, -0.5774f, 0.5774f,
0.296650f, -0.572933f, 0.572933f, 0.375000f, 0.812500f, 0.3038f, -0.6737f, 0.6737f,
0.572933f, -0.296650f, 0.572933f, 0.437500f, 0.750000f, 0.6737f, -0.3038f, 0.6737f,
0.572933f, 0.296650f, 0.572933f, 0.562500f, 0.750000f, 0.6737f, 0.3038f, 0.6737f,
0.609568f, 0.000000f, 0.609568f, 0.500000f, 0.750000f, 0.7071f, -0.0000f, 0.7071f,
0.296650f, 0.572933f, 0.572933f, 0.625000f, 0.812500f, 0.3038f, 0.6737f, 0.6737f,
0.000000f, 0.333140f, 0.781829f, 0.562500f, 0.875000f, -0.0000f, 0.3553f, 0.9348f,
-0.296650f, 0.572933f, 0.572933f, 0.625000f, 0.937500f, -0.3038f, 0.6737f, 0.6737f,
-0.500000f, 0.500000f, 0.500000f, 0.625000f, 1.000000f, -0.5774f, 0.5774f, 0.5774f,
-0.572933f, 0.296650f, 0.572933f, 0.562500f, 1.000000f, -0.6737f, 0.3038f, 0.6737f,
-0.609568f, -0.000000f, 0.609568f, 0.500000f, 1.000000f, -0.7071f, -0.0000f, 0.7071f,
-0.572933f, -0.296650f, 0.572933f, 0.437500f, 1.000000f, -0.6737f, -0.3038f, 0.6737f,
-0.500000f, -0.500000f, 0.500000f, 0.375000f, 1.000000f, -0.5774f, -0.5774f, 0.5774f,
-0.296650f, -0.572933f, 0.572933f, 0.375000f, 0.937500f, -0.3038f, -0.6737f, 0.6737f,
-0.000000f, -0.609568f, 0.609568f, 0.375000f, 0.875000f, -0.0000f, -0.7071f, 0.7071f,
-0.728990f, -0.316157f, 0.316157f, 0.437500f, 0.062500f, -0.8797f, -0.3362f, 0.3362f,
-0.839506f, 0.000000f, -0.000000f, 0.500000f, 0.125000f, -1.0000f, -0.0000f, -0.0000f,
-0.781829f, -0.333140f, -0.000000f, 0.437500f, 0.125000f, -0.9348f, -0.3553f, -0.0000f,
-0.728990f, 0.316157f, 0.316157f, 0.562500f, 0.062500f, -0.8797f, 0.3362f, 0.3362f,
-0.781829f, -0.000000f, 0.333140f, 0.500000f, 0.062500f, -0.9348f, -0.0000f, 0.3553f,
-0.728990f, -0.316157f, -0.316157f, 0.437500f, 0.187500f, -0.8797f, -0.3362f, -0.3362f,
-0.728990f, 0.316157f, -0.316157f, 0.562500f, 0.187500f, -0.8797f, 0.3362f, -0.3362f,
-0.781829f, 0.000000f, -0.333140f, 0.500000f, 0.187500f, -0.9348f, -0.0000f, -0.3553f,
-0.500000f, -0.500000f, 0.500000f, 0.375000f, 0.000000f, -0.5774f, -0.5774f, 0.5774f,
-0.572933f, -0.572933f, 0.296650f, 0.375000f, 0.062500f, -0.6737f, -0.6737f, 0.3038f,
-0.572933f, -0.296650f, 0.572933f, 0.437500f, 0.000000f, -0.6737f, -0.3038f, 0.6737f,
-0.572933f, 0.296650f, 0.572933f, 0.562500f, 0.000000f, -0.6737f, 0.3038f, 0.6737f,
-0.609568f, -0.000000f, 0.609568f, 0.500000f, 0.000000f, -0.7071f, -0.0000f, 0.7071f,
-0.500000f, 0.500000f, 0.500000f, 0.625000f, 0.000000f, -0.5774f, 0.5774f, 0.5774f,
-0.572933f, 0.572933f, 0.296650f, 0.625000f, 0.062500f, -0.6737f, 0.6737f, 0.3038f,
-0.781829f, 0.333140f, -0.000000f, 0.562500f, 0.125000f, -0.9348f, 0.3553f, -0.0000f,
-0.572933f, 0.572933f, -0.296650f, 0.625000f, 0.187500f, -0.6737f, 0.6737f, -0.3038f,
-0.500000f, 0.500000f, -0.500000f, 0.625000f, 0.250000f, -0.5774f, 0.5774f, -0.5774f,
-0.572933f, 0.296650f, -0.572933f, 0.562500f, 0.250000f, -0.6737f, 0.3038f, -0.6737f,
-0.609568f, -0.000000f, -0.609568f, 0.500000f, 0.250000f, -0.7071f, -0.0000f, -0.7071f,
-0.572933f, -0.296650f, -0.572933f, 0.437500f, 0.250000f, -0.6737f, -0.3038f, -0.6737f,
-0.500000f, -0.500000f, -0.500000f, 0.375000f, 0.250000f, -0.5774f, -0.5774f, -0.5774f,
-0.572933f, -0.572933f, -0.296650f, 0.375000f, 0.187500f, -0.6737f, -0.6737f, -0.3038f,
-0.609568f, -0.609568f, -0.000000f, 0.375000f, 0.125000f, -0.7071f, -0.7071f, -0.0000f,
-0.316157f, -0.728990f, -0.316157f, 0.187500f, 0.562500f, -0.3362f, -0.8797f, -0.3362f,
-0.000000f, -0.839506f, 0.000000f, 0.250000f, 0.625000f, -0.0000f, -1.0000f, -0.0000f,
-0.333140f, -0.781829f, 0.000000f, 0.187500f, 0.625000f, -0.3553f, -0.9348f, -0.0000f,
0.316157f, -0.728990f, -0.316157f, 0.312500f, 0.562500f, 0.3362f, -0.8797f, -0.3362f,
-0.000000f, -0.781829f, -0.333140f, 0.250000f, 0.562500f, -0.0000f, -0.9348f, -0.3553f,
-0.316157f, -0.728990f, 0.316157f, 0.187500f, 0.687500f, -0.3362f, -0.8797f, 0.3362f,
0.316157f, -0.728990f, 0.316157f, 0.312500f, 0.687500f, 0.3362f, -0.8797f, 0.3362f,
0.000000f, -0.781829f, 0.333140f, 0.250000f, 0.687500f, -0.0000f, -0.9348f, 0.3553f,
-0.500000f, -0.500000f, -0.500000f, 0.125000f, 0.500000f, -0.5774f, -0.5774f, -0.5774f,
-0.572933f, -0.572933f, -0.296650f, 0.125000f, 0.562500f, -0.6737f, -0.6737f, -0.3038f,
-0.296650f, -0.572933f, -0.572933f, 0.187500f, 0.500000f, -0.3038f, -0.6737f, -0.6737f,
0.296650f, -0.572933f, -0.572933f, 0.312500f, 0.500000f, 0.3038f, -0.6737f, -0.6737f,
0.000000f, -0.609568f, -0.609568f, 0.250000f, 0.500000f, -0.0000f, -0.7071f, -0.7071f,
0.500000f, -0.500000f, -0.500000f, 0.375000f, 0.500000f, 0.5774f, -0.5774f, -0.5774f,
0.572933f, -0.572933f, -0.296650f, 0.375000f, 0.562500f, 0.6737f, -0.6737f, -0.3038f,
0.333140f, -0.781829f, -0.000000f, 0.312500f, 0.625000f, 0.3553f, -0.9348f, -0.0000f,
0.572933f, -0.572933f, 0.296650f, 0.375000f, 0.687500f, 0.6737f, -0.6737f, 0.3038f,
0.296650f, -0.572933f, 0.572933f, 0.312500f, 0.750000f, 0.3038f, -0.6737f, 0.6737f,
-0.000000f, -0.609568f, 0.609568f, 0.250000f, 0.750000f, -0.0000f, -0.7071f, 0.7071f,
-0.296650f, -0.572933f, 0.572933f, 0.187500f, 0.750000f, -0.3038f, -0.6737f, 0.6737f,
-0.500000f, -0.500000f, 0.500000f, 0.125000f, 0.750000f, -0.5774f, -0.5774f, 0.5774f,
-0.572933f, -0.572933f, 0.296650f, 0.125000f, 0.687500f, -0.6737f, -0.6737f, 0.3038f,
-0.609568f, -0.609568f, -0.000000f, 0.125000f, 0.625000f, -0.7071f, -0.7071f, -0.0000f,
0.728990f, -0.316157f, -0.316157f, 0.437500f, 0.562500f, 0.8797f, -0.3362f, -0.3362f,
0.839506f, -0.000000f, -0.000000f, 0.500000f, 0.625000f, 1.0000f, -0.0000f, -0.0000f,
0.781829f, -0.333140f, 0.000000f, 0.437500f, 0.625000f, 0.9348f, -0.3553f, -0.0000f,
0.728990f, 0.316157f, -0.316157f, 0.562500f, 0.562500f, 0.8797f, 0.3362f, -0.3362f,
0.781829f, -0.000000f, -0.333140f, 0.500000f, 0.562500f, 0.9348f, -0.0000f, -0.3553f,
0.728990f, -0.316157f, 0.316157f, 0.437500f, 0.687500f, 0.8797f, -0.3362f, 0.3362f,
0.728990f, 0.316157f, 0.316157f, 0.562500f, 0.687500f, 0.8797f, 0.3362f, 0.3362f,
0.781829f, 0.000000f, 0.333140f, 0.500000f, 0.687500f, 0.9348f, -0.0000f, 0.3553f,
0.572933f, -0.296650f, -0.572933f, 0.437500f, 0.500000f, 0.6737f, -0.3038f, -0.6737f,
0.572933f, 0.296650f, -0.572933f, 0.562500f, 0.500000f, 0.6737f, 0.3038f, -0.6737f,
0.609568f, 0.000000f, -0.609568f, 0.500000f, 0.500000f, 0.7071f, -0.0000f, -0.7071f,
0.781829f, 0.333140f, -0.000000f, 0.562500f, 0.625000f, 0.9348f, 0.3553f, -0.0000f,
0.609568f, -0.609568f, -0.000000f, 0.375000f, 0.625000f, 0.7071f, -0.7071f, -0.0000f,
-0.316157f, -0.316157f, -0.728990f, 0.437500f, 0.312500f, -0.3362f, -0.3362f, -0.8797f,
0.000000f, 0.000000f, -0.839506f, 0.500000f, 0.375000f, -0.0000f, -0.0000f, -1.0000f,
0.000000f, -0.333140f, -0.781829f, 0.437500f, 0.375000f, -0.0000f, -0.3553f, -0.9348f,
-0.316157f, 0.316157f, -0.728990f, 0.562500f, 0.312500f, -0.3362f, 0.3362f, -0.8797f,
-0.333140f, -0.000000f, -0.781829f, 0.500000f, 0.312500f, -0.3553f, -0.0000f, -0.9348f,
0.316157f, -0.316157f, -0.728990f, 0.437500f, 0.437500f, 0.3362f, -0.3362f, -0.8797f,
0.316157f, 0.316157f, -0.728990f, 0.562500f, 0.437500f, 0.3362f, 0.3362f, -0.8797f,
0.333140f, 0.000000f, -0.781829f, 0.500000f, 0.437500f, 0.3553f, -0.0000f, -0.9348f,
-0.296650f, -0.572933f, -0.572933f, 0.375000f, 0.312500f, -0.3038f, -0.6737f, -0.6737f,
-0.296650f, 0.572933f, -0.572933f, 0.625000f, 0.312500f, -0.3038f, 0.6737f, -0.6737f,
-0.000000f, 0.333140f, -0.781829f, 0.562500f, 0.375000f, -0.0000f, 0.3553f, -0.9348f,
0.296650f, 0.572933f, -0.572933f, 0.625000f, 0.437500f, 0.3038f, 0.6737f, -0.6737f,
0.296650f, -0.572933f, -0.572933f, 0.375000f, 0.437500f, 0.3038f, -0.6737f, -0.6737f,
0.000000f, -0.609568f, -0.609568f, 0.375000f, 0.375000f, -0.0000f, -0.7071f, -0.7071f,
-0.609568f, 0.609568f, -0.000000f, 0.875000f, 0.625000f, -0.7071f, 0.7071f, -0.0000f,
0.000000f, 0.609568f, 0.609568f, 0.625000f, 0.875000f, -0.0000f, 0.7071f, 0.7071f,
-0.609568f, 0.609568f, -0.000000f, 0.625000f, 0.125000f, -0.7071f, 0.7071f, -0.0000f,
-0.000000f, 0.609568f, -0.609568f, 0.625000f, 0.375000f, -0.0000f, 0.7071f, -0.7071f,
};
// Number of floats in cubesphere_vertices (not the number of vertices): the
// array is a flat float list and each row of the initializer holds 8 floats.
// NOTE(review): the 8 floats per row look like position xyz, texture uv,
// normal xyz -- confirm against the exporter that generates this table.
const int cubesphere_vertices_length = (sizeof (cubesphere_vertices)) / (sizeof (cubesphere_vertices[0]));

50
model/plane.h Normal file
View File

@ -0,0 +1,50 @@
#pragma once
// Vertex positions of the plane: the four corners of a 2x2 square
// (x and y at +/-1) lying in the z = 0 plane.
const vec3 plane_position[] = {
{1.000000f, 1.000000f, -0.000000f},
{1.000000f, -1.000000f, -0.000000f},
{-1.000000f, 1.000000f, 0.000000f},
{-1.000000f, -1.000000f, 0.000000f},
};
// Texture coordinates of the plane: the four corners of the unit square.
const vec2 plane_texture[] = {
{1.000000f, 0.000000f},
{0.000000f, 1.000000f},
{0.000000f, 0.000000f},
{1.000000f, 1.000000f},
};
// The model's only normal; it points along negative z.
const vec3 plane_normal[] = {
{-0.0000f, -0.0000f, -1.0000f},
};
// The two triangles that make up the plane. Each triangle has three corners,
// and each corner is a triple of indices.
// NOTE(review): the triple looks like {position index, texture index,
// normal index} into plane_position / plane_texture / plane_normal (the third
// value is always 0 and plane_normal has a single entry) -- confirm against
// the triangle_t declaration.
const triangle_t plane_Plane_triangle[] = {
{
{1, 0, 0},
{2, 1, 0},
{0, 2, 0},
},
{
{1, 0, 0},
{3, 3, 0},
{2, 1, 0},
},
};
// The "Plane" object: points at the first entry of plane_Plane_triangle.
// triangle_count is a literal and must match the number of entries in that
// array (currently 2).
const object plane_Plane = {
.triangle = &plane_Plane_triangle[0],
.triangle_count = 2,
};
// Objects belonging to the plane model; there is only one.
const object * plane_object[] = {
&plane_Plane,
};
// Top-level description of the plane model: ties together the position,
// texture and normal arrays with the object list. object_count is a literal
// and must match the number of entries in plane_object (currently 1).
const model plane_model = {
.position = plane_position,
.texture = plane_texture,
.normal = plane_normal,
.object = plane_object,
.object_count = 1
};

View File

@ -21,6 +21,41 @@ static inline void wreg(void * rmmio, uint32_t offset, uint32_t value)
asm volatile ("" ::: "memory");
}
// Offsets of the index/data register pair used by wreg_slow and rreg_slow.
// NOTE(review): presumably the hardware's indexed register-access window --
// confirm against the register reference.
#define MM_INDEX 0x0
#define MM_DATA 0x4

// Writes `value` to the register at `offset` in two steps: `offset` is
// written to MM_INDEX, then `value` is written to MM_DATA.
static inline void wreg_slow(void * rmmio, uint32_t offset, uint32_t value)
{
  wreg(rmmio, MM_INDEX, offset);
  wreg(rmmio, MM_DATA, value);
}
// Reads the register at `offset` in two steps: `offset` is written to
// MM_INDEX, then the value read back from MM_DATA is returned.
static inline uint32_t rreg_slow(void * rmmio, uint32_t offset)
{
  wreg(rmmio, MM_INDEX, offset);
  return rreg(rmmio, MM_DATA);
}
struct name_address {
const char * name;
const int address;
};
// Display register table; the initializer entries come from
// ../regs/display_registers.inc. main() reads each address directly with rreg.
const struct name_address display_addresses[] = {
#include "../regs/display_registers.inc"
};
// Number of entries in display_addresses.
const int display_addresses_length = (sizeof (display_addresses)) / (sizeof (display_addresses[0]));
// Memory controller register table; the initializer entries come from
// ../regs/memory_controller_registers.inc. The first two entries are
// MC_IND_INDEX and MC_IND_DATA, which main() skips by starting its dump loop
// at index 2; the rest are read through that index/data pair.
const struct name_address memory_controller_addresses[] = {
#include "../regs/memory_controller_registers.inc"
};
// Number of entries in memory_controller_addresses (including the two
// skipped ones).
const int memory_controller_addresses_length = (sizeof (memory_controller_addresses)) / (sizeof (memory_controller_addresses[0]));
// PCIe register table; the initializer entries come from
// ../regs/pcie_registers.inc. main() reads these through PCIE_INDEX/PCIE_DATA
// using wreg_slow/rreg_slow.
const struct name_address pcie_addresses[] = {
#include "../regs/pcie_registers.inc"
};
// Number of entries in pcie_addresses.
const int pcie_addresses_length = (sizeof (pcie_addresses)) / (sizeof (pcie_addresses[0]));
int main()
{
////////////////////////////////////////////////////////////////////////
@ -36,16 +71,47 @@ int main()
void * rmmio = resource2_base;
uint32_t value1 = rreg(rmmio, 0x6110);
printf("[r500] D1GRPH_PRIMARY_SURFACE_ADDRESS %08x\n", value1);
uint32_t value2 = rreg(rmmio, 0x6110 + 0x800);
printf("[r500] D2GRPH_PRIMARY_SURFACE_ADDRESS %08x\n", value2);
uint32_t value3 = rreg(rmmio, 0x6118);
printf("[r500] D1GRPH_SECONDARY_SURFACE_ADDRESS %08x\n", value3);
uint32_t value4 = rreg(rmmio, 0x6118 + 0x800);
printf("[r500] D2GRPH_SECONDARY_SURFACE_ADDRESS %08x\n", value4);
wreg(rmmio, 0x6110, 0x813000);
wreg(rmmio, 0x6118, 0x813000);
if (1) {
for (int i = 0; i < display_addresses_length; i++) {
uint32_t value = rreg(rmmio, display_addresses[i].address);
printf("%s %x %08x\n", display_addresses[i].name, display_addresses[i].address, value);
}
}
if (0) {
#define MC_IND_INDEX 0x70
#define MC_IND_INDEX__MC_IND_ADDR(x) (((x) & 0xffff) << 0)
#define MC_IND_INDEX__MC_IND_ADDR__CLEAR (~0xffff)
#define MC_IND_DATA 0x74
// skip MC_IND_INDEX/MC_IND_DATA
const int masks[] = {
(1 << 16),
(1 << 17),
(1 << 20),
(1 << 21),
(1 << 22),
};
for (int i = 2; i < memory_controller_addresses_length; i++) {
const char * name = memory_controller_addresses[i].name;
int address = memory_controller_addresses[i].address;
int mask = (1 << 16) | (1 << 17) | (1 << 20) | (1 << 21) | (1 << 22);
wreg(rmmio, MC_IND_INDEX, MC_IND_INDEX__MC_IND_ADDR(address) | mask);
uint32_t value = rreg(rmmio, MC_IND_DATA);
wreg(rmmio, MC_IND_INDEX, MC_IND_INDEX__MC_IND_ADDR__CLEAR);
printf("%s %x %08x\n", name, address, value);
}
}
if (0) {
#define PCIE_INDEX 0x30
#define PCIE_DATA 0x38
for (int i = 0; i < pcie_addresses_length; i++) {
const char * name = pcie_addresses[i].name;
int address = pcie_addresses[i].address;
wreg_slow(rmmio, PCIE_INDEX, address);
uint32_t value = rreg_slow(rmmio, PCIE_DATA);
printf("%s %x %08x\n", name, address, value);
}
}
}

View File

@ -1,8 +1,10 @@
import sys
def print_error(filename, buf, e):
def print_error(e):
assert len(e.args) == 2, e
message, token = e.args
with open(token.filename, 'rb') as f:
buf = f.read()
lines = buf.splitlines()
line = lines[token.line - 1]
@ -11,7 +13,7 @@ def print_error(filename, buf, e):
col_pointer = '^' * len(token.lexeme)
RED = "\033[0;31m"
DEFAULT = "\033[0;0m"
print(f'File: "{filename}", line {token.line}, column {token.col}\n', file=sys.stderr)
print(f'File: "{token.filename}", line {token.line}, column {token.col}\n', file=sys.stderr)
sys.stderr.write(' ')
wrote_default = False
for i, c in enumerate(line.decode('utf-8')):

View File

@ -10,8 +10,8 @@ from assembler.fs.validator import validate_instruction
from assembler.fs.emitter import emit_instruction
from assembler.error import print_error
def frontend_inner(buf):
lexer = Lexer(buf, find_keyword, emit_newlines=False, minus_is_token=True)
def frontend_inner(filename, buf):
lexer = Lexer(filename, buf, find_keyword, emit_newlines=False, minus_is_token=True)
tokens = list(lexer.lex_tokens())
parser = Parser(tokens)
for ins_ast in parser.instructions():
@ -22,15 +22,15 @@ def frontend_inner(buf):
def frontend(filename, buf):
try:
yield from frontend_inner(buf)
yield from frontend_inner(filename, buf)
except LexerError as e:
print_error(filename, buf, e)
print_error(e)
raise
except ParserError as e:
print_error(filename, buf, e)
print_error(e)
raise
except ValidatorError as e:
print_error(filename, buf, e)
print_error(e)
raise
if __name__ == "__main__":

View File

@ -17,6 +17,9 @@ def emit_alpha_op(code, alpha_op):
if alpha_op.dest.omask is not None:
US_CMN_INST.ALPHA_OMASK(code, alpha_op.dest.omask.value)
# omod
US_ALU_ALPHA_INST.OMOD(code, alpha_op.omod.value)
# opcode
US_ALU_ALPHA_INST.ALPHA_OP(code, alpha_op.opcode.value)
@ -59,6 +62,9 @@ def emit_rgb_op(code, rgb_op):
if rgb_op.dest.omask is not None:
US_CMN_INST.RGB_OMASK(code, rgb_op.dest.omask.value)
# omod
US_ALU_RGB_INST.OMOD(code, rgb_op.omod.value)
# opcode
US_ALU_RGBA_INST.RGB_OP(code, rgb_op.opcode.value)

View File

@ -110,6 +110,26 @@ class Swizzle(IntEnum):
one = 6
unused = 7
# Output modifier selector for an ALU operation. The integer value of each
# member is what the emitter passes to the OMOD field of the instruction.
# NOTE(review): member names suggest "multiply/divide the result by N" --
# confirm against the hardware documentation.
class Omod(IntEnum):
mul_1 = 0
mul_2 = 1
mul_4 = 2
mul_8 = 3
div_2 = 4
div_4 = 5
div_8 = 6
disable = 7
# Maps the source spelling of an omod to its Omod member. Keys are
# (integer part, fractional part) lexeme pairs as bytes, i.e. the two
# identifier tokens on either side of the dot: "0.25" is (b"0", b"25").
# Omod.disable has no spelling here, so it cannot be selected from source.
omod_lexemes = OrderedDict([
((b"1", b"0"), Omod.mul_1),
((b"2", b"0"), Omod.mul_2),
((b"4", b"0"), Omod.mul_4),
((b"8", b"0"), Omod.mul_8),
((b"0", b"5"), Omod.div_2),
((b"0", b"25"), Omod.div_4),
((b"0", b"125"), Omod.div_8),
])
@dataclass
class SwizzleSel:
src: SwizzleSelSrc
@ -119,12 +139,14 @@ class SwizzleSel:
@dataclass
class AlphaOperation:
dest: AlphaDest
omod: Omod
opcode: AlphaOp
sels: list[SwizzleSel]
@dataclass
class RGBOperation:
dest: RGBDest
omod: Omod
opcode: RGBOp
sels: list[SwizzleSel]
@ -413,14 +435,27 @@ def validate_instruction_operation_sels(swizzle_sels, is_alpha):
sels.append(SwizzleSel(src, swizzle, mod))
return sels
def validate_omod_operation(operation):
    """Resolve the optional output modifier of a parsed ALU operation.

    `operation.omod` is either None (no modifier was written) or a pair of
    tokens `(integer, decimal)`; their lexemes form the key looked up in
    `omod_lexemes`.

    Returns the matching `Omod` member, or `Omod.mul_1` when no modifier is
    present.

    Raises `ValidatorError`, attached to the integer token, when the lexeme
    pair is not one of the known modifiers.
    """
    # Identity test rather than `!= None`: None is a singleton, and `!=`
    # would go through the operand's __ne__.
    if operation.omod is None:
        return Omod.mul_1

    integer, decimal = operation.omod
    key = (integer.lexeme, decimal.lexeme)
    if key not in omod_lexemes:
        # Render every accepted spelling as "int.frac" for the error message.
        valid_omods = b", ".join(b".".join(k) for k in omod_lexemes).decode('utf-8')
        raise ValidatorError(f"invalid omod, expected one of [{valid_omods}]", integer)
    return omod_lexemes[key]
def validate_alpha_instruction_operation(operation):
dest = validate_instruction_operation_dest(operation.dest_addr_swizzles,
mask_lookup=alpha_masks,
type_cls=AlphaDest)
omod = validate_omod_operation(operation)
opcode = alpha_op_kws[operation.opcode_keyword.keyword]
sels = validate_instruction_operation_sels(operation.swizzle_sels, is_alpha=True)
return AlphaOperation(
dest,
omod,
opcode,
sels
)
@ -429,10 +464,12 @@ def validate_rgb_instruction_operation(operation):
dest = validate_instruction_operation_dest(operation.dest_addr_swizzles,
mask_lookup=rgb_masks,
type_cls=RGBDest)
omod = validate_omod_operation(operation)
opcode = rgb_op_kws[operation.opcode_keyword.keyword]
sels = validate_instruction_operation_sels(operation.swizzle_sels, is_alpha=False)
return RGBOperation(
dest,
omod,
opcode,
sels
)

View File

@ -35,6 +35,7 @@ class ALUSwizzleSel:
@dataclass
class ALUOperation:
dest_addr_swizzles: list[DestAddrSwizzle]
omod: tuple[Token, Token]
opcode_keyword: Token
swizzle_sels: list[ALUSwizzleSel]
@ -115,6 +116,15 @@ class Parser(BaseParser):
return token.keyword in opcode_keywords
return False
def alu_is_omod(self):
    """Return True when the next four tokens are identifier, dot, identifier, star.

    Tokens are checked in order from offset 0 and checking stops at the
    first mismatch, so later offsets are not inspected once one fails.
    """
    expected = (TT.identifier, TT.dot, TT.identifier, TT.star)
    return all(
        self.match(token_type, offset=offset)
        for offset, token_type in enumerate(expected)
    )
def alu_is_neg(self):
result = self.match(TT.minus)
if result:
@ -154,9 +164,17 @@ class Parser(BaseParser):
def alu_operation(self):
dest_addr_swizzles = []
while not self.alu_is_opcode():
while not (self.alu_is_opcode() or self.alu_is_omod()):
dest_addr_swizzles.append(self.dest_addr_swizzle())
omod = None
if self.alu_is_omod():
omod_integer = self.consume(TT.identifier, "expected omod decimal identifier")
self.consume(TT.dot, "expected omod decimal dot")
omod_decimal = self.consume(TT.identifier, "expected omod decimal identifier")
self.consume(TT.star, "expected omod star")
omod = (omod_integer, omod_decimal)
opcode_keyword = self.consume(TT.keyword, "expected opcode keyword")
swizzle_sels = []
@ -165,6 +183,7 @@ class Parser(BaseParser):
return ALUOperation(
dest_addr_swizzles,
omod,
opcode_keyword,
swizzle_sels
)

View File

@ -55,7 +55,7 @@ def validate_swizzle(token):
return tuple(swizzles[c] for c in token.lexeme)
def validate_mask_swizzle(token) -> tuple[AlphaMask, RGBMask]:
argb_masks = OrderedDict([
rgba_masks = OrderedDict([
(b"none" , (AlphaMask.NONE, RGBMask.NONE)),
(b"r" , (AlphaMask.NONE, RGBMask.R)),
(b"g" , (AlphaMask.NONE, RGBMask.G)),
@ -72,9 +72,9 @@ def validate_mask_swizzle(token) -> tuple[AlphaMask, RGBMask]:
(b"gba" , (AlphaMask.A, RGBMask.GB)),
(b"rgba" , (AlphaMask.A, RGBMask.RGB)),
])
if token.lexeme not in argb_masks:
if token.lexeme not in rgba_masks:
raise ValidatorError("invalid destination mask", token)
return argb_masks[token.lexeme]
return rgba_masks[token.lexeme]
def validate_masks(ins_ast: parser.TEXInstruction):
addresses = set()

View File

@ -2,6 +2,7 @@ from dataclasses import dataclass
from enum import Enum, auto
from itertools import chain
from typing import Union, Any
from os import path
DEBUG = True
@ -21,9 +22,11 @@ class TT(Enum):
bar = auto()
comma = auto()
minus = auto()
star = auto()
@dataclass
class Token:
filename: str
start_ix: int
line: int
col: int
@ -42,7 +45,10 @@ class LexerError(Exception):
pass
class Lexer:
def __init__(self, buf: memoryview, find_keyword,
def __init__(self,
filename: str,
buf: memoryview,
find_keyword,
emit_newlines=False,
minus_is_token=False):
self.start_ix = 0
@ -53,6 +59,8 @@ class Lexer:
self.find_keyword = find_keyword
self.emit_newlines = emit_newlines
self.minus_is_token = minus_is_token
self.filename = filename
self.nested_lexer = None
def at_end_p(self):
return self.current_ix >= len(self.buf)
@ -73,7 +81,7 @@ class Lexer:
return self.buf[self.current_ix]
def pos(self):
return self.start_ix, self.line, self.col - (self.current_ix - self.start_ix)
return self.filename, self.start_ix, self.line, self.col - (self.current_ix - self.start_ix)
def identifier(self):
while not self.at_end_p() and self.peek() in identifier_characters:
@ -84,8 +92,26 @@ class Lexer:
else:
return Token(*self.pos(), TT.identifier, self.lexeme(), None)
def include(self, filename):
    """Start lexing an included file.

    `filename` is the bytes lexeme of the include path. It is decoded as
    UTF-8 and joined onto the directory of the file this lexer is reading.
    The file is read in binary mode and handed to a new Lexer, created with
    this lexer's own settings and stored in `self.nested_lexer`.
    """
    base_dir = path.dirname(self.filename)
    included_path = path.join(base_dir, filename.decode('utf-8'))
    with open(included_path, 'rb') as included_file:
        contents = included_file.read()
    self.nested_lexer = Lexer(
        included_path,
        contents,
        find_keyword=self.find_keyword,
        emit_newlines=self.emit_newlines,
        minus_is_token=self.minus_is_token,
    )
def lex_token(self):
while True:
if self.nested_lexer is not None:
token = self.nested_lexer.lex_token()
if token.type is TT.eof:
self.nested_lexer = None
else:
return token
self.start_ix = self.current_ix
if self.at_end_p():
@ -112,6 +138,8 @@ class Lexer:
return Token(*self.pos(), TT.semicolon, self.lexeme())
elif c == ord(','):
return Token(*self.pos(), TT.comma, self.lexeme())
elif c == ord('*'):
return Token(*self.pos(), TT.star, self.lexeme())
elif c == ord('-') and self.peek() == ord('-'):
self.advance()
while not self.at_end_p() and self.peek() != ord('\n'):
@ -119,8 +147,24 @@ class Lexer:
elif self.minus_is_token and c == ord('-'):
return Token(*self.pos(), TT.minus, self.lexeme())
elif c == ord('#'):
while not self.at_end_p() and self.peek() != ord('\n'):
for c in b"include":
o = self.advance()
if o != c:
token = Token(*self.pos(), None, self.lexeme())
raise LexerError(f"unexpected character at line:{self.line} col:{self.col}, expected `#include`", token)
while self.peek() == ord(' '):
self.advance()
self.start_ix = self.current_ix
quote = self.advance()
if quote != ord('"'):
token = Token(*self.pos(), None, self.lexeme())
raise LexerError(f"unexpected character at line:{self.line} col:{self.col}, expected `\"`", token)
self.start_ix = self.current_ix
while self.peek() != ord('"'):
self.advance()
filename = self.lexeme()
assert self.advance() == ord('"')
self.include(filename)
elif c == ord(' ') or c == ord('\r') or c == ord('\t'):
pass
elif c == ord('\n'):

View File

@ -22,8 +22,8 @@ class BaseParser:
self.current_ix += 1
return token
def match(self, token_type):
token = self.peek()
def match(self, token_type, offset=0):
token = self.peek(offset)
return token.type == token_type
def match_keyword(self, keyword):

View File

@ -22,8 +22,8 @@ out[0].xz = VE_MAD input[0].-y-_-0-_ temp[0].x_0_ temp[0].y_0_
out[0].yw = VE_MAD input[0]._x_0 temp[0]._x_0 temp[0]._z_1
"""
def frontend_inner(buf):
lexer = Lexer(buf, find_keyword)
def frontend_inner(filename, buf):
lexer = Lexer(filename, buf, find_keyword)
tokens = list(lexer.lex_tokens())
parser = Parser(tokens)
for ins in parser.instructions():
@ -37,15 +37,15 @@ def frontend_inner(buf):
def frontend(filename, buf):
try:
yield from frontend_inner(buf)
yield from frontend_inner(filename, buf)
except ParserError as e:
print_error(input_filename, buf, e)
print_error(e)
raise
except LexerError as e:
print_error(input_filename, buf, e)
print_error(e)
raise
except ValidatorError as e:
print_error(filename, buf, e)
print_error(e)
raise
if __name__ == "__main__":

2336
regs/display_registers.inc Normal file

File diff suppressed because it is too large Load Diff

1752
regs/display_registers.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,628 @@
{
.name = "MC_IND_INDEX",
.address = 0x70,
},
{
.name = "MC_IND_DATA",
.address = 0x74,
},
{
.name = "MC_STATUS",
.address = 0x0,
},
{
.name = "MC_ARB_MIN",
.address = 0x10,
},
{
.name = "MC_PT0_CNTL",
.address = 0x100,
},
{
.name = "MC_PT0_CONTEXT0_CNTL",
.address = 0x102,
},
{
.name = "MC_PT0_CONTEXT1_CNTL",
.address = 0x103,
},
{
.name = "MC_PT0_CONTEXT2_CNTL",
.address = 0x104,
},
{
.name = "MC_PT0_CONTEXT3_CNTL",
.address = 0x105,
},
{
.name = "MC_PT0_CONTEXT4_CNTL",
.address = 0x106,
},
{
.name = "MC_PT0_CONTEXT5_CNTL",
.address = 0x107,
},
{
.name = "MC_PT0_CONTEXT6_CNTL",
.address = 0x108,
},
{
.name = "MC_PT0_CONTEXT7_CNTL",
.address = 0x109,
},
{
.name = "MC_PT0_SYSTEM_APERTURE_LOW_ADDR",
.address = 0x112,
},
{
.name = "MC_PT0_SYSTEM_APERTURE_HIGH_ADDR",
.address = 0x114,
},
{
.name = "MC_PT0_SURFACE_PROBE",
.address = 0x116,
},
{
.name = "MC_PT0_SURFACE_PROBE_FAULT_STATUS",
.address = 0x118,
},
{
.name = "MC_PT0_PROTECTION_FAULT_STATUS",
.address = 0x11a,
},
{
.name = "MC_PT0_CONTEXT0_DEFAULT_READ_ADDR",
.address = 0x11c,
},
{
.name = "MC_PT0_CONTEXT1_DEFAULT_READ_ADDR",
.address = 0x11d,
},
{
.name = "MC_PT0_CONTEXT2_DEFAULT_READ_ADDR",
.address = 0x11e,
},
{
.name = "MC_PT0_CONTEXT3_DEFAULT_READ_ADDR",
.address = 0x11f,
},
{
.name = "MC_ARB_TIMERS",
.address = 0x12,
},
{
.name = "MC_PT0_CONTEXT4_DEFAULT_READ_ADDR",
.address = 0x120,
},
{
.name = "MC_PT0_CONTEXT5_DEFAULT_READ_ADDR",
.address = 0x121,
},
{
.name = "MC_PT0_CONTEXT6_DEFAULT_READ_ADDR",
.address = 0x122,
},
{
.name = "MC_PT0_CONTEXT7_DEFAULT_READ_ADDR",
.address = 0x123,
},
{
.name = "MC_PT0_CONTEXT0_FLAT_BASE_ADDR",
.address = 0x12c,
},
{
.name = "MC_PT0_CONTEXT1_FLAT_BASE_ADDR",
.address = 0x12d,
},
{
.name = "MC_PT0_CONTEXT2_FLAT_BASE_ADDR",
.address = 0x12e,
},
{
.name = "MC_PT0_CONTEXT3_FLAT_BASE_ADDR",
.address = 0x12f,
},
{
.name = "MC_ARB_DRAM_PENALTIES",
.address = 0x13,
},
{
.name = "MC_PT0_CONTEXT4_FLAT_BASE_ADDR",
.address = 0x130,
},
{
.name = "MC_PT0_CONTEXT5_FLAT_BASE_ADDR",
.address = 0x131,
},
{
.name = "MC_PT0_CONTEXT6_FLAT_BASE_ADDR",
.address = 0x132,
},
{
.name = "MC_PT0_CONTEXT7_FLAT_BASE_ADDR",
.address = 0x133,
},
{
.name = "MC_PT0_CONTEXT0_FLAT_START_ADDR",
.address = 0x13c,
},
{
.name = "MC_PT0_CONTEXT1_FLAT_START_ADDR",
.address = 0x13d,
},
{
.name = "MC_PT0_CONTEXT2_FLAT_START_ADDR",
.address = 0x13e,
},
{
.name = "MC_PT0_CONTEXT3_FLAT_START_ADDR",
.address = 0x13f,
},
{
.name = "MC_ARB_DRAM_PENALTIES2",
.address = 0x14,
},
{
.name = "MC_PT0_CONTEXT4_FLAT_START_ADDR",
.address = 0x140,
},
{
.name = "MC_PT0_CONTEXT5_FLAT_START_ADDR",
.address = 0x141,
},
{
.name = "MC_PT0_CONTEXT6_FLAT_START_ADDR",
.address = 0x142,
},
{
.name = "MC_PT0_CONTEXT7_FLAT_START_ADDR",
.address = 0x143,
},
{
.name = "MC_PT0_CONTEXT0_FLAT_END_ADDR",
.address = 0x14c,
},
{
.name = "MC_PT0_CONTEXT1_FLAT_END_ADDR",
.address = 0x14d,
},
{
.name = "MC_PT0_CONTEXT2_FLAT_END_ADDR",
.address = 0x14e,
},
{
.name = "MC_PT0_CONTEXT3_FLAT_END_ADDR",
.address = 0x14f,
},
{
.name = "MC_ARB_DRAM_PENALTIES3",
.address = 0x15,
},
{
.name = "MC_PT0_CONTEXT4_FLAT_END_ADDR",
.address = 0x150,
},
{
.name = "MC_PT0_CONTEXT5_FLAT_END_ADDR",
.address = 0x151,
},
{
.name = "MC_PT0_CONTEXT6_FLAT_END_ADDR",
.address = 0x152,
},
{
.name = "MC_PT0_CONTEXT7_FLAT_END_ADDR",
.address = 0x153,
},
{
.name = "MC_PT0_CONTEXT0_MULTI_LEVEL_BASE_ADDR",
.address = 0x15c,
},
{
.name = "MC_PT0_CONTEXT1_MULTI_LEVEL_BASE_ADDR",
.address = 0x15d,
},
{
.name = "MC_PT0_CONTEXT2_MULTI_LEVEL_BASE_ADDR",
.address = 0x15e,
},
{
.name = "MC_PT0_CONTEXT3_MULTI_LEVEL_BASE_ADDR",
.address = 0x15f,
},
{
.name = "MC_ARB_RATIO_CLK_SEQ",
.address = 0x16,
},
{
.name = "MC_PT0_CONTEXT4_MULTI_LEVEL_BASE_ADDR",
.address = 0x160,
},
{
.name = "MC_PT0_CONTEXT5_MULTI_LEVEL_BASE_ADDR",
.address = 0x161,
},
{
.name = "MC_PT0_CONTEXT6_MULTI_LEVEL_BASE_ADDR",
.address = 0x162,
},
{
.name = "MC_PT0_CONTEXT7_MULTI_LEVEL_BASE_ADDR",
.address = 0x163,
},
{
.name = "MC_PT0_CLIENT0_CNTL",
.address = 0x16c,
},
{
.name = "MC_PT0_CLIENT1_CNTL",
.address = 0x16d,
},
{
.name = "MC_PT0_CLIENT2_CNTL",
.address = 0x16e,
},
{
.name = "MC_PT0_CLIENT3_CNTL",
.address = 0x16f,
},
{
.name = "MC_ARB_RDWR_SWITCH",
.address = 0x17,
},
{
.name = "MC_PT0_CLIENT4_CNTL",
.address = 0x170,
},
{
.name = "MC_PT0_CLIENT5_CNTL",
.address = 0x171,
},
{
.name = "MC_PT0_CLIENT6_CNTL",
.address = 0x172,
},
{
.name = "MC_PT0_CLIENT7_CNTL",
.address = 0x173,
},
{
.name = "MC_PT0_CLIENT8_CNTL",
.address = 0x174,
},
{
.name = "MC_PT0_CLIENT9_CNTL",
.address = 0x175,
},
{
.name = "MC_PT0_CLIENT10_CNTL",
.address = 0x176,
},
{
.name = "MC_PT0_CLIENT11_CNTL",
.address = 0x177,
},
{
.name = "MC_PT0_CLIENT12_CNTL",
.address = 0x178,
},
{
.name = "MC_PT0_CLIENT13_CNTL",
.address = 0x179,
},
{
.name = "MC_PT0_CLIENT14_CNTL",
.address = 0x17a,
},
{
.name = "MC_PT0_CLIENT15_CNTL",
.address = 0x17b,
},
{
.name = "MC_PT0_CLIENT16_CNTL",
.address = 0x17c,
},
{
.name = "MC_SW_CNTL",
.address = 0x18,
},
{
.name = "MC_TIMING_CNTL_2",
.address = 0x3,
},
{
.name = "MC_WRITE_AGE1",
.address = 0x37,
},
{
.name = "MC_WRITE_AGE2",
.address = 0x38,
},
{
.name = "MC_FB_LOCATION",
.address = 0x4,
},
{
.name = "MC_AGP_LOCATION",
.address = 0x5,
},
{
.name = "AGP_BASE",
.address = 0x6,
},
{
.name = "MC_SEQ_DRAM",
.address = 0x60,
},
{
.name = "MC_SEQ_RAS_TIMING",
.address = 0x61,
},
{
.name = "MC_SEQ_CAS_TIMING",
.address = 0x62,
},
{
.name = "MC_SEQ_MISC_TIMING",
.address = 0x63,
},
{
.name = "MC_SEQ_RD_CTL_I0",
.address = 0x64,
},
{
.name = "MC_SEQ_RD_CTL_I1",
.address = 0x65,
},
{
.name = "MC_SEQ_WR_CTL_I0",
.address = 0x66,
},
{
.name = "MC_SEQ_WR_CTL_I1",
.address = 0x67,
},
{
.name = "MC_SEQ_IO_CTL_I0",
.address = 0x68,
},
{
.name = "MC_SEQ_IO_CTL_I1",
.address = 0x69,
},
{
.name = "MC_SEQ_NPL_CTL_I0",
.address = 0x6a,
},
{
.name = "MC_SEQ_NPL_CTL_I1",
.address = 0x6b,
},
{
.name = "MC_SEQ_CK_PAD_CNTL_I0",
.address = 0x6c,
},
{
.name = "MC_SEQ_CK_PAD_CNTL_I1",
.address = 0x6d,
},
{
.name = "MC_SEQ_CMD_PAD_CNTL_I0",
.address = 0x6e,
},
{
.name = "MC_SEQ_CMD_PAD_CNTL_I1",
.address = 0x6f,
},
{
.name = "AGP_BASE_2",
.address = 0x7,
},
{
.name = "MC_SEQ_DQ_PAD_CNTL_I0",
.address = 0x70,
},
{
.name = "MC_SEQ_DQ_PAD_CNTL_I1",
.address = 0x71,
},
{
.name = "MC_SEQ_QS_PAD_CNTL_I0",
.address = 0x72,
},
{
.name = "MC_SEQ_QS_PAD_CNTL_I1",
.address = 0x73,
},
{
.name = "MC_SEQ_A_PAD_CNTL_I0",
.address = 0x74,
},
{
.name = "MC_SEQ_A_PAD_CNTL_I1",
.address = 0x75,
},
{
.name = "MC_SEQ_CMD",
.address = 0x76,
},
{
.name = "MC_SEQ_STATUS",
.address = 0x77,
},
{
.name = "MC_CNTL0",
.address = 0x8,
},
{
.name = "MC_IO_PAD_CNTL_I0",
.address = 0x80,
},
{
.name = "MC_IO_PAD_CNTL_I1",
.address = 0x81,
},
{
.name = "MC_IO_PAD_CNTL",
.address = 0x82,
},
{
.name = "MC_IO_RD_DQ_CNTL_I0",
.address = 0x84,
},
{
.name = "MC_IO_RD_DQ_CNTL_I1",
.address = 0x85,
},
{
.name = "MC_IO_RD_QS_CNTL_I0",
.address = 0x86,
},
{
.name = "MC_IO_RD_QS_CNTL_I1",
.address = 0x87,
},
{
.name = "MC_IO_WR_CNTL_I0",
.address = 0x88,
},
{
.name = "MC_IO_WR_CNTL_I1",
.address = 0x89,
},
{
.name = "MC_IO_CK_PAD_CNTL_I0",
.address = 0x8a,
},
{
.name = "MC_IO_CK_PAD_CNTL_I1",
.address = 0x8b,
},
{
.name = "MC_IO_CMD_PAD_CNTL_I0",
.address = 0x8c,
},
{
.name = "MC_IO_CMD_PAD_CNTL_I1",
.address = 0x8d,
},
{
.name = "MC_IO_DQ_PAD_CNTL_I0",
.address = 0x8e,
},
{
.name = "MC_IO_DQ_PAD_CNTL_I1",
.address = 0x8f,
},
{
.name = "MC_CNTL1",
.address = 0x9,
},
{
.name = "MC_IO_QS_PAD_CNTL_I0",
.address = 0x90,
},
{
.name = "MC_IO_QS_PAD_CNTL_I1",
.address = 0x91,
},
{
.name = "MC_IO_A_PAD_CNTL_I0",
.address = 0x92,
},
{
.name = "MC_IO_A_PAD_CNTL_I1",
.address = 0x93,
},
{
.name = "MC_IO_WR_DQ_CNTL_I0",
.address = 0x94,
},
{
.name = "MC_IO_WR_DQ_CNTL_I1",
.address = 0x95,
},
{
.name = "MC_IO_WR_QS_CNTL_I0",
.address = 0x96,
},
{
.name = "MC_IO_WR_QS_CNTL_I1",
.address = 0x97,
},
{
.name = "MC_VENDOR_ID_I0",
.address = 0x98,
},
{
.name = "MC_VENDOR_ID_I1",
.address = 0x99,
},
{
.name = "MC_NPL_STATUS_I0",
.address = 0x9a,
},
{
.name = "MC_NPL_STATUS_I1",
.address = 0x9b,
},
{
.name = "MC_IO_RD_QS2_CNTL_I0",
.address = 0x9c,
},
{
.name = "MC_IO_RD_QS2_CNTL_I1",
.address = 0x9d,
},
{
.name = "MC_RFSH_CNTL",
.address = 0xa,
},
{
.name = "MC_IMP_CNTL",
.address = 0xa0,
},
{
.name = "MC_IMP_DEBUG",
.address = 0xa1,
},
{
.name = "MC_IMP_STATUS",
.address = 0xa2,
},
{
.name = "MC_RBS_MAP",
.address = 0xb0,
},
{
.name = "MC_RBS_CZT_HWM",
.address = 0xb1,
},
{
.name = "MC_RBS_SUN_HWM",
.address = 0xb2,
},
{
.name = "MC_RBS_MISC",
.address = 0xb3,
},
{
.name = "MC_PMG_CMD",
.address = 0xe0,
},
{
.name = "MC_PMG_CFG",
.address = 0xe1,
},
{
.name = "MC_MISC_0",
.address = 0xf0,
},
{
.name = "MC_MISC_1",
.address = 0xf1,
},
{
.name = "MC_DEBUG",
.address = 0xfe,
},

View File

@ -0,0 +1,471 @@
MC_IND_INDEX
MCDEC:0x70
2-2
MC_IND_DATA
MCDEC:0x74
2-2
MC_STATUS
MCIND:0x0
2-2
MC_ARB_MIN
MCIND:0x10
2-8
MC_PT0_CNTL
MCIND:0x100
2-32
MC_PT0_CONTEXT0_CNTL
MCIND:0x102
2-33
MC_PT0_CONTEXT1_CNTL
MCIND:0x103
2-33
MC_PT0_CONTEXT2_CNTL
MCIND:0x104
2-33
MC_PT0_CONTEXT3_CNTL
MCIND:0x105
2-34
MC_PT0_CONTEXT4_CNTL
MCIND:0x106
2-34
MC_PT0_CONTEXT5_CNTL
MCIND:0x107
2-34
MC_PT0_CONTEXT6_CNTL
MCIND:0x108
2-34
MC_PT0_CONTEXT7_CNTL
MCIND:0x109
2-35
MC_PT0_SYSTEM_APERTURE_LOW_ADDR
MCIND:0x112
2-35
MC_PT0_SYSTEM_APERTURE_HIGH_ADDR
MCIND:0x114
2-35
MC_PT0_SURFACE_PROBE
MCIND:0x116
2-35
MC_PT0_SURFACE_PROBE_FAULT_STATUS
MCIND:0x118
2-36
MC_PT0_PROTECTION_FAULT_STATUS
MCIND:0x11A
2-36
MC_PT0_CONTEXT0_DEFAULT_READ_ADDR
MCIND:0x11C
2-36
MC_PT0_CONTEXT1_DEFAULT_READ_ADDR
MCIND:0x11D
2-36
MC_PT0_CONTEXT2_DEFAULT_READ_ADDR
MCIND:0x11E
2-37
MC_PT0_CONTEXT3_DEFAULT_READ_ADDR
MCIND:0x11F
2-37
MC_ARB_TIMERS
MCIND:0x12
2-8
MC_PT0_CONTEXT4_DEFAULT_READ_ADDR
MCIND:0x120
2-37
MC_PT0_CONTEXT5_DEFAULT_READ_ADDR
MCIND:0x121
2-37
MC_PT0_CONTEXT6_DEFAULT_READ_ADDR
MCIND:0x122
2-37
MC_PT0_CONTEXT7_DEFAULT_READ_ADDR
MCIND:0x123
2-37
MC_PT0_CONTEXT0_FLAT_BASE_ADDR
MCIND:0x12C
2-37
MC_PT0_CONTEXT1_FLAT_BASE_ADDR
MCIND:0x12D
2-38
MC_PT0_CONTEXT2_FLAT_BASE_ADDR
MCIND:0x12E
2-38
MC_PT0_CONTEXT3_FLAT_BASE_ADDR
MCIND:0x12F
2-38
MC_ARB_DRAM_PENALTIES
MCIND:0x13
2-8
MC_PT0_CONTEXT4_FLAT_BASE_ADDR
MCIND:0x130
2-38
MC_PT0_CONTEXT5_FLAT_BASE_ADDR
MCIND:0x131
2-38
MC_PT0_CONTEXT6_FLAT_BASE_ADDR
MCIND:0x132
2-38
MC_PT0_CONTEXT7_FLAT_BASE_ADDR
MCIND:0x133
2-39
MC_PT0_CONTEXT0_FLAT_START_ADDR
MCIND:0x13C
2-39
MC_PT0_CONTEXT1_FLAT_START_ADDR
MCIND:0x13D
2-39
MC_PT0_CONTEXT2_FLAT_START_ADDR
MCIND:0x13E
2-39
MC_PT0_CONTEXT3_FLAT_START_ADDR
MCIND:0x13F
2-39
MC_ARB_DRAM_PENALTIES2
MCIND:0x14
2-8
MC_PT0_CONTEXT4_FLAT_START_ADDR
MCIND:0x140
2-39
MC_PT0_CONTEXT5_FLAT_START_ADDR
MCIND:0x141
2-40
MC_PT0_CONTEXT6_FLAT_START_ADDR
MCIND:0x142
2-40
MC_PT0_CONTEXT7_FLAT_START_ADDR
MCIND:0x143
2-40
MC_PT0_CONTEXT0_FLAT_END_ADDR
MCIND:0x14C
2-40
MC_PT0_CONTEXT1_FLAT_END_ADDR
MCIND:0x14D
2-40
MC_PT0_CONTEXT2_FLAT_END_ADDR
MCIND:0x14E
2-40
MC_PT0_CONTEXT3_FLAT_END_ADDR
MCIND:0x14F
2-41
MC_ARB_DRAM_PENALTIES3
MCIND:0x15
2-9
MC_PT0_CONTEXT4_FLAT_END_ADDR
MCIND:0x150
2-41
MC_PT0_CONTEXT5_FLAT_END_ADDR
MCIND:0x151
2-41
MC_PT0_CONTEXT6_FLAT_END_ADDR
MCIND:0x152
2-41
MC_PT0_CONTEXT7_FLAT_END_ADDR
MCIND:0x153
2-41
MC_PT0_CONTEXT0_MULTI_LEVEL_BASE_ADDR
MCIND:0x15C
2-41
MC_PT0_CONTEXT1_MULTI_LEVEL_BASE_ADDR
MCIND:0x15D
2-42
MC_PT0_CONTEXT2_MULTI_LEVEL_BASE_ADDR
MCIND:0x15E
2-42
MC_PT0_CONTEXT3_MULTI_LEVEL_BASE_ADDR
MCIND:0x15F
2-42
MC_ARB_RATIO_CLK_SEQ
MCIND:0x16
2-9
MC_PT0_CONTEXT4_MULTI_LEVEL_BASE_ADDR
MCIND:0x160
2-42
MC_PT0_CONTEXT5_MULTI_LEVEL_BASE_ADDR
MCIND:0x161
2-42
MC_PT0_CONTEXT6_MULTI_LEVEL_BASE_ADDR
MCIND:0x162
2-42
MC_PT0_CONTEXT7_MULTI_LEVEL_BASE_ADDR
MCIND:0x163
2-42
MC_PT0_CLIENT0_CNTL
MCIND:0x16C
2-43
MC_PT0_CLIENT1_CNTL
MCIND:0x16D
2-43
MC_PT0_CLIENT2_CNTL
MCIND:0x16E
2-44
MC_PT0_CLIENT3_CNTL
MCIND:0x16F
2-45
MC_ARB_RDWR_SWITCH
MCIND:0x17
2-9
MC_PT0_CLIENT4_CNTL
MCIND:0x170
2-46
MC_PT0_CLIENT5_CNTL
MCIND:0x171
2-47
MC_PT0_CLIENT6_CNTL
MCIND:0x172
2-48
MC_PT0_CLIENT7_CNTL
MCIND:0x173
2-49
MC_PT0_CLIENT8_CNTL
MCIND:0x174
2-49
MC_PT0_CLIENT9_CNTL
MCIND:0x175
2-50
MC_PT0_CLIENT10_CNTL
MCIND:0x176
2-51
MC_PT0_CLIENT11_CNTL
MCIND:0x177
2-52
MC_PT0_CLIENT12_CNTL
MCIND:0x178
2-53
MC_PT0_CLIENT13_CNTL
MCIND:0x179
2-54
MC_PT0_CLIENT14_CNTL
MCIND:0x17A
2-55
MC_PT0_CLIENT15_CNTL
MCIND:0x17B
2-55
MC_PT0_CLIENT16_CNTL
MCIND:0x17C
2-56
MC_SW_CNTL
MCIND:0x18
2-9
MC_TIMING_CNTL_2
MCIND:0x3
2-3
MC_WRITE_AGE1
MCIND:0x37
2-9
MC_WRITE_AGE2
MCIND:0x38
2-10
MC_FB_LOCATION
MCIND:0x4
2-3
MC_AGP_LOCATION
MCIND:0x5
2-3
AGP_BASE
MCIND:0x6
2-3
MC_SEQ_DRAM
MCIND:0x60
2-10
MC_SEQ_RAS_TIMING
MCIND:0x61
2-11
MC_SEQ_CAS_TIMING
MCIND:0x62
2-11
MC_SEQ_MISC_TIMING
MCIND:0x63
2-12
MC_SEQ_RD_CTL_I0
MCIND:0x64
2-12
MC_SEQ_RD_CTL_I1
MCIND:0x65
2-13
MC_SEQ_WR_CTL_I0
MCIND:0x66
2-14
MC_SEQ_WR_CTL_I1
MCIND:0x67
2-15
MC_SEQ_IO_CTL_I0
MCIND:0x68
2-15
MC_SEQ_IO_CTL_I1
MCIND:0x69
2-15
MC_SEQ_NPL_CTL_I0
MCIND:0x6A
2-16
MC_SEQ_NPL_CTL_I1
MCIND:0x6B
2-16
MC_SEQ_CK_PAD_CNTL_I0
MCIND:0x6C
2-16
MC_SEQ_CK_PAD_CNTL_I1
MCIND:0x6D
2-16
MC_SEQ_CMD_PAD_CNTL_I0
MCIND:0x6E
2-17
MC_SEQ_CMD_PAD_CNTL_I1
MCIND:0x6F
2-17
AGP_BASE_2
MCIND:0x7
2-3
MC_SEQ_DQ_PAD_CNTL_I0
MCIND:0x70
2-17
MC_SEQ_DQ_PAD_CNTL_I1
MCIND:0x71
2-17
MC_SEQ_QS_PAD_CNTL_I0
MCIND:0x72
2-18
MC_SEQ_QS_PAD_CNTL_I1
MCIND:0x73
2-18
MC_SEQ_A_PAD_CNTL_I0
MCIND:0x74
2-18
MC_SEQ_A_PAD_CNTL_I1
MCIND:0x75
2-18
MC_SEQ_CMD
MCIND:0x76
2-19
MC_SEQ_STATUS
MCIND:0x77
2-19
MC_CNTL0
MCIND:0x8
2-4
MC_IO_PAD_CNTL_I0
MCIND:0x80
2-19
MC_IO_PAD_CNTL_I1
MCIND:0x81
2-20
MC_IO_PAD_CNTL
MCIND:0x82
2-21
MC_IO_RD_DQ_CNTL_I0
MCIND:0x84
2-22
MC_IO_RD_DQ_CNTL_I1
MCIND:0x85
2-22
MC_IO_RD_QS_CNTL_I0
MCIND:0x86
2-22
MC_IO_RD_QS_CNTL_I1
MCIND:0x87
2-22
MC_IO_WR_CNTL_I0
MCIND:0x88
2-22
MC_IO_WR_CNTL_I1
MCIND:0x89
2-23
MC_IO_CK_PAD_CNTL_I0
MCIND:0x8A
2-23
MC_IO_CK_PAD_CNTL_I1
MCIND:0x8B
2-23
MC_IO_CMD_PAD_CNTL_I0
MCIND:0x8C
2-23
MC_IO_CMD_PAD_CNTL_I1
MCIND:0x8D
2-24
MC_IO_DQ_PAD_CNTL_I0
MCIND:0x8E
2-24
MC_IO_DQ_PAD_CNTL_I1
MCIND:0x8F
2-24
MC_CNTL1
MCIND:0x9
2-6
MC_IO_QS_PAD_CNTL_I0
MCIND:0x90
2-25
MC_IO_QS_PAD_CNTL_I1
MCIND:0x91
2-25
MC_IO_A_PAD_CNTL_I0
MCIND:0x92
2-25
MC_IO_A_PAD_CNTL_I1
MCIND:0x93
2-26
MC_IO_WR_DQ_CNTL_I0
MCIND:0x94
2-26
MC_IO_WR_DQ_CNTL_I1
MCIND:0x95
2-26
MC_IO_WR_QS_CNTL_I0
MCIND:0x96
2-26
MC_IO_WR_QS_CNTL_I1
MCIND:0x97
2-27
MC_VENDOR_ID_I0
MCIND:0x98
2-27
MC_VENDOR_ID_I1
MCIND:0x99
2-27
MC_NPL_STATUS_I0
MCIND:0x9A
2-27
MC_NPL_STATUS_I1
MCIND:0x9B
2-27
MC_IO_RD_QS2_CNTL_I0
MCIND:0x9C
2-28
MC_IO_RD_QS2_CNTL_I1
MCIND:0x9D
2-28
MC_RFSH_CNTL
MCIND:0xA
2-8
MC_IMP_CNTL
MCIND:0xA0
2-28
MC_IMP_DEBUG
MCIND:0xA1
2-28
MC_IMP_STATUS
MCIND:0xA2
2-28
MC_RBS_MAP
MCIND:0xB0
2-29
MC_RBS_CZT_HWM
MCIND:0xB1
2-30
MC_RBS_SUN_HWM
MCIND:0xB2
2-30
MC_RBS_MISC
MCIND:0xB3
2-30
MC_PMG_CMD
MCIND:0xE0
2-31
MC_PMG_CFG
MCIND:0xE1
2-31
MC_MISC_0
MCIND:0xF0
2-31
MC_MISC_1
MCIND:0xF1
2-31
MC_DEBUG
MCIND:0xFE
2-32

View File

@ -8,6 +8,7 @@ with open(sys.argv[1]) as f:
values = [
int(s.strip(), 16)
for s in f.read().strip().split(",")
if s
]
undocumented_registers = {
@ -15,6 +16,17 @@ undocumented_registers = {
0x2184: "VAP_VSM_VTX_ASSM",
}
def decode_print(register_name, value, paren=False, display_register_name=None):
    """Print a register label followed by the decoded form of `value`.

    `register_name` selects the layout handed to `decode_bits`;
    `display_register_name`, when given, is the label printed instead of
    `register_name`.  With `paren=True` the label is wrapped in parentheses.

    The first decoded line has its two leading characters dropped and is
    re-emitted behind "= "; every following decoded line is passed through
    unchanged.  The whole decoded body is then run through `indent`.
    """
    shown_name = register_name if display_register_name is None else display_register_name

    fields = decode_bits(register_name, value)
    body_lines = [f"= {fields[0][2:]}"]
    body_lines.extend(fields[1:])
    body = indent('\n'.join(body_lines), ' ')

    label = f"({shown_name})" if paren else shown_name
    print(f" {label}\n{body}")
class Parser:
def __init__(self, values):
self.ix = 0
@ -52,10 +64,7 @@ class Parser:
try:
if one_reg or value == 0:
assert False
decoded_value = decode_bits(register_name, value)
head = decoded_value[0][2:]
tail = indent('\n'.join(decoded_value[1:]), ' ')
print(f" {register_name} = {head}\n{tail}")
decode_print(register_name, value)
except AssertionError:
print(f" {register_name} = 0x{value:08x}")
else:
@ -79,11 +88,83 @@ class Parser:
it_opcode = (header >> 8) & 0xff
count = (header >> 16) & 0x3fff
print(f"type 3: op:{it_opcode:02x} count:{count:04x}")
opcode_names = dict((v, k) for k, v in [
("3D_DRAW_VBUF", 0x28),
("3D_DRAW_IMMD", 0x29),
("3D_DRAW_INDX", 0x2A),
("LOAD_PALETTE", 0x2C),
("3D_LOAD_VBPNTR", 0x2F),
("INDX_BUFFER", 0x33),
("3D_DRAW_VBUF_2", 0x34),
("3D_DRAW_IMMD_2", 0x35),
("3D_DRAW_INDX_2", 0x36),
("3D_CLEAR_HIZ", 0x37),
("3D_DRAW_128", 0x39),
])
opcode_name = f"{it_opcode:02x}" if it_opcode not in opcode_names else opcode_names[it_opcode]
registers = {
"3D_DRAW_VBUF": ["VAP_VTX_FMT", "VAP_VF_CNTL"],
"3D_DRAW_IMMD": ["VAP_VTX_FMT", "VAP_VF_CNTL"],
"3D_DRAW_INDX": ["VAP_VTX_FMT", "VAP_VF_CNTL"],
"3D_LOAD_VBPNTR": ["VAP_VTX_NUM_ARRAYS",
"VAP_VTX_AOS_ATTR01",
"VAP_VTX_AOS_ADDR0",
"VAP_VTX_AOS_ADDR1",
"VAP_VTX_AOS_ATTR23",
"VAP_VTX_AOS_ADDR2",
"VAP_VTX_AOS_ADDR3",
"VAP_VTX_AOS_ATTR45",
"VAP_VTX_AOS_ADDR4",
"VAP_VTX_AOS_ADDR5",
"VAP_VTX_AOS_ATTR67",
"VAP_VTX_AOS_ADDR6",
"VAP_VTX_AOS_ADDR7",
"VAP_VTX_AOS_ATTR89",
"VAP_VTX_AOS_ADDR8",
"VAP_VTX_AOS_ADDR9",
"VAP_VTX_AOS_ATTR1011",
"VAP_VTX_AOS_ADDR10",
"VAP_VTX_AOS_ADDR11",
"VAP_VTX_AOS_ATTR1213",
"VAP_VTX_AOS_ADDR12",
"VAP_VTX_AOS_ADDR13",
"VAP_VTX_AOS_ATTR1415",
"VAP_VTX_AOS_ADDR14",
"VAP_VTX_AOS_ADDR15"],
"INDX_BUFFER": [(("ONE_REG_WR", (31, 31)), ("SKIP_COUNT", (18, 16)), ("DESTINATION", (12, 0))),
(("BUFFER_BASE", (31, 0)),),
(("BUFFER_SIZE", (31, 0)),)],
"3D_DRAW_VBUF_2": ["VAP_VF_CNTL"],
"3D_DRAW_IMMD_2": ["VAP_VF_CNTL"],
"3D_DRAW_INDX_2": ["VAP_VF_CNTL"],
}
print(f"type 3: op:{opcode_name} count:{count:04x}")
ix = 0
while count >= 0:
value = self.consume()
if opcode_name in registers and ix < len(registers[opcode_name]):
register_name = registers[opcode_name][ix]
if type(register_name) is str:
if "_AOS_ATTR" in register_name:
decode_print(register_name[:-2], value, paren=True, display_register_name=register_name)
elif "_AOS_ADDR" in register_name:
decode_print(register_name[:-1], value, paren=True, display_register_name=register_name)
else:
decode_print(register_name, value, paren=True)
else:
print(f" ({opcode_name}__{ix})")
for i, desc in enumerate(register_name):
eq_bar = '=' if i == 0 else '|'
d_name, (high, low) = desc
mask = (1 << ((high - low) + 1)) - 1
v = (value >> low) & mask
print(f' {eq_bar} {opcode_name}__{ix}__{d_name}({v})')
else:
print(f" {value:08x}")
count -= 1
ix += 1
def packet(self):
value = self.peek()

648
regs/pcie_registers.inc Normal file
View File

@ -0,0 +1,648 @@
{
.name = "PCIE_TX_CNTL",
.address = 0x1,
},
{
.name = "PCIE_TX_GART_CNTL",
.address = 0x10,
},
{
.name = "PCIE_TX_GART_DISCARD_RD_ADDR_LO",
.address = 0x11,
},
{
.name = "PCIE_TX_GART_DISCARD_RD_ADDR_HI",
.address = 0x12,
},
{
.name = "PCIE_TX_GART_BASE",
.address = 0x13,
},
{
.name = "PCIE_TX_GART_START_LO",
.address = 0x14,
},
{
.name = "PCIE_TX_GART_START_HI",
.address = 0x15,
},
{
.name = "PCIE_TX_GART_END_LO",
.address = 0x16,
},
{
.name = "PCIE_TX_GART_END_HI",
.address = 0x17,
},
{
.name = "PCIE_TX_GART_ERROR",
.address = 0x18,
},
{
.name = "PCIE_TX_SEQ",
.address = 0x2,
},
{
.name = "PCIE_TX_GART_LRU_MRU_PTR",
.address = 0x20,
},
{
.name = "PCIE_TX_GART_STATUS",
.address = 0x21,
},
{
.name = "PCIE_TX_GART_TLB_VALID",
.address = 0x22,
},
{
.name = "PCIE_TX_GART_TLB0_DATA",
.address = 0x23,
},
{
.name = "PCIE_TX_GART_TLB1_DATA",
.address = 0x24,
},
{
.name = "PCIE_TX_GART_TLB2_DATA",
.address = 0x25,
},
{
.name = "PCIE_TX_GART_TLB3_DATA",
.address = 0x26,
},
{
.name = "PCIE_TX_GART_TLB4_DATA",
.address = 0x27,
},
{
.name = "PCIE_TX_GART_TLB5_DATA",
.address = 0x28,
},
{
.name = "PCIE_TX_GART_TLB6_DATA",
.address = 0x29,
},
{
.name = "PCIE_TX_GART_TLB7_DATA",
.address = 0x2a,
},
{
.name = "PCIE_TX_GART_TLB8_DATA",
.address = 0x2b,
},
{
.name = "PCIE_TX_GART_TLB9_DATA",
.address = 0x2c,
},
{
.name = "PCIE_TX_GART_TLB10_DATA",
.address = 0x2d,
},
{
.name = "PCIE_TX_GART_TLB11_DATA",
.address = 0x2e,
},
{
.name = "PCIE_TX_GART_TLB12_DATA",
.address = 0x2f,
},
{
.name = "PCIE_TX_REPLAY",
.address = 0x3,
},
{
.name = "PCIE_TX_GART_TLB13_DATA",
.address = 0x30,
},
{
.name = "PCIE_TX_GART_TLB14_DATA",
.address = 0x31,
},
{
.name = "PCIE_TX_GART_TLB15_DATA",
.address = 0x32,
},
{
.name = "PCIE_TX_GART_TLB16_DATA",
.address = 0x33,
},
{
.name = "PCIE_TX_GART_TLB17_DATA",
.address = 0x34,
},
{
.name = "PCIE_TX_GART_TLB18_DATA",
.address = 0x35,
},
{
.name = "PCIE_TX_GART_TLB19_DATA",
.address = 0x36,
},
{
.name = "PCIE_TX_GART_TLB20_DATA",
.address = 0x37,
},
{
.name = "PCIE_TX_GART_TLB21_DATA",
.address = 0x38,
},
{
.name = "PCIE_TX_GART_TLB22_DATA",
.address = 0x39,
},
{
.name = "PCIE_TX_GART_TLB23_DATA",
.address = 0x3a,
},
{
.name = "PCIE_TX_GART_TLB24_DATA",
.address = 0x3b,
},
{
.name = "PCIE_TX_GART_TLB25_DATA",
.address = 0x3c,
},
{
.name = "PCIE_TX_GART_TLB26_DATA",
.address = 0x3d,
},
{
.name = "PCIE_TX_GART_TLB27_DATA",
.address = 0x3e,
},
{
.name = "PCIE_TX_GART_TLB28_DATA",
.address = 0x3f,
},
{
.name = "PCIE_TX_CREDITS_CONSUMED",
.address = 0x4,
},
{
.name = "PCIE_TX_GART_TLB29_DATA",
.address = 0x40,
},
{
.name = "PCIE_CLK_CNTL",
.address = 0x400,
},
{
.name = "PCIE_PRBS10",
.address = 0x401,
},
{
.name = "PCIE_PRBS23_BITCNT0",
.address = 0x402,
},
{
.name = "PCIE_PRBS23_BITCNT1",
.address = 0x403,
},
{
.name = "PCIE_PRBS23_BITCNT2",
.address = 0x404,
},
{
.name = "PCIE_PRBS23_BITCNT3",
.address = 0x405,
},
{
.name = "PCIE_PRBS23_BITCNT4",
.address = 0x406,
},
{
.name = "PCIE_PRBS23_BITCNT5",
.address = 0x407,
},
{
.name = "PCIE_PRBS23_BITCNT6",
.address = 0x408,
},
{
.name = "PCIE_PRBS23_BITCNT7",
.address = 0x409,
},
{
.name = "PCIE_PRBS23_BITCNT8",
.address = 0x40a,
},
{
.name = "PCIE_PRBS23_BITCNT9",
.address = 0x40b,
},
{
.name = "PCIE_PRBS23_BITCNT10",
.address = 0x40c,
},
{
.name = "PCIE_PRBS23_BITCNT11",
.address = 0x40d,
},
{
.name = "PCIE_PRBS23_BITCNT12",
.address = 0x40e,
},
{
.name = "PCIE_PRBS23_BITCNT13",
.address = 0x40f,
},
{
.name = "PCIE_TX_GART_TLB30_DATA",
.address = 0x41,
},
{
.name = "PCIE_PRBS23_BITCNT14",
.address = 0x410,
},
{
.name = "PCIE_PRBS23_BITCNT15",
.address = 0x411,
},
{
.name = "PCIE_PRBS23_ERRCNT0",
.address = 0x412,
},
{
.name = "PCIE_PRBS23_ERRCNT1",
.address = 0x413,
},
{
.name = "PCIE_PRBS23_ERRCNT2",
.address = 0x414,
},
{
.name = "PCIE_PRBS23_ERRCNT3",
.address = 0x415,
},
{
.name = "PCIE_PRBS23_ERRCNT4",
.address = 0x416,
},
{
.name = "PCIE_PRBS23_ERRCNT5",
.address = 0x417,
},
{
.name = "PCIE_PRBS23_ERRCNT6",
.address = 0x418,
},
{
.name = "PCIE_PRBS23_ERRCNT7",
.address = 0x419,
},
{
.name = "PCIE_PRBS23_ERRCNT8",
.address = 0x41a,
},
{
.name = "PCIE_PRBS23_ERRCNT9",
.address = 0x41b,
},
{
.name = "PCIE_PRBS23_ERRCNT10",
.address = 0x41c,
},
{
.name = "PCIE_PRBS23_ERRCNT11",
.address = 0x41d,
},
{
.name = "PCIE_PRBS23_ERRCNT12",
.address = 0x41e,
},
{
.name = "PCIE_PRBS23_ERRCNT13",
.address = 0x41f,
},
{
.name = "PCIE_TX_GART_TLB31_DATA",
.address = 0x42,
},
{
.name = "PCIE_PRBS23_ERRCNT14",
.address = 0x420,
},
{
.name = "PCIE_PRBS23_ERRCNT15",
.address = 0x421,
},
{
.name = "PCIE_PRBS23_CTRL0",
.address = 0x422,
},
{
.name = "PCIE_PRBS23_CTRL1",
.address = 0x423,
},
{
.name = "PCIE_PRBS_EN",
.address = 0x424,
},
{
.name = "PCIE_XSTRAP1",
.address = 0x425,
},
{
.name = "PCIE_XSTRAP2",
.address = 0x426,
},
{
.name = "PCIE_XSTRAP5",
.address = 0x429,
},
{
.name = "PCIE_TX_CREDITS_CONSUMED_D",
.address = 0x5,
},
{
.name = "PCIE_TX_CREDITS_CONSUMED_CPLD",
.address = 0x6,
},
{
.name = "PCIE_FLOW_CNTL",
.address = 0x60,
},
{
.name = "PCIE_TXRX_DEBUG_SEQNUM",
.address = 0x61,
},
{
.name = "PCIE_TXRX_TEST_MODE",
.address = 0x62,
},
{
.name = "PCIE_TX_CREDITS_LIMIT",
.address = 0x7,
},
{
.name = "PCIE_RX_CNTL",
.address = 0x70,
},
{
.name = "PCIE_RX_NUM_NACK",
.address = 0x71,
},
{
.name = "PCIE_RX_NUM_NACK_GENERATED",
.address = 0x72,
},
{
.name = "PCIE_RX_ACK_NACK_LATENCY",
.address = 0x73,
},
{
.name = "PCIE_RX_ACK_NACK_LATENCY_THRESHOLD",
.address = 0x74,
},
{
.name = "PCIE_RX_TLP_HDR0",
.address = 0x75,
},
{
.name = "PCIE_RX_TLP_HDR1",
.address = 0x76,
},
{
.name = "PCIE_RX_TLP_HDR2",
.address = 0x77,
},
{
.name = "PCIE_RX_TLP_HDR3",
.address = 0x78,
},
{
.name = "PCIE_RX_TLP_HDR4",
.address = 0x79,
},
{
.name = "PCIE_RX_TLP_CRC",
.address = 0x7a,
},
{
.name = "PCIE_RX_DLP0",
.address = 0x7b,
},
{
.name = "PCIE_RX_DLP1",
.address = 0x7c,
},
{
.name = "PCIE_RX_DLP_CRC",
.address = 0x7d,
},
{
.name = "PCIE_RX_CREDITS_ALLOCATED",
.address = 0x7e,
},
{
.name = "PCIE_RX_CREDITS_ALLOCATED_D",
.address = 0x7f,
},
{
.name = "PCIE_TX_CREDITS_LIMIT_D",
.address = 0x8,
},
{
.name = "PCIE_RX_CREDITS_ALLOCATED_CPLD",
.address = 0x80,
},
{
.name = "PCIE_RX_CREDITS_RECEIVED",
.address = 0x81,
},
{
.name = "PCIE_RX_CREDITS_RECEIVED_D",
.address = 0x82,
},
{
.name = "PCIE_RX_CREDITS_RECEIVED_CPLD",
.address = 0x83,
},
{
.name = "PCIE_RX_MAL_TLP_COUNT",
.address = 0x84,
},
{
.name = "PCIE_RX_ERR_LOG",
.address = 0x85,
},
{
.name = "PCIE_RX_EXPECTED_SEQNUM",
.address = 0x86,
},
{
.name = "PCIE_TX_CREDITS_LIMIT_CPLD",
.address = 0x9,
},
{
.name = "PCIE_CI_CNTL",
.address = 0x90,
},
{
.name = "PCIE_CI_FLUSH_CNTL",
.address = 0x91,
},
{
.name = "PCIE_CI_PANIC",
.address = 0x92,
},
{
.name = "PCIE_CI_HANG",
.address = 0x93,
},
{
.name = "PCIE_LC_CNTL",
.address = 0xa0,
},
{
.name = "PCIE_LC_N_FTS_CNTL",
.address = 0xa1,
},
{
.name = "PCIE_LC_LINK_WIDTH_CNTL",
.address = 0xa2,
},
{
.name = "PCIE_LC_STATE0",
.address = 0xa5,
},
{
.name = "PCIE_LC_STATE1",
.address = 0xa6,
},
{
.name = "PCIE_LC_STATE2",
.address = 0xa7,
},
{
.name = "PCIE_LC_STATE3",
.address = 0xa8,
},
{
.name = "PCIE_LC_STATE4",
.address = 0xa9,
},
{
.name = "PCIE_LC_STATE5",
.address = 0xaa,
},
{
.name = "PCIE_LC_FORCE_SYNC_LOSS_CNTL",
.address = 0xab,
},
{
.name = "PCIE_P_CNTL",
.address = 0xb0,
},
{
.name = "PCIE_P_CNTL2",
.address = 0xb1,
},
{
.name = "PCIE_P_BUF_STATUS",
.address = 0xb2,
},
{
.name = "PCIE_P_DECODER_STATUS",
.address = 0xb3,
},
{
.name = "PCIE_P_MISC_DEBUG_STATUS",
.address = 0xb4,
},
{
.name = "PCIE_P_IMP_CNTL_STRENGTH",
.address = 0xc0,
},
{
.name = "PCIE_P_IMP_CNTL_UPDATE",
.address = 0xc1,
},
{
.name = "PCIE_P_STR_CNTL_UPDATE",
.address = 0xc2,
},
{
.name = "PCIE_P_PAD_MISC_CNTL",
.address = 0xc3,
},
{
.name = "PCIE_P_SYMSYNC_CTL",
.address = 0xc4,
},
{
.name = "PCIE_P_DECODE_ERR_CNTL",
.address = 0xc5,
},
{
.name = "PCIE_ERR_CNTL",
.address = 0xe0,
},
{
.name = "PCIE_CLK_RST_CNTL",
.address = 0xe1,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_0",
.address = 0xf0,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_1",
.address = 0xf1,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_2",
.address = 0xf2,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_3",
.address = 0xf3,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_4",
.address = 0xf4,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_5",
.address = 0xf5,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_6",
.address = 0xf6,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_7",
.address = 0xf7,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_8",
.address = 0xf8,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_9",
.address = 0xf9,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_10",
.address = 0xfa,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_11",
.address = 0xfb,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_12",
.address = 0xfc,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_13",
.address = 0xfd,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_14",
.address = 0xfe,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_15",
.address = 0xff,
},

486
regs/pcie_registers.txt Normal file
View File

@ -0,0 +1,486 @@
PCIE_TX_CNTL
PCIEIND:0x1
2-80
PCIE_TX_GART_CNTL
PCIEIND:0x10
2-82
PCIE_TX_GART_DISCARD_RD_ADDR_LO
PCIEIND:0x11
2-82
PCIE_TX_GART_DISCARD_RD_ADDR_HI
PCIEIND:0x12
2-82
PCIE_TX_GART_BASE
PCIEIND:0x13
2-83
PCIE_TX_GART_START_LO
PCIEIND:0x14
2-83
PCIE_TX_GART_START_HI
PCIEIND:0x15
2-83
PCIE_TX_GART_END_LO
PCIEIND:0x16
2-83
PCIE_TX_GART_END_HI
PCIEIND:0x17
2-83
PCIE_TX_GART_ERROR
PCIEIND:0x18
2-83
PCIE_TX_SEQ
PCIEIND:0x2
2-80
PCIE_TX_GART_LRU_MRU_PTR
PCIEIND:0x20
2-84
PCIE_TX_GART_STATUS
PCIEIND:0x21
2-84
PCIE_TX_GART_TLB_VALID
PCIEIND:0x22
2-84
PCIE_TX_GART_TLB0_DATA
PCIEIND:0x23
2-84
PCIE_TX_GART_TLB1_DATA
PCIEIND:0x24
2-84
PCIE_TX_GART_TLB2_DATA
PCIEIND:0x25
2-84
PCIE_TX_GART_TLB3_DATA
PCIEIND:0x26
2-85
PCIE_TX_GART_TLB4_DATA
PCIEIND:0x27
2-85
PCIE_TX_GART_TLB5_DATA
PCIEIND:0x28
2-85
PCIE_TX_GART_TLB6_DATA
PCIEIND:0x29
2-85
PCIE_TX_GART_TLB7_DATA
PCIEIND:0x2A
2-85
PCIE_TX_GART_TLB8_DATA
PCIEIND:0x2B
2-85
PCIE_TX_GART_TLB9_DATA
PCIEIND:0x2C
2-85
PCIE_TX_GART_TLB10_DATA
PCIEIND:0x2D
2-86
PCIE_TX_GART_TLB11_DATA
PCIEIND:0x2E
2-86
PCIE_TX_GART_TLB12_DATA
PCIEIND:0x2F
2-86
PCIE_TX_REPLAY
PCIEIND:0x3
2-80
PCIE_TX_GART_TLB13_DATA
PCIEIND:0x30
2-86
PCIE_TX_GART_TLB14_DATA
PCIEIND:0x31
2-86
PCIE_TX_GART_TLB15_DATA
PCIEIND:0x32
2-86
PCIE_TX_GART_TLB16_DATA
PCIEIND:0x33
2-87
PCIE_TX_GART_TLB17_DATA
PCIEIND:0x34
2-87
PCIE_TX_GART_TLB18_DATA
PCIEIND:0x35
2-87
PCIE_TX_GART_TLB19_DATA
PCIEIND:0x36
2-87
PCIE_TX_GART_TLB20_DATA
PCIEIND:0x37
2-87
PCIE_TX_GART_TLB21_DATA
PCIEIND:0x38
2-87
PCIE_TX_GART_TLB22_DATA
PCIEIND:0x39
2-88
PCIE_TX_GART_TLB23_DATA
PCIEIND:0x3A
2-88
PCIE_TX_GART_TLB24_DATA
PCIEIND:0x3B
2-88
PCIE_TX_GART_TLB25_DATA
PCIEIND:0x3C
2-88
PCIE_TX_GART_TLB26_DATA
PCIEIND:0x3D
2-88
PCIE_TX_GART_TLB27_DATA
PCIEIND:0x3E
2-88
PCIE_TX_GART_TLB28_DATA
PCIEIND:0x3F
2-89
PCIE_TX_CREDITS_CONSUMED
PCIEIND:0x4
2-81
PCIE_TX_GART_TLB29_DATA
PCIEIND:0x40
2-89
PCIE_CLK_CNTL
PCIEIND:0x400
2-107
PCIE_PRBS10
PCIEIND:0x401
2-107
PCIE_PRBS23_BITCNT0
PCIEIND:0x402
2-107
PCIE_PRBS23_BITCNT1
PCIEIND:0x403
2-107
PCIE_PRBS23_BITCNT2
PCIEIND:0x404
2-107
PCIE_PRBS23_BITCNT3
PCIEIND:0x405
2-108
PCIE_PRBS23_BITCNT4
PCIEIND:0x406
2-108
PCIE_PRBS23_BITCNT5
PCIEIND:0x407
2-108
PCIE_PRBS23_BITCNT6
PCIEIND:0x408
2-108
PCIE_PRBS23_BITCNT7
PCIEIND:0x409
2-108
PCIE_PRBS23_BITCNT8
PCIEIND:0x40A
2-108
PCIE_PRBS23_BITCNT9
PCIEIND:0x40B
2-108
PCIE_PRBS23_BITCNT10
PCIEIND:0x40C
2-109
PCIE_PRBS23_BITCNT11
PCIEIND:0x40D
2-109
PCIE_PRBS23_BITCNT12
PCIEIND:0x40E
2-109
PCIE_PRBS23_BITCNT13
PCIEIND:0x40F
2-109
PCIE_TX_GART_TLB30_DATA
PCIEIND:0x41
2-89
PCIE_PRBS23_BITCNT14
PCIEIND:0x410
2-109
PCIE_PRBS23_BITCNT15
PCIEIND:0x411
2-109
PCIE_PRBS23_ERRCNT0
PCIEIND:0x412
2-110
PCIE_PRBS23_ERRCNT1
PCIEIND:0x413
2-110
PCIE_PRBS23_ERRCNT2
PCIEIND:0x414
2-110
PCIE_PRBS23_ERRCNT3
PCIEIND:0x415
2-110
PCIE_PRBS23_ERRCNT4
PCIEIND:0x416
2-110
PCIE_PRBS23_ERRCNT5
PCIEIND:0x417
2-110
PCIE_PRBS23_ERRCNT6
PCIEIND:0x418
2-110
PCIE_PRBS23_ERRCNT7
PCIEIND:0x419
2-111
PCIE_PRBS23_ERRCNT8
PCIEIND:0x41A
2-111
PCIE_PRBS23_ERRCNT9
PCIEIND:0x41B
2-111
PCIE_PRBS23_ERRCNT10
PCIEIND:0x41C
2-111
PCIE_PRBS23_ERRCNT11
PCIEIND:0x41D
2-111
PCIE_PRBS23_ERRCNT12
PCIEIND:0x41E
2-111
PCIE_PRBS23_ERRCNT13
PCIEIND:0x41F
2-111
PCIE_TX_GART_TLB31_DATA
PCIEIND:0x42
2-89
PCIE_PRBS23_ERRCNT14
PCIEIND:0x420
2-112
PCIE_PRBS23_ERRCNT15
PCIEIND:0x421
2-112
PCIE_PRBS23_CTRL0
PCIEIND:0x422
2-112
PCIE_PRBS23_CTRL1
PCIEIND:0x423
2-113
PCIE_PRBS_EN
PCIEIND:0x424
2-113
PCIE_XSTRAP1
PCIEIND:0x425
2-113
PCIE_XSTRAP2
PCIEIND:0x426
2-114
PCIE_XSTRAP5
PCIEIND:0x429
2-114
PCIE_TX_CREDITS_CONSUMED_D
PCIEIND:0x5
2-81
PCIE_TX_CREDITS_CONSUMED_CPLD
PCIEIND:0x6
2-81
PCIE_FLOW_CNTL
PCIEIND:0x60
2-89
PCIE_TXRX_DEBUG_SEQNUM
PCIEIND:0x61
2-90
PCIE_TXRX_TEST_MODE
PCIEIND:0x62
2-90
PCIE_TX_CREDITS_LIMIT
PCIEIND:0x7
2-81
PCIE_RX_CNTL
PCIEIND:0x70
2-90
PCIE_RX_NUM_NACK
PCIEIND:0x71
2-91
PCIE_RX_NUM_NACK_GENERATED
PCIEIND:0x72
2-91
PCIE_RX_ACK_NACK_LATENCY
PCIEIND:0x73
2-91
PCIE_RX_ACK_NACK_LATENCY_THRESHOLD
PCIEIND:0x74
2-91
PCIE_RX_TLP_HDR0
PCIEIND:0x75
2-91
PCIE_RX_TLP_HDR1
PCIEIND:0x76
2-91
PCIE_RX_TLP_HDR2
PCIEIND:0x77
2-91
PCIE_RX_TLP_HDR3
PCIEIND:0x78
2-92
PCIE_RX_TLP_HDR4
PCIEIND:0x79
2-92
PCIE_RX_TLP_CRC
PCIEIND:0x7A
2-92
PCIE_RX_DLP0
PCIEIND:0x7B
2-92
PCIE_RX_DLP1
PCIEIND:0x7C
2-92
PCIE_RX_DLP_CRC
PCIEIND:0x7D
2-92
PCIE_RX_CREDITS_ALLOCATED
PCIEIND:0x7E
2-92
PCIE_RX_CREDITS_ALLOCATED_D
PCIEIND:0x7F
2-93
PCIE_TX_CREDITS_LIMIT_D
PCIEIND:0x8
2-81
PCIE_RX_CREDITS_ALLOCATED_CPLD
PCIEIND:0x80
2-93
PCIE_RX_CREDITS_RECEIVED
PCIEIND:0x81
2-93
PCIE_RX_CREDITS_RECEIVED_D
PCIEIND:0x82
2-93
PCIE_RX_CREDITS_RECEIVED_CPLD
PCIEIND:0x83
2-93
PCIE_RX_MAL_TLP_COUNT
PCIEIND:0x84
2-93
PCIE_RX_ERR_LOG
PCIEIND:0x85
2-94
PCIE_RX_EXPECTED_SEQNUM
PCIEIND:0x86
2-94
PCIE_TX_CREDITS_LIMIT_CPLD
PCIEIND:0x9
2-82
PCIE_CI_CNTL
PCIEIND:0x90
2-94
PCIE_CI_FLUSH_CNTL
PCIEIND:0x91
2-94
PCIE_CI_PANIC
PCIEIND:0x92
2-94
PCIE_CI_HANG
PCIEIND:0x93
2-95
PCIE_LC_CNTL
PCIEIND:0xA0
2-95
PCIE_LC_N_FTS_CNTL
PCIEIND:0xA1
2-95
PCIE_LC_LINK_WIDTH_CNTL
PCIEIND:0xA2
2-97
PCIE_LC_STATE0
PCIEIND:0xA5
2-95
PCIE_LC_STATE1
PCIEIND:0xA6
2-96
PCIE_LC_STATE2
PCIEIND:0xA7
2-96
PCIE_LC_STATE3
PCIEIND:0xA8
2-96
PCIE_LC_STATE4
PCIEIND:0xA9
2-96
PCIE_LC_STATE5
PCIEIND:0xAA
2-96
PCIE_LC_FORCE_SYNC_LOSS_CNTL
PCIEIND:0xAB
2-97
PCIE_P_CNTL
PCIEIND:0xB0
2-97
PCIE_P_CNTL2
PCIEIND:0xB1
2-98
PCIE_P_BUF_STATUS
PCIEIND:0xB2
2-98
PCIE_P_DECODER_STATUS
PCIEIND:0xB3
2-99
PCIE_P_MISC_DEBUG_STATUS
PCIEIND:0xB4
2-100
PCIE_P_IMP_CNTL_STRENGTH
PCIEIND:0xC0
2-101
PCIE_P_IMP_CNTL_UPDATE
PCIEIND:0xC1
2-102
PCIE_P_STR_CNTL_UPDATE
PCIEIND:0xC2
2-102
PCIE_P_PAD_MISC_CNTL
PCIEIND:0xC3
2-102
PCIE_P_SYMSYNC_CTL
PCIEIND:0xC4
2-102
PCIE_P_DECODE_ERR_CNTL
PCIEIND:0xC5
2-103
PCIE_ERR_CNTL
PCIEIND:0xE0
2-105
PCIE_CLK_RST_CNTL
PCIEIND:0xE1
2-105
PCIE_P_DECODE_ERR_CNT_0
PCIEIND:0xF0
2-103
PCIE_P_DECODE_ERR_CNT_1
PCIEIND:0xF1
2-103
PCIE_P_DECODE_ERR_CNT_2
PCIEIND:0xF2
2-103
PCIE_P_DECODE_ERR_CNT_3
PCIEIND:0xF3
2-103
PCIE_P_DECODE_ERR_CNT_4
PCIEIND:0xF4
2-103
PCIE_P_DECODE_ERR_CNT_5
PCIEIND:0xF5
2-103
PCIE_P_DECODE_ERR_CNT_6
PCIEIND:0xF6
2-104
PCIE_P_DECODE_ERR_CNT_7
PCIEIND:0xF7
2-104
PCIE_P_DECODE_ERR_CNT_8
PCIEIND:0xF8
2-104
PCIE_P_DECODE_ERR_CNT_9
PCIEIND:0xF9
2-104
PCIE_P_DECODE_ERR_CNT_10
PCIEIND:0xFA
2-104
PCIE_P_DECODE_ERR_CNT_11
PCIEIND:0xFB
2-104
PCIE_P_DECODE_ERR_CNT_12
PCIEIND:0xFC
2-105
PCIE_P_DECODE_ERR_CNT_13
PCIEIND:0xFD
2-105
PCIE_P_DECODE_ERR_CNT_14
PCIEIND:0xFE
2-105
PCIE_P_DECODE_ERR_CNT_15
PCIEIND:0xFF
2-105

37
regs/rrg_registers.py Normal file
View File

@ -0,0 +1,37 @@
import sys
# argv[1] is the register listing to convert; every following argument is an
# address prefix (the text before the ':' in an address token) to accept.
with open(sys.argv[1]) as f:
    buf = f.read()

prefixes = sys.argv[2:]
assert len(prefixes) >= 1

# str.split() with no separator splits on any whitespace run and never yields
# empty or padded tokens, so no per-token strip is needed.
lines = buf.split()
# The listing is a flat sequence of three-token records.
assert len(lines) % 3 == 0
def parse(lines):
    """Yield one `(name, address, page)` tuple per three-token record.

    `lines` is a flat sequence of tokens grouped in threes: a register name,
    an address of the form `PREFIX:0x<hex>`, and a page token that must
    contain '-'.  The accepted prefixes come from the module-level
    `prefixes` list.  The yielded address is an `int`.

    Raises AssertionError when the page token has no '-', when none of the
    prefixes matched the address, or when the remainder does not start
    with "0x".
    """
    record_count = len(lines) // 3
    for record in range(record_count):
        base = record * 3
        name, tagged_address, page = lines[base:base + 3]
        assert '-' in page, page

        # Each known prefix is tried in turn; removeprefix leaves the text
        # untouched when the prefix is absent.
        bare_address = tagged_address
        for prefix in prefixes:
            bare_address = bare_address.removeprefix(f"{prefix}:")

        # At least one prefix must have been recognised.
        assert bare_address != tagged_address
        assert bare_address.startswith("0x")

        yield name, int(bare_address.removeprefix("0x"), 16), page
# Emit one brace-delimited `.name` / `.address` initializer entry per register.
for reg_name, reg_address, _page in parse(lines):
    entry = (
        "{",
        f" .name = \"{reg_name}\",",
        f" .address = {hex(reg_address)},",
        "},",
    )
    print("\n".join(entry))
# Alternative output form (disabled): one "#define NAME 0xADDR" line per register.

View File

@ -0,0 +1,12 @@
0x00007807,
0x02400000,
0xe400f400,
0x00000000,
0x00000000,
0x00000000,
0x00078005,
0x08020000,
0x08020000,
0x1c440220,
0x1c60c003,
0x00000005,

View File

@ -0,0 +1,8 @@
0x00f00203,
0x00d10001,
0x01248001,
0x01248001,
0x00f02203,
0x00d10021,
0x01248021,
0x01248021,

View File

@ -21,7 +21,7 @@ R500_COMMON = \
drm/drm.o \
file.o
matrix_cubesphere: $(R500_COMMON) matrix_cubesphere.o | shaders
%: $(R500_COMMON) %.o | shaders
$(CXX) $(LDFLAGS) $^ -o $@
%.o: %.c
@ -45,9 +45,9 @@ matrix_cubesphere: $(R500_COMMON) matrix_cubesphere.o | shaders
shaders: $(SHADER_BIN)
@true
#find . -type f ! -name "*.*" -delete
clean:
find . -type f -name "*.o" -delete
find . -type f -name "*.o" -delete -print
find . -type f -executable ! -name '*.*' -delete -print
.SUFFIXES:
.INTERMEDIATE:

View File

@ -7,10 +7,10 @@
#include "drm.h"
#include "../r500/indirect_buffer.h" // for extern uint32_t ib[];
void drm_radeon_cs(int fd,
int drm_radeon_cs(int fd,
int colorbuffer_handle,
int zbuffer_handle,
int flush_handle,
int vertexbuffer_handle,
int * texturebuffer_handles,
int texturebuffer_handles_length,
int ib_dwords)
@ -29,15 +29,23 @@ void drm_radeon_cs(int fd,
.write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
.flags = 8,
};
/*
relocs[FLUSH_RELOC_INDEX] = (struct drm_radeon_cs_reloc){
.handle = flush_handle,
.read_domains = 2, // RADEON_GEM_DOMAIN_GTT
.write_domain = 2, // RADEON_GEM_DOMAIN_GTT
.flags = 0,
};
*/
relocs[VERTEXBUFFER_RELOC_INDEX] = (struct drm_radeon_cs_reloc){
.handle = vertexbuffer_handle,
.read_domains = 4, // RADEON_GEM_DOMAIN_VRAM
.write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
.flags = 8,
};
for (int i = 0; i < texturebuffer_handles_length; i++) {
relocs[3 + i] = (struct drm_radeon_cs_reloc){
relocs[TEXTUREBUFFER_RELOC_INDEX + i] = (struct drm_radeon_cs_reloc){
.handle = texturebuffer_handles[i],
.read_domains = 4, // RADEON_GEM_DOMAIN_VRAM
.write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
@ -85,5 +93,70 @@ void drm_radeon_cs(int fd,
int ret = drmCommandWriteRead(fd, DRM_RADEON_CS, &cs, (sizeof (struct drm_radeon_cs)));
if (ret != 0) {
perror("drmCommandWriteRead(DRM_RADEON_CS)");
return -1;
}
return 0;
}
// Submit the first `ib_dwords` dwords of the global indirect buffer `ib`
// through the DRM_RADEON_CS command, with one relocation entry per element of
// `handles`, in the same order.
//
// Every relocation uses read/write domain 4 (RADEON_GEM_DOMAIN_VRAM) and
// flags 8.
//
// Returns 0 on success.  When drmCommandWriteRead reports a failure the error
// is printed with perror and -1 is returned.
int drm_radeon_cs2(int fd,
                   int * handles,
                   int handles_length,
                   int ib_dwords)
{
  struct drm_radeon_cs_reloc relocs[handles_length];

  int reloc_ix = 0;
  while (reloc_ix < handles_length) {
    relocs[reloc_ix] = (struct drm_radeon_cs_reloc){
      .handle = handles[reloc_ix],
      .read_domains = 4, // RADEON_GEM_DOMAIN_VRAM
      .write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
      .flags = 8,
    };
    reloc_ix += 1;
  }

  const uint32_t flags[2] = {
    5, // RADEON_CS_KEEP_TILING_FLAGS | RADEON_CS_END_OF_FRAME
    0, // RADEON_CS_RING_GFX
  };

  // chunk order: indirect buffer, relocations, flags
  struct drm_radeon_cs_chunk chunks[3] = {
    {
      .chunk_id = RADEON_CHUNK_ID_IB,
      .length_dw = ib_dwords,
      .chunk_data = (uint64_t)(uintptr_t)ib,
    },
    {
      .chunk_id = RADEON_CHUNK_ID_RELOCS,
      .length_dw = (sizeof (relocs)) / (sizeof (uint32_t)),
      .chunk_data = (uint64_t)(uintptr_t)relocs,
    },
    {
      .chunk_id = RADEON_CHUNK_ID_FLAGS,
      .length_dw = (sizeof (flags)) / (sizeof (uint32_t)),
      .chunk_data = (uint64_t)(uintptr_t)flags,
    },
  };

  // the command takes an array of chunk addresses, not the chunks themselves
  uint64_t chunks_array[3];
  for (int chunk_ix = 0; chunk_ix < 3; chunk_ix++) {
    chunks_array[chunk_ix] = (uint64_t)(uintptr_t)&chunks[chunk_ix];
  }

  struct drm_radeon_cs cs = {
    .num_chunks = 3,
    .cs_id = 0,
    .chunks = (uint64_t)(uintptr_t)chunks_array,
    .gart_limit = 0,
    .vram_limit = 0,
  };

  if (drmCommandWriteRead(fd, DRM_RADEON_CS, &cs, (sizeof (cs))) == 0) {
    return 0;
  }

  perror("drmCommandWriteRead(DRM_RADEON_CS)");
  return -1;
}

View File

@ -2,21 +2,27 @@
#define COLORBUFFER_RELOC_INDEX 0
#define ZBUFFER_RELOC_INDEX 1
#define FLUSH_RELOC_INDEX 2
#define TEXTURE_RELOC_INDEX 3
//#define FLUSH_RELOC_INDEX 2
#define VERTEXBUFFER_RELOC_INDEX 2
#define TEXTUREBUFFER_RELOC_INDEX 3
#ifdef __cplusplus
extern "C" {
#endif
void drm_radeon_cs(int fd,
int drm_radeon_cs(int fd,
int colorbuffer_handle,
int zbuffer_handle,
int flush_handle,
int vertexbuffer_handle,
int * texturebuffer_handles,
int texturebuffer_handles_length,
int ib_dwords);
int drm_radeon_cs2(int fd,
int * handles,
int handles_length,
int ib_dwords);
#ifdef __cplusplus
}
#endif

View File

@ -67,7 +67,7 @@ void _3d_clear(struct shaders& shaders)
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 7); // always
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always
ib_texture__0();
@ -315,7 +315,7 @@ vec3 _3d_light(struct shaders& shaders,
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1); // less
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
ib_texture__0();
@ -403,9 +403,17 @@ void _3d_cube(struct shaders& shaders,
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1); // less
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
ib_texture__1(TEXTURE_RELOC_INDEX);
int width = 1024;
int height = 1024;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__323();
@ -473,16 +481,28 @@ void _3d_cube(struct shaders& shaders,
int indirect_buffer(shaders& shaders,
float theta)
{
int width = 1600;
int height = 1200;
int pitch = width;
ib_ix = 0;
ib_generic_initialization();
T0V(RB3D_BLENDCNTL, 0);
T0V(RB3D_ABLENDCNTL, 0);
ib_viewport(width, height);
ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);
T0V(GB_ENABLE, 0);
T0V(US_OUT_FMT_0
, US_OUT_FMT__OUT_FMT(0) // C4_8
| US_OUT_FMT__C0_SEL(3) // Blue
| US_OUT_FMT__C1_SEL(2) // Green
| US_OUT_FMT__C2_SEL(1) // Red
| US_OUT_FMT__C3_SEL(0) // Alpha
| US_OUT_FMT__C0_SEL__BLUE
| US_OUT_FMT__C1_SEL__GREEN
| US_OUT_FMT__C2_SEL__RED
| US_OUT_FMT__C3_SEL__ALPHA
| US_OUT_FMT__OUT_SIGN(0)
);
T0V(US_OUT_FMT_1
@ -495,8 +515,6 @@ int indirect_buffer(shaders& shaders,
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
ib_colorbuffer(COLORBUFFER_RELOC_INDEX);
load_pvs_shaders(shaders.vertex, shaders.vertex_length);
load_us_shaders(shaders.fragment, shaders.fragment_length);
@ -527,7 +545,7 @@ int indirect_buffer(shaders& shaders,
}
const char * textures[] = {
"../texture/butterfly_1024x1024_argb8888.data",
"../texture/butterfly_1024x1024_rgba8888.data",
};
const int textures_length = (sizeof (textures)) / (sizeof (textures[0]));
@ -571,13 +589,15 @@ int main()
while (true) {
int ib_dwords = indirect_buffer(shaders, theta);
drm_radeon_cs(fd,
int ret = drm_radeon_cs(fd,
colorbuffer_handle[colorbuffer_ix],
zbuffer_handle,
flush_handle,
texturebuffer_handle,
textures_length,
ib_dwords);
if (ret == -1)
break;
primary_surface_address(rmmio, colorbuffer_ix);

View File

@ -0,0 +1,672 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
#include "../model/cubesphere.h"
#include "../model/cubesphere_indexed.h"
// Indices into vertex_shader_paths / fragment_shader_paths below (and into the
// shader_offset arrays loaded from them). Keep the order of both tables in
// sync with these values.
#define CLEAR_SHADER 0
#define CUBESPHERE_SHADER 1
#define LIGHT_SHADER 2
// Position of the index buffer handle in the `handles` array that main()
// passes to drm_radeon_cs2 (entry "// 4").
#define INDEXBUFFER_RELOC_INDEX 4
// Vertex shader binaries, ordered by the *_SHADER indices above.
const char * vertex_shader_paths[] = {
"clear.vs.bin",
"matrix_cubesphere.vs.bin",
"light.vs.bin",
};
const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
// Fragment shader binaries, ordered by the *_SHADER indices above.
const char * fragment_shader_paths[] = {
"clear.fs.bin",
"matrix_cubesphere.fs.bin",
"light.fs.bin"
};
const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
// Bundle of the loaded vertex and fragment shaders. main() fills the pointers
// from load_shaders() and the lengths from the corresponding path tables;
// entries are addressed with the *_SHADER indices.
struct shaders {
struct shader_offset * vertex;
struct shader_offset * fragment;
int vertex_length; // number of entries in `vertex`
int fragment_length; // number of entries in `fragment`
};
// Appends the clear pass to the indirect buffer: selects the CLEAR_SHADER
// vertex/fragment pair, sets the depth function to "always" and draws a single
// point primitive centered at (800, 600) with GA_POINT_SIZE programmed to
// 800*12 x 600*12 (presumably sized to cover the whole 1600x1200 target --
// confirm against the GA_POINT_SIZE register units).
void _3d_clear(struct shaders& shaders)
{
  ib_rs_instructions(0);

  // vertex output format: position only
  T0V(VAP_OUT_VTX_FMT_0, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
  T0V(VAP_OUT_VTX_FMT_1, 0);

  ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always
  ib_texture__0();
  ib_vap_stream_cntl__2();

  // shader selection
  T0V(US_PIXSIZE, US_PIXSIZE__PIX_SIZE(1));
  ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
  ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);

  // vertex index range
  T0V(VAP_INDEX_OFFSET, 0);
  T0V(VAP_VF_MAX_VTX_INDX, VAP_VF_MAX_VTX_INDX__MAX_INDX(0));
  T0V(VAP_VF_MIN_VTX_INDX, VAP_VF_MIN_VTX_INDX__MIN_INDX(0));

  // clipping and viewport transform are bypassed for the clear point
  T0V(VAP_CLIP_CNTL, VAP_CLIP_CNTL__CLIP_DISABLE(1));
  T0V(VAP_VTE_CNTL,
      VAP_VTE_CNTL__VTX_XY_FMT(1)  // disable W division
    | VAP_VTE_CNTL__VTX_Z_FMT(1)); // disable W division
  T0V(VAP_CNTL_STATUS, VAP_CNTL_STATUS__PVS_BYPASS(0));

  // point size
  T0V(GA_POINT_SIZE,
      GA_POINT_SIZE__HEIGHT(600 * 12)
    | GA_POINT_SIZE__WIDTH(800 * 12));

  // immediate-mode draw of one 2-dword vertex
  const int floats_per_point = 2;
  const int point_count = 1;
  const float point_center[] = { 800.0f, 600.0f };

  T0V(VAP_VTX_SIZE, VAP_VTX_SIZE__DWORDS_PER_VTX(floats_per_point));

  T3(_3D_DRAW_IMMD_2, (1 + point_count * floats_per_point) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(point_count)
    );
  TF(point_center[0]);
  TF(point_center[1]);
}
// Builds a matrix that linearly remaps the input range [low1, high1] on z to
// [low2, high2] in the third row and to [low3, high3] in the fourth row,
// leaving x and y untouched. Assuming the mat4x4 constructor takes its
// arguments row by row and vectors are multiplied as columns (TODO confirm),
// this yields z' = scale2 * (z - low1) + low2 and w' = scale3 * (z - low1) + low3.
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  float scale2 = (high2 - low2) / (high1 - low1);
  float scale3 = (high3 - low3) / (high1 - low1);

  // first: shift z so that low1 maps to zero
  mat4x4 shift_to_zero(1, 0, 0, 0,
                       0, 1, 0, 0,
                       0, 0, 1, -low1,
                       0, 0, 0, 1);

  // second: scale the shifted z into both output ranges
  mat4x4 remap_ranges(1, 0, 0, 0,
                      0, 1, 0, 0,
                      0, 0, scale2, low2,
                      0, 0, scale3, low3);

  return remap_ranges * shift_to_zero;
}
// Uploads the cube's vertex shader constants and emits an indexed draw whose
// vertices come from the buffer at VERTEXBUFFER_RELOC_INDEX and whose indices
// come from the buffer at INDEXBUFFER_RELOC_INDEX.
//
// trans:       written to constant rows 0-3
// world_trans: written to constant rows 4-7
// light_pos:   written to constant row 8
void _3d_cube_inner(mat4x4 trans,
mat4x4 world_trans,
vec4 light_pos)
{
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
//////////////////////////////////////////////////////////////////////////////
// VAP_PVS
//////////////////////////////////////////////////////////////////////////////
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
// 4
world_trans[0][0], world_trans[0][1], world_trans[0][2], world_trans[0][3],
world_trans[1][0], world_trans[1][1], world_trans[1][2], world_trans[1][3],
world_trans[2][0], world_trans[2][1], world_trans[2][2], world_trans[2][3],
world_trans[3][0], world_trans[3][1], world_trans[3][2], world_trans[3][3],
// 8
light_pos.x, light_pos.y, light_pos.z, light_pos.w,
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
//////////////////////////////////////////////////////////////////////////////
// AOS
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
// The packet body is 6 dwords: array count, ATTR01, ADDR0, ADDR1, ATTR23, ADDR2.
// Three attribute arrays with 3, 2 and 3 components at byte offsets 4*0, 4*3
// and 4*5, all with stride 8 -- presumably interleaved position / texture
// coordinate / normal, 8 dwords per vertex (TODO confirm against the layout
// of cubesphere_vertices).
T3(_3D_LOAD_VBPNTR, (6 - 1));
TU( // VAP_VTX_NUM_ARRAYS
VAP_VTX_NUM_ARRAYS__VTX_NUM_ARRAYS(3)
| VAP_VTX_NUM_ARRAYS__VC_FORCE_PREFETCH(1)
);
TU( // VAP_VTX_AOS_ATTR01
VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(3)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(8)
| VAP_VTX_AOS_ATTR__VTX_AOS_COUNT1(2)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE1(8)
);
TU( // VAP_VTX_AOS_ADDR0
(4 * 0)
);
TU( // VAP_VTX_AOS_ADDR1
(4 * 3)
);
TU( // VAP_VTX_AOS_ATTR23
VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(3)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(8)
);
TU( // VAP_VTX_AOS_ADDR2
(4 * 5)
);
// VAP_VTX_AOS_ADDR is an absolute address in VRAM. However, DRM_RADEON_CS
// modifies this to be an offset relative to the GEM buffer handles given via
// NOP:
T3(_NOP, 0);
TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR0
T3(_NOP, 0);
TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR1
T3(_NOP, 0);
TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR2
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
// NOTE: despite its name, vertex_count holds the number of indices
// (cubesphere_Cube_triangles_length), not the number of unique vertices.
const int vertex_count = cubesphere_Cube_triangles_length;
T3(_3D_DRAW_INDX_2, (1 - 1));
TU( VAP_VF_CNTL__PRIM_TYPE(4)
| VAP_VF_CNTL__PRIM_WALK(1) // indexes in command stream , vertices from memory
| VAP_VF_CNTL__INDEX_SIZE(1) // 32 bits per index
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
// The draw packet above carries no inline indices; the INDX_BUFFER packet
// that follows supplies them from the index buffer.
T3(_INDX_BUFFER, (3 - 1));
TU( // INDX_BUFFER__0
INDX_BUFFER__0__ONE_REG_WR(1)
| INDX_BUFFER__0__SKIP_COUNT(0)
| INDX_BUFFER__0__DESTINATION(VAP_PORT_IDX0 >> 2)
);
TU( // INDX_BUFFER__1
INDX_BUFFER__1__BUFFER_BASE(0) // replaced by reloc
);
// With 32-bit indices, one index occupies one dword, so the size in dwords
// equals the index count.
TU( // INDX_BUFFER__2
INDX_BUFFER__2__BUFFER_SIZE(vertex_count) // in dwords
);
T3(_NOP, 0);
TU(INDEXBUFFER_RELOC_INDEX * 4); // index into relocs array for INDX_BUFFER__1__BUFFER_BASE
}
// Uploads the light marker's vertex shader constants (transform in rows 0-3,
// RGBA color in row 4) and draws object 0 of cubesphere_model in immediate
// mode, emitting only the position (3 dwords) of every triangle corner.
//
// trans: model-to-clip transform for the light marker
void _3d_light_inner(mat4x4 trans)
{
  T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);

  //////////////////////////////////////////////////////////////////////////////
  // VAP_PVS
  //////////////////////////////////////////////////////////////////////////////
  const vec4 color = {1, 1, 0, 1};
  const float consts[] = {
    // 0
    trans[0][0], trans[0][1], trans[0][2], trans[0][3],
    trans[1][0], trans[1][1], trans[1][2], trans[1][3],
    trans[2][0], trans[2][1], trans[2][2], trans[2][3],
    trans[3][0], trans[3][1], trans[3][2], trans[3][3],
    // 4
    // the fourth component is color[3]; it previously repeated color[2],
    // which uploaded the blue value in place of alpha
    color[0], color[1], color[2], color[3],
  };
  ib_vap_pvs_const_cntl(consts, (sizeof (consts)));

  //////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW
  //////////////////////////////////////////////////////////////////////////////
  const model * model = &cubesphere_model;
  const object * obj = model->object[0];
  const int triangle_count = obj->triangle_count;
  const int vertex_count = triangle_count * 3;
  const int dwords_per_vtx = 3; // x, y, z

  T0V(VAP_VTX_SIZE
      , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
      );

  // one VAP_VF_CNTL dword followed by the inline vertex data
  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(4)
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
    );

  for (int i = 0; i < triangle_count; i++) {
    for (int j = 0; j < 3; j++) {
      vec3 p = model->position[obj->triangle[i][j].position];
      TF(p.x);
      TF(p.y);
      TF(p.z);
    }
  }
}
// Appends the light marker pass to the indirect buffer: programs the pipeline
// for the LIGHT_SHADER pair, draws the marker (a cubesphere scaled to 0.1,
// translated by (1, 0, 0) and rotated about y by 2*theta) and returns the
// marker's position in world space.
//
// shaders:      loaded shader table
// view_to_clip: view-to-clip transform shared by all passes
// theta:        animation angle
// returns:      world_trans applied to the model origin
vec3 _3d_light(struct shaders& shaders,
               const mat4x4& view_to_clip,
               float theta)
{
  ib_rs_instructions(1);

  //////////////////////////////////////////////////////////////////////////////
  // VAP OUT
  //////////////////////////////////////////////////////////////////////////////
  T0V(VAP_OUT_VTX_FMT_0
      , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
  T0V(VAP_OUT_VTX_FMT_1
      , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));

  //
  ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
  ib_texture__0();
  ib_vap_stream_cntl__3();

  // shaders
  T0V(US_PIXSIZE
      , US_PIXSIZE__PIX_SIZE(1)
      );
  ib_ga_us(&shaders.fragment[LIGHT_SHADER]);
  ib_vap_pvs(&shaders.vertex[LIGHT_SHADER]);

  //////////////////////////////////////////////////////////////////////////////
  // VAP
  //////////////////////////////////////////////////////////////////////////////
  T0V(VAP_CLIP_CNTL
      , VAP_CLIP_CNTL__PS_UCP_MODE(3)
      );
  T0V(VAP_VTE_CNTL
      , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
      | VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
      | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
      | VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
      | VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0)
      | VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0)
      | VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
      | VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
      | VAP_VTE_CNTL__VTX_W0_FMT(1)
      | VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
      );
  T0V(VAP_CNTL_STATUS
      , VAP_CNTL_STATUS__PVS_BYPASS(0)
      );

  //////////////////////////////////////////////////////////////////////////////
  // VAP INDEX
  //////////////////////////////////////////////////////////////////////////////
  T0V(VAP_INDEX_OFFSET, 0);
  T0V(VAP_VF_MAX_VTX_INDX
      , VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
      );
  T0V(VAP_VF_MIN_VTX_INDX
      , VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
      );

  // light
  mat4x4 t1 = translate(vec3(1, 0, 0));
  mat4x4 s = scale(0.1f);
  mat4x4 ry = rotate_y(theta * 2.f);
  mat4x4 world_trans = ry * t1 * s;
  mat4x4 trans = view_to_clip * world_trans;
  _3d_light_inner(trans);

  // The marker is centered on its model-space origin, so its world position
  // is the transformed origin. The previous code multiplied world_trans by
  // light_pos inside light_pos's own initializer, i.e. by an indeterminate
  // value.
  // NOTE(review): assumes mat4x4 * vec3 treats the vector as a point (w = 1)
  // so that the translation is applied -- confirm in math/transform.hpp.
  vec3 light_pos = world_trans * vec3(0, 0, 0);
  return light_pos;
}
void _3d_cube(struct shaders& shaders,
const mat4x4& view_to_clip,
float theta,
const vec3& light_pos)
{
ib_rs_instructions(4);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_1_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_2_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_3_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
int width = 1024;
int height = 1024;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__323();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[CUBESPHERE_SHADER]);
ib_vap_pvs(&shaders.vertex[CUBESPHERE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(124)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
// cube
mat4x4 rx = rotate_x(1 * theta * 0.5f);
mat4x4 ry = rotate_y(0 * theta * 0.8f + 1.4f);
mat4x4 s = scale(0.9f);
mat4x4 world_trans = rx * ry * s;
mat4x4 trans = view_to_clip * world_trans;
_3d_cube_inner(trans, world_trans, light_pos);
}
// Rebuilds the whole indirect buffer for one frame, starting at ib_ix = 0:
// generic initialization, 1600x1200 color target, output formats, shader
// upload, then the clear, light and cube passes, and finally padding.
//
// shaders: loaded shader table
// theta:   animation angle forwarded to the light and cube passes
// returns: the final ib_ix, which main() passes on as the dword count
int indirect_buffer(shaders& shaders,
                    float theta)
{
  int width = 1600;
  int height = 1200;
  int pitch = width;

  ib_ix = 0;

  ib_generic_initialization();

  // blending disabled
  T0V(RB3D_BLENDCNTL, 0);
  T0V(RB3D_ABLENDCNTL, 0);

  ib_viewport(width, height);
  ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);

  T0V(GB_ENABLE, 0);

  T0V(US_OUT_FMT_0
      , US_OUT_FMT__OUT_FMT(0) // C4_8
      | US_OUT_FMT__C0_SEL__BLUE
      | US_OUT_FMT__C1_SEL__GREEN
      | US_OUT_FMT__C2_SEL__RED
      | US_OUT_FMT__C3_SEL__ALPHA
      | US_OUT_FMT__OUT_SIGN(0)
      );
  T0V(US_OUT_FMT_1
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  T0V(US_OUT_FMT_2
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  // was a second write to US_OUT_FMT_2, which left US_OUT_FMT_3 unprogrammed
  T0V(US_OUT_FMT_3
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );

  load_pvs_shaders(shaders.vertex, shaders.vertex_length);
  load_us_shaders(shaders.fragment, shaders.fragment_length);

  //////////////////////////////////////////////////////////////////////////////
  // DRAW
  //////////////////////////////////////////////////////////////////////////////

  // 3/4 horizontal scale compensates for the 1600x1200 (4:3) target
  mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
  mat4x4 p = perspective(0.01f, 5.0f,
                         0.001f, 0.999f,
                         0.5f, 2.0f);
  mat4x4 t = translate(vec3(0, 0, 3));
  mat4x4 view_to_clip = aspect * p * t;

  _3d_clear(shaders);
  vec3 light_pos = _3d_light(shaders, view_to_clip, theta);
  _3d_cube(shaders, view_to_clip, theta, light_pos);

  //////////////////////////////////////////////////////////////////////////////
  // padding
  //////////////////////////////////////////////////////////////////////////////

  // pad the buffer length to a multiple of 8 dwords
  // (0x80000000 is presumably a type-2 filler packet -- confirm)
  while ((ib_ix % 8) != 0) {
    TU(0x80000000);
  }

  return ib_ix;
}
// Texture files handed to load_textures() in main(). The single entry is
// bound as texturebuffer_handle[0]; its 1024x1024 size matches the dimensions
// programmed in _3d_cube.
const char * textures[] = {
"../texture/butterfly_1024x1024_rgba8888.data",
};
const int textures_length = (sizeof (textures)) / (sizeof (textures[0]));
void transfer_vertex_index(void * vertexbuffer_ptr,
void * indexbuffer_ptr)
{
for (int i = 0; i < cubesphere_vertices_length; i++) {
((float *)vertexbuffer_ptr)[i] = cubesphere_vertices[i];
}
for (int i = 0; i < cubesphere_vertices_length; i++) {
((int *)indexbuffer_ptr)[i] = cubesphere_Cube_triangles[i];
}
asm volatile ("" ::: "memory");
}
// Entry point: loads shaders, opens the DRM device, allocates the color,
// depth, vertex, texture and index buffers, uploads the model, then renders
// frames in an endless loop, alternating between the two color buffers, until
// command submission reports -1.
int main()
{
  struct shaders shaders = {
    .vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
    .fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
    .vertex_length = vertex_shader_paths_length,
    .fragment_length = fragment_shader_paths_length,
  };

  void * rmmio = map_pci_resource2();

  int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
  assert(fd != -1);

  // buffer sizes in bytes (4 bytes per pixel / float / index)
  const int colorbuffer_size = 1600 * 1200 * 4;
  const int vertexbuffer_size = cubesphere_vertices_length * 4;
  const int indexbuffer_size = cubesphere_Cube_triangles_length * 4;

  // allocation order: color[0], color[1], z, vertex, textures, index
  void * colorbuffer_ptr[2];
  int colorbuffer_handle[2];
  for (int i = 0; i < 2; i++) {
    colorbuffer_handle[i] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[i]);
  }

  void * zbuffer_ptr;
  int zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);

  void * vertexbuffer_ptr;
  int vertexbuffer_handle = create_buffer(fd, vertexbuffer_size, &vertexbuffer_ptr);

  int * texturebuffer_handle = load_textures(fd, textures, textures_length);

  void * indexbuffer_ptr;
  int indexbuffer_handle = create_buffer(fd, indexbuffer_size, &indexbuffer_ptr);

  fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
  fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
  fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);

  transfer_vertex_index(vertexbuffer_ptr, indexbuffer_ptr);

  int colorbuffer_ix = 0;
  float theta = 0;

  for (;;) {
    int ib_dwords = indirect_buffer(shaders, theta);

    // order defines the reloc indices used while building the buffer
    int handles[] = {
      colorbuffer_handle[colorbuffer_ix], // 0
      zbuffer_handle,                     // 1
      vertexbuffer_handle,                // 2
      texturebuffer_handle[0],            // 3
      indexbuffer_handle,                 // 4
    };
    int handles_length = (sizeof (handles)) / (sizeof (handles[0]));

    int ret = drm_radeon_cs2(fd, handles, handles_length, ib_dwords);
    if (ret == -1) {
      break;
    }

    primary_surface_address(rmmio, colorbuffer_ix);

    // next state: advance the animation and switch to the other color buffer
    theta += 0.01f;
    colorbuffer_ix ^= 1;
  }

  close(fd);
}

View File

@ -0,0 +1,821 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
#include "../model/cubesphere.h"
// Indices into vertex_shader_paths / fragment_shader_paths below (and into the
// shader_offset arrays loaded from them). Keep the order of both tables in
// sync with these values.
#define CLEAR_SHADER 0
#define CUBESPHERE_SHADER 1
#define LIGHT_SHADER 2
#define TEXTURE_TILE_SHADER 3
// Vertex shader binaries, ordered by the *_SHADER indices above.
const char * vertex_shader_paths[] = {
"clear.vs.bin",
"matrix_cubesphere.vs.bin",
"light.vs.bin",
"texture_tile.vs.bin",
};
const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
// Fragment shader binaries, ordered by the *_SHADER indices above.
const char * fragment_shader_paths[] = {
"clear.fs.bin",
"matrix_cubesphere.fs.bin",
"light.fs.bin",
"texture_tile.fs.bin",
};
const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
// Bundle of the loaded vertex and fragment shaders; entries are addressed
// with the *_SHADER indices, and the lengths are passed to load_pvs_shaders /
// load_us_shaders alongside the arrays.
struct shaders {
struct shader_offset * vertex;
struct shader_offset * fragment;
int vertex_length; // number of entries in `vertex`
int fragment_length; // number of entries in `fragment`
};
// Appends the clear pass to the indirect buffer: selects the CLEAR_SHADER
// vertex/fragment pair, sets the depth function to "always" and draws a single
// point primitive centered at (800, 600) with GA_POINT_SIZE programmed to
// 800*12 x 600*12 (presumably sized to cover the whole 1600x1200 target --
// confirm against the GA_POINT_SIZE register units).
void _3d_clear(const shaders& shaders)
{
  ib_rs_instructions(0);

  // vertex output format: position only
  T0V(VAP_OUT_VTX_FMT_0, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
  T0V(VAP_OUT_VTX_FMT_1, 0);

  ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always
  ib_texture__0();
  ib_vap_stream_cntl__2();

  // shader selection
  T0V(US_PIXSIZE, US_PIXSIZE__PIX_SIZE(1));
  ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
  ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);

  // vertex index range
  T0V(VAP_INDEX_OFFSET, 0);
  T0V(VAP_VF_MAX_VTX_INDX, VAP_VF_MAX_VTX_INDX__MAX_INDX(0));
  T0V(VAP_VF_MIN_VTX_INDX, VAP_VF_MIN_VTX_INDX__MIN_INDX(0));

  // clipping and viewport transform are bypassed for the clear point
  T0V(VAP_CLIP_CNTL, VAP_CLIP_CNTL__CLIP_DISABLE(1));
  T0V(VAP_VTE_CNTL,
      VAP_VTE_CNTL__VTX_XY_FMT(1)  // disable W division
    | VAP_VTE_CNTL__VTX_Z_FMT(1)); // disable W division
  T0V(VAP_CNTL_STATUS, VAP_CNTL_STATUS__PVS_BYPASS(0));

  // point size
  T0V(GA_POINT_SIZE,
      GA_POINT_SIZE__HEIGHT(600 * 12)
    | GA_POINT_SIZE__WIDTH(800 * 12));

  // immediate-mode draw of one 2-dword vertex
  const int floats_per_point = 2;
  const int point_count = 1;
  const float point_center[] = { 800.0f, 600.0f };

  T0V(VAP_VTX_SIZE, VAP_VTX_SIZE__DWORDS_PER_VTX(floats_per_point));

  T3(_3D_DRAW_IMMD_2, (1 + point_count * floats_per_point) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(point_count)
    );
  TF(point_center[0]);
  TF(point_center[1]);
}
// Builds a matrix that linearly remaps the input range [low1, high1] on z to
// [low2, high2] in the third row and to [low3, high3] in the fourth row,
// leaving x and y untouched. Assuming the mat4x4 constructor takes its
// arguments row by row and vectors are multiplied as columns (TODO confirm),
// this yields z' = scale2 * (z - low1) + low2 and w' = scale3 * (z - low1) + low3.
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  float scale2 = (high2 - low2) / (high1 - low1);
  float scale3 = (high3 - low3) / (high1 - low1);

  // first: shift z so that low1 maps to zero
  mat4x4 shift_to_zero(1, 0, 0, 0,
                       0, 1, 0, 0,
                       0, 0, 1, -low1,
                       0, 0, 0, 1);

  // second: scale the shifted z into both output ranges
  mat4x4 remap_ranges(1, 0, 0, 0,
                      0, 1, 0, 0,
                      0, 0, scale2, low2,
                      0, 0, scale3, low3);

  return remap_ranges * shift_to_zero;
}
// Uploads the cube's vertex shader constants (trans in rows 0-3, world_trans
// in rows 4-7, light_pos in row 8) and draws object 0 of cubesphere_model in
// immediate mode. Every triangle corner is emitted as 8 dwords:
// position (3), texture coordinate (2), normal (3).
void _3d_cube_inner(mat4x4 trans,
                    mat4x4 world_trans,
                    vec4 light_pos)
{
  T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);

  // vertex shader constants
  const float consts[] = {
    // rows 0-3: model-to-clip transform
    trans[0][0], trans[0][1], trans[0][2], trans[0][3],
    trans[1][0], trans[1][1], trans[1][2], trans[1][3],
    trans[2][0], trans[2][1], trans[2][2], trans[2][3],
    trans[3][0], trans[3][1], trans[3][2], trans[3][3],
    // rows 4-7: model-to-world transform
    world_trans[0][0], world_trans[0][1], world_trans[0][2], world_trans[0][3],
    world_trans[1][0], world_trans[1][1], world_trans[1][2], world_trans[1][3],
    world_trans[2][0], world_trans[2][1], world_trans[2][2], world_trans[2][3],
    world_trans[3][0], world_trans[3][1], world_trans[3][2], world_trans[3][3],
    // row 8: light position
    light_pos.x, light_pos.y, light_pos.z, light_pos.w,
  };
  ib_vap_pvs_const_cntl(consts, (sizeof (consts)));

  // immediate-mode draw
  const model * mdl = &cubesphere_model;
  const object * mesh = mdl->object[0];
  const int num_triangles = mesh->triangle_count;
  const int num_vertices = num_triangles * 3;
  const int vertex_dwords = 8;

  T0V(VAP_VTX_SIZE, VAP_VTX_SIZE__DWORDS_PER_VTX(vertex_dwords));

  // one VAP_VF_CNTL dword followed by the inline vertex data
  T3(_3D_DRAW_IMMD_2, (1 + num_vertices * vertex_dwords) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(4)
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(num_vertices)
    );

  for (int tri = 0; tri < num_triangles; tri++) {
    for (int corner = 0; corner < 3; corner++) {
      vec3 position = mdl->position[mesh->triangle[tri][corner].position];
      vec2 texcoord = mdl->texture[mesh->triangle[tri][corner].texture];
      vec3 normal = mdl->normal[mesh->triangle[tri][corner].normal];

      TF(position.x);
      TF(position.y);
      TF(position.z);

      TF(texcoord.x);
      TF(texcoord.y);

      TF(normal.x);
      TF(normal.y);
      TF(normal.z);
    }
  }
}
// Uploads the light marker's vertex shader constants (transform in rows 0-3,
// RGBA color in row 4) and draws object 0 of cubesphere_model in immediate
// mode, emitting only the position (3 dwords) of every triangle corner.
//
// trans: model-to-clip transform for the light marker
void _3d_light_inner(mat4x4 trans)
{
  T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);

  //////////////////////////////////////////////////////////////////////////////
  // VAP_PVS
  //////////////////////////////////////////////////////////////////////////////
  const vec4 color = {1, 1, 0, 1};
  const float consts[] = {
    // 0
    trans[0][0], trans[0][1], trans[0][2], trans[0][3],
    trans[1][0], trans[1][1], trans[1][2], trans[1][3],
    trans[2][0], trans[2][1], trans[2][2], trans[2][3],
    trans[3][0], trans[3][1], trans[3][2], trans[3][3],
    // 4
    // the fourth component is color[3]; it previously repeated color[2],
    // which uploaded the blue value in place of alpha
    color[0], color[1], color[2], color[3],
  };
  ib_vap_pvs_const_cntl(consts, (sizeof (consts)));

  //////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW
  //////////////////////////////////////////////////////////////////////////////
  const model * model = &cubesphere_model;
  const object * obj = model->object[0];
  const int triangle_count = obj->triangle_count;
  const int vertex_count = triangle_count * 3;
  const int dwords_per_vtx = 3; // x, y, z

  T0V(VAP_VTX_SIZE
      , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
      );

  // one VAP_VF_CNTL dword followed by the inline vertex data
  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(4)
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
    );

  for (int i = 0; i < triangle_count; i++) {
    for (int j = 0; j < 3; j++) {
      vec3 p = model->position[obj->triangle[i][j].position];
      TF(p.x);
      TF(p.y);
      TF(p.z);
    }
  }
}
// Appends the light marker pass to the indirect buffer: programs the pipeline
// for the LIGHT_SHADER pair, draws the marker (a cubesphere scaled to 0.1,
// translated by (1, 0, 0) and rotated about y by 2*theta) and returns the
// marker's position in world space.
//
// shaders:      loaded shader table
// view_to_clip: view-to-clip transform shared by all passes
// theta:        animation angle
// returns:      world_trans applied to the model origin
vec3 _3d_light(const shaders& shaders,
               const mat4x4& view_to_clip,
               float theta)
{
  ib_rs_instructions(1);

  //////////////////////////////////////////////////////////////////////////////
  // VAP OUT
  //////////////////////////////////////////////////////////////////////////////
  T0V(VAP_OUT_VTX_FMT_0
      , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
  T0V(VAP_OUT_VTX_FMT_1
      , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));

  //
  ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
  ib_texture__0();
  ib_vap_stream_cntl__3();

  // shaders
  T0V(US_PIXSIZE
      , US_PIXSIZE__PIX_SIZE(1)
      );
  ib_ga_us(&shaders.fragment[LIGHT_SHADER]);
  ib_vap_pvs(&shaders.vertex[LIGHT_SHADER]);

  //////////////////////////////////////////////////////////////////////////////
  // VAP
  //////////////////////////////////////////////////////////////////////////////
  T0V(VAP_CLIP_CNTL
      , VAP_CLIP_CNTL__PS_UCP_MODE(3)
      );
  T0V(VAP_VTE_CNTL
      , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
      | VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
      | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
      | VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
      | VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0)
      | VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0)
      | VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
      | VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
      | VAP_VTE_CNTL__VTX_W0_FMT(1)
      | VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
      );
  T0V(VAP_CNTL_STATUS
      , VAP_CNTL_STATUS__PVS_BYPASS(0)
      );

  //////////////////////////////////////////////////////////////////////////////
  // VAP INDEX
  //////////////////////////////////////////////////////////////////////////////
  T0V(VAP_INDEX_OFFSET, 0);
  T0V(VAP_VF_MAX_VTX_INDX
      , VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
      );
  T0V(VAP_VF_MIN_VTX_INDX
      , VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
      );

  // light
  mat4x4 t1 = translate(vec3(1, 0, 0));
  mat4x4 s = scale(0.1f);
  mat4x4 ry = rotate_y(theta * 2.f);
  mat4x4 world_trans = ry * t1 * s;
  mat4x4 trans = view_to_clip * world_trans;
  _3d_light_inner(trans);

  // The marker is centered on its model-space origin, so its world position
  // is the transformed origin. The previous code multiplied world_trans by
  // light_pos inside light_pos's own initializer, i.e. by an indeterminate
  // value.
  // NOTE(review): assumes mat4x4 * vec3 treats the vector as a point (w = 1)
  // so that the translation is applied -- confirm in math/transform.hpp.
  vec3 light_pos = world_trans * vec3(0, 0, 0);
  return light_pos;
}
void _3d_cube(const shaders& shaders,
const mat4x4& view_to_clip,
float theta,
const vec3& light_pos)
{
ib_rs_instructions(4);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_1_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_2_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_3_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
int width = 1024;
int height = 1024;
int macrotile = 1;
int microtile = 1;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__323();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[CUBESPHERE_SHADER]);
ib_vap_pvs(&shaders.vertex[CUBESPHERE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
// cube
mat4x4 rx = rotate_x(1 * theta * 0.5f);
mat4x4 ry = rotate_y(0 * theta * 0.8f + 1.4f);
mat4x4 s = scale(0.9f);
mat4x4 world_trans = rx * ry * s;
mat4x4 trans = view_to_clip * world_trans;
_3d_cube_inner(trans, world_trans, light_pos);
}
int indirect_buffer(const shaders& shaders,
float theta)
{
int width = 1600;
int height = 1200;
int pitch = width;
ib_ix = 0;
ib_generic_initialization();
T0V(RB3D_BLENDCNTL, 0);
T0V(RB3D_ABLENDCNTL, 0);
ib_viewport(width, height);
ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);
T0V(GB_ENABLE, 0);
T0V(US_OUT_FMT_0
, US_OUT_FMT__OUT_FMT(0) // C4_8
| US_OUT_FMT__C0_SEL__BLUE
| US_OUT_FMT__C1_SEL__GREEN
| US_OUT_FMT__C2_SEL__RED
| US_OUT_FMT__C3_SEL__ALPHA
| US_OUT_FMT__OUT_SIGN(0)
);
T0V(US_OUT_FMT_1
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_3
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
load_pvs_shaders(shaders.vertex, shaders.vertex_length);
load_us_shaders(shaders.fragment, shaders.fragment_length);
//////////////////////////////////////////////////////////////////////////////
// DRAW
//////////////////////////////////////////////////////////////////////////////
mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
mat4x4 p = perspective(0.01f, 5.0f,
0.001f, 0.999f,
0.5f, 2.0f);
mat4x4 t = translate(vec3(0, 0, 3));
mat4x4 view_to_clip = aspect * p * t;
_3d_clear(shaders);
vec3 light_pos = _3d_light(shaders, view_to_clip, theta);
_3d_cube(shaders, view_to_clip, theta, light_pos);
//////////////////////////////////////////////////////////////////////////////
// padding
//////////////////////////////////////////////////////////////////////////////
while ((ib_ix % 8) != 0) {
TU(0x80000000);
}
return ib_ix;
}
int _tile_texture(const shaders& shaders,
int input_reloc_index,
int output_reloc_index)
{
int width = 1024;
int height = 1024;
int pitch = width;
float x = (float)width * 0.5f;
float y = (float)height * 0.5f;
ib_ix = 0;
ib_generic_initialization();
ib_viewport(width, height);
ib_colorbuffer(output_reloc_index, pitch, 1, 1); // macrotile, microtile
T0V(US_OUT_FMT_0
, US_OUT_FMT__OUT_FMT(0) // C4_8
| US_OUT_FMT__C0_SEL__RED
| US_OUT_FMT__C1_SEL__GREEN
| US_OUT_FMT__C2_SEL__BLUE
| US_OUT_FMT__C3_SEL__ALPHA
| US_OUT_FMT__OUT_SIGN(0)
);
T0V(US_OUT_FMT_1
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_3
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
// shaders
load_pvs_shaders(shaders.vertex, shaders.vertex_length);
load_us_shaders(shaders.fragment, shaders.fragment_length);
// GA
T0V(GB_ENABLE
, GB_ENABLE__POINT_STUFF_ENABLE(1)
| GB_ENABLE__TEX0_SOURCE(2) // stuff with source texture coordinates s,t
);
T0Vf(GA_POINT_S0, 0.0f);
T0Vf(GA_POINT_T0, 1.0f);
T0Vf(GA_POINT_S1, 1.0f);
T0Vf(GA_POINT_T1, 0.0f);
//////////////////////////////////////////////////////////////////////////////
// RS
//////////////////////////////////////////////////////////////////////////////
int rs_instructions = 1;
ib_rs_instructions(0);
T0V(RS_IP_0
, RS_IP__TEX_PTR_S(0)
| RS_IP__TEX_PTR_T(1)
| RS_IP__TEX_PTR_R(62) // constant 0.0
| RS_IP__TEX_PTR_Q(63) // constant 1.0
);
T0V(RS_COUNT
, RS_COUNT__IT_COUNT(2)
| RS_COUNT__IC_COUNT(0)
| RS_COUNT__W_ADDR(0)
| RS_COUNT__HIRES_EN(1)
);
T0V(RS_INST_COUNT
, RS_INST_COUNT__INST_COUNT(rs_instructions - 1));
T0V(RS_INST_0
, RS_INST__TEX_ID(0)
| RS_INST__TEX_CN(1)
| RS_INST__TEX_ADDR(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)
);
T0V(VAP_OUT_VTX_FMT_1
, 0
);
//
T0V(ZB_CNTL, 0);
T0V(ZB_ZSTENCILCNTL, 0);
//
int macrotile = 0;
int microtile = 0;
int clamp = 2; // clamp to [0.0, 1.0]
ib_texture__1(input_reloc_index,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__2();
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
);
ib_ga_us(&shaders.fragment[TEXTURE_TILE_SHADER]);
ib_vap_pvs(&shaders.vertex[TEXTURE_TILE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// GA POINT SIZE
//////////////////////////////////////////////////////////////////////////////
T0V(GA_POINT_SIZE
, GA_POINT_SIZE__HEIGHT((int)(y * 12.0f))
| GA_POINT_SIZE__WIDTH((int)(x * 12.0f))
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
const float center[] = {
x, y,
};
const int vertex_count = 1;
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
for (int i = 0; i < 2; i++) {
TF(center[i]);
}
return ib_ix;
}
// Texture image files; main() passes this table to load_textures().
const char * textures[] = {
  "../texture/butterfly_1024x1024_rgba8888.data",
};
// Number of entries in textures[].
const int textures_length = sizeof textures / sizeof textures[0];
// Test driver:
//   1. loads shaders and allocates color / z / flush / texture / test buffers
//   2. submits one _tile_texture() pass that renders the loaded texture into
//      the test buffer
//   3. renders frames with indirect_buffer(), passing the test buffer as the
//      only texture, until a submission fails
//   4. writes the first 1024*1024*4 bytes of the test buffer to "test.data"
//
// Returns 1 if the tiling submission fails; otherwise falls off the end of
// main (exit status 0).
int main()
{
  struct shaders shaders = {
    .vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
    .fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
    .vertex_length = vertex_shader_paths_length,
    .fragment_length = fragment_shader_paths_length,
  };

  void * rmmio = map_pci_resource2();

  int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
  assert(fd != -1);

  const int colorbuffer_size = 1600 * 1200 * 4;

  int colorbuffer_handle[2];
  int zbuffer_handle;
  int * texturebuffer_handle;
  int flush_handle;
  int test_handle;

  void * colorbuffer_ptr[2];
  void * zbuffer_ptr;
  void * test_ptr;

  // buffers
  colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
  colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
  test_handle = create_buffer(fd, 1600 * 1200 * 4, &test_ptr);
  zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
  flush_handle = create_flush_buffer(fd);
  texturebuffer_handle = load_textures(fd, textures, textures_length);

  fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
  fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
  fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);
  fprintf(stderr, "test handle %d\n", test_handle);

  int colorbuffer_ix = 0;
  float theta = 0;

  // tiling pass: texture -> test buffer
  {
    int ib_dwords = _tile_texture(shaders,
                                  TEXTUREBUFFER_RELOC_INDEX, // input
                                  COLORBUFFER_RELOC_INDEX);  // output

    printf("here2\n");

    int ret = drm_radeon_cs(fd,
                            test_handle, // colorbuffer
                            zbuffer_handle, // unused
                            flush_handle,
                            texturebuffer_handle,
                            textures_length,
                            ib_dwords);
    // The render loop below samples the test buffer; without a successful
    // tiling pass its contents are meaningless, so stop here (same -1 failure
    // convention as the check in the render loop).
    if (ret == -1) {
      fprintf(stderr, "tile pass: drm_radeon_cs failed\n");
      close(fd);
      return 1;
    }
  }

  // render loop: runs until a command submission fails
  while (true) {
    int ib_dwords = indirect_buffer(shaders, theta);

    // the test buffer (output of the tiling pass) is the only texture
    int ret = drm_radeon_cs(fd,
                            colorbuffer_handle[colorbuffer_ix],
                            zbuffer_handle,
                            flush_handle,
                            &test_handle,
                            1,
                            ib_dwords);
    if (ret == -1)
      break;

    primary_surface_address(rmmio, colorbuffer_ix);

    // next state
    theta += 0.01f;
    colorbuffer_ix = (colorbuffer_ix + 1) & 1;
  }

  // dump the tiled texture for offline inspection
  {
    printf("test.data\n");
    int out_fd = open("test.data", O_RDWR|O_CREAT, 0644);
    assert(out_fd >= 0);
    ssize_t write_length = write(out_fd, test_ptr, 1024 * 1024 * 4);
    assert(write_length == 1024 * 1024 * 4);
    close(out_fd);
  }

  close(fd);
}

546
src/particle.cpp Normal file
View File

@ -0,0 +1,546 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
#include "../model/plane.h"
// Indices into vertex_shader_paths[] / fragment_shader_paths[] (and into the
// shader_offset arrays that main() loads from them).
#define CLEAR_SHADER 0
#define PLANE_SHADER 1
#define PARTICLE_SHADER 2
#define TEXTURE_TILE_SHADER 3

// Vertex shader binaries, ordered to match the *_SHADER indices above.
const char * vertex_shader_paths[] = {
  "clear.vs.bin",
  "particle_plane.vs.bin",
  "particle_particle.vs.bin",
  "texture_tile.vs.bin",
};
const int vertex_shader_paths_length = sizeof vertex_shader_paths / sizeof vertex_shader_paths[0];

// Fragment shader binaries, ordered to match the *_SHADER indices above.
const char * fragment_shader_paths[] = {
  "clear.fs.bin",
  "particle_plane.fs.bin",
  "particle_particle.fs.bin",
  "texture_tile.fs.bin",
};
const int fragment_shader_paths_length = sizeof fragment_shader_paths / sizeof fragment_shader_paths[0];

// Indices into textures[]; added to TEXTUREBUFFER_RELOC_INDEX when binding.
#define PLANE_TEXTURE 0
#define PARTICLE_TEXTURE 1

// Texture image files, ordered to match the *_TEXTURE indices above.
const char * textures[] = {
  "../texture/plane_32x32_rgba8888.data",
  "../texture/particle_32x32_rgba8888.data",
};
const int textures_length = sizeof textures / sizeof textures[0];
// Loaded shader tables shared by the draw routines.
// main() fills this with designated initializers, so the field order below
// must not change.
struct shaders {
// result of load_shaders(vertex_shader_paths, ...); indexed by the *_SHADER constants
struct shader_offset * vertex;
// result of load_shaders(fragment_shader_paths, ...); indexed by the *_SHADER constants
struct shader_offset * fragment;
// number of entries in `vertex` (passed to load_pvs_shaders)
int vertex_length;
// number of entries in `fragment` (passed to load_us_shaders)
int fragment_length;
};
// Emits the commands for the clear pass: one point, centered at (800, 600),
// drawn with the CLEAR_SHADER vertex/fragment shaders, no texture, and the
// z buffer bound with compare function 7 ("always").
void _3d_clear(struct shaders& shaders)
{
  ib_rs_instructions(0);

  ////////////////////////////////////////////////////////////////////////////
  // VAP OUT: position only
  ////////////////////////////////////////////////////////////////////////////

  T0V(VAP_OUT_VTX_FMT_0, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
  T0V(VAP_OUT_VTX_FMT_1, 0);

  ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always

  ib_texture__0();

  ib_vap_stream_cntl__2();

  // shaders
  T0V(US_PIXSIZE, US_PIXSIZE__PIX_SIZE(1));
  ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
  ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);

  ////////////////////////////////////////////////////////////////////////////
  // VAP INDEX
  ////////////////////////////////////////////////////////////////////////////

  T0V(VAP_INDEX_OFFSET, 0);
  T0V(VAP_VF_MAX_VTX_INDX, VAP_VF_MAX_VTX_INDX__MAX_INDX(0));
  T0V(VAP_VF_MIN_VTX_INDX, VAP_VF_MIN_VTX_INDX__MIN_INDX(0));

  ////////////////////////////////////////////////////////////////////////////
  // VAP
  ////////////////////////////////////////////////////////////////////////////

  T0V(VAP_CLIP_CNTL, VAP_CLIP_CNTL__CLIP_DISABLE(1));
  T0V(VAP_VTE_CNTL,
      VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
      | VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
  );
  T0V(VAP_CNTL_STATUS, VAP_CNTL_STATUS__PVS_BYPASS(0));

  ////////////////////////////////////////////////////////////////////////////
  // GA POINT SIZE
  // NOTE(review): 800x600 is half of the 1600x1200 target -- presumably
  // half-extents in 1/12-pixel units; confirm against the register reference.
  ////////////////////////////////////////////////////////////////////////////

  T0V(GA_POINT_SIZE,
      GA_POINT_SIZE__HEIGHT(600 * 12)
      | GA_POINT_SIZE__WIDTH(800 * 12)
  );

  ////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW: a single immediate-mode point
  ////////////////////////////////////////////////////////////////////////////

  const int dwords_per_vtx = 2;
  T0V(VAP_VTX_SIZE, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx));

  const float point_xy[] = {
    800.0f, 600.0f,
  };
  const int vertex_count = 1;

  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
  );
  for (const float coord : point_xy) {
    TF(coord);
  }
}
// Builds the projection used by indirect_buffer() as the product of two
// matrices. Reading the constructor arguments as four rows of four:
//   - the first matrix is identity except for -low1 in the last column of
//     the third row
//   - the second matrix replaces the third row with (0, 0, scale2, low2) and
//     the fourth row with (0, 0, scale3, low3)
// where scale2 / scale3 are the ratios of the [low2, high2] / [low3, high3]
// ranges to the [low1, high1] range.
// NOTE(review): presumably this remaps z from [low1, high1] into
// [low2, high2] and derives w from [low3, high3]; that depends on mat4x4's
// storage and multiplication conventions, which are not visible here.
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  float z_scale = (high2 - low2) / (high1 - low1);
  float w_scale = (high3 - low3) / (high1 - low1);

  mat4x4 offset_input(1, 0, 0, 0,
                      0, 1, 0, 0,
                      0, 0, 1, -low1,
                      0, 0, 0, 1);

  mat4x4 remap_output(1, 0, 0, 0,
                      0, 1, 0, 0,
                      0, 0, z_scale, low2,
                      0, 0, w_scale, low3);

  return remap_output * offset_input;
}
// Uploads `trans` as 16 vertex shader constants and then draws the first
// object of plane_model as an immediate-mode triangle list, with 5 dwords
// per vertex: position x, y, z followed by texture x, y.
void _3d_plane_inner(mat4x4 trans)
{
  T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);

  ////////////////////////////////////////////////////////////////////////////
  // VAP_PVS: constants 0..3 hold trans[0]..trans[3]
  ////////////////////////////////////////////////////////////////////////////

  float consts[4 * 4];
  for (int row = 0; row < 4; row++) {
    for (int col = 0; col < 4; col++) {
      consts[row * 4 + col] = trans[row][col];
    }
  }
  ib_vap_pvs_const_cntl(consts, (sizeof (consts)));

  ////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW
  ////////////////////////////////////////////////////////////////////////////

  const model * mesh = &plane_model;
  const object * obj = mesh->object[0];
  const int triangle_count = obj->triangle_count;
  const int vertex_count = triangle_count * 3;
  const int dwords_per_vtx = 5;

  T0V(VAP_VTX_SIZE, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx));

  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(4)
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
  );

  // walk the vertices in triangle order: vertex v is corner (v % 3) of
  // triangle (v / 3)
  for (int v = 0; v < vertex_count; v++) {
    const int tri = v / 3;
    const int corner = v % 3;

    vec3 pos = mesh->position[obj->triangle[tri][corner].position];
    vec2 uv = mesh->texture[obj->triangle[tri][corner].texture];

    TF(pos.x);
    TF(pos.y);
    TF(pos.z);
    TF(uv.x);
    TF(uv.y);
  }
}
void _3d_plane(struct shaders& shaders,
const mat4x4& world_to_clip,
float theta)
{
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PLANE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__32();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PLANE_SHADER]);
ib_vap_pvs(&shaders.vertex[PLANE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
mat4x4 s = scale(1.0f);
mat4x4 rx = rotate_x(-PI / 2.0f);
mat4x4 local_to_world = s * rx;
mat4x4 trans = world_to_clip * local_to_world;
_3d_plane_inner(trans);
}
void _3d_particle(struct shaders& shaders,
const mat4x4& world_to_clip,
float theta)
{
// enable blending
T0V(RB3D_BLENDCNTL
, RB3D_BLENDCNTL__ALPHA_BLEND_ENABLE__ENABLE
| RB3D_BLENDCNTL__READ_ENABLE(1)
| RB3D_BLENDCNTL__SRCBLEND__GL_ONE
| RB3D_BLENDCNTL__DESTBLEND__GL_ONE
| RB3D_BLENDCNTL__SRC_ALPHA_0_NO_READ(0)
| RB3D_BLENDCNTL__SRC_ALPHA_1_NO_READ(0)
);
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PARTICLE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__32();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PLANE_SHADER]);
ib_vap_pvs(&shaders.vertex[PLANE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
mat4x4 s = scale(1.0f);
mat4x4 local_to_world = s;
mat4x4 trans = world_to_clip * local_to_world;
_3d_plane_inner(trans);
}
// Builds the indirect buffer for one 1600x1200 frame: common state, shader
// upload, then the clear, plane and particle passes.
//
// shaders: loaded shader tables (uploaded in full, then bound per pass)
// theta:   animation angle; the camera's Y rotation is theta * 0.8
//
// Resets ib_ix to 0 before emitting. The buffer is padded with 0x80000000
// words until ib_ix is a multiple of 8, and the final ib_ix is returned
// (main() uses it as the dword count of the submission).
int indirect_buffer(shaders& shaders,
                    float theta)
{
  int width = 1600;
  int height = 1200;
  int pitch = width;

  ib_ix = 0;

  ib_generic_initialization();

  // blending off by default; _3d_particle() turns it on for its own draw
  T0V(RB3D_BLENDCNTL, 0);
  T0V(RB3D_ABLENDCNTL, 0);

  ib_viewport(width, height);
  ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);

  T0V(GB_ENABLE, 0);

  T0V(US_OUT_FMT_0
      , US_OUT_FMT__OUT_FMT(0) // C4_8
      | US_OUT_FMT__C0_SEL__BLUE
      | US_OUT_FMT__C1_SEL__GREEN
      | US_OUT_FMT__C2_SEL__RED
      | US_OUT_FMT__C3_SEL__ALPHA
      | US_OUT_FMT__OUT_SIGN(0)
      );
  T0V(US_OUT_FMT_1
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  T0V(US_OUT_FMT_2
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  // Previously US_OUT_FMT_2 was written a second time here, which left
  // US_OUT_FMT_3 unprogrammed.
  T0V(US_OUT_FMT_3
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );

  load_pvs_shaders(shaders.vertex, shaders.vertex_length);
  load_us_shaders(shaders.fragment, shaders.fragment_length);

  ////////////////////////////////////////////////////////////////////////////
  // DRAW
  ////////////////////////////////////////////////////////////////////////////

  // 3/4 X scale compensates for the 1600x1200 (4:3) target
  mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
  mat4x4 p = perspective(0.01f, 3.0f,
                         0.001f, 0.999f,
                         1.0f, 3.0f);
  mat4x4 t = translate(vec3(0, 0, 1));
  mat4x4 rx = rotate_x(-PI / 8.0f);
  mat4x4 ry = rotate_y(theta * 0.8f);
  mat4x4 world_to_view = t * rx * ry;
  mat4x4 world_to_clip = aspect * p * world_to_view;

  _3d_clear(shaders);
  _3d_plane(shaders, world_to_clip, theta);
  _3d_particle(shaders, world_to_clip, theta);

  ////////////////////////////////////////////////////////////////////////////
  // padding
  ////////////////////////////////////////////////////////////////////////////

  while ((ib_ix % 8) != 0) {
    TU(0x80000000);
  }

  return ib_ix;
}
// Renders exactly one frame with indirect_buffer(), points the display at
// the rendered colorbuffer, then dumps colorbuffer 0 and the z buffer to
// "colorbuffer0.data" and "zbuffer.data".
//
// NOTE(review): the result of drm_radeon_cs() is not checked here.
int main()
{
  struct shaders shaders = {
    .vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
    .fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
    .vertex_length = vertex_shader_paths_length,
    .fragment_length = fragment_shader_paths_length,
  };

  void * rmmio = map_pci_resource2();

  int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
  assert(fd != -1);

  // every buffer is sized for a 1600x1200 surface at 4 bytes per pixel
  const int colorbuffer_size = 1600 * 1200 * 4;

  void * colorbuffer_ptr[2];
  void * zbuffer_ptr;

  int colorbuffer_handle[2];
  colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
  colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
  int zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
  int flush_handle = create_flush_buffer(fd);
  int * texturebuffer_handle = load_textures(fd, textures, textures_length);

  fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
  fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
  fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);

  int colorbuffer_ix = 0;
  float theta = PI * 0.5;

  // a single frame is rendered before the buffers are dumped
  for (int frame = 0; frame < 1; frame++) {
    int ib_dwords = indirect_buffer(shaders, theta);

    drm_radeon_cs(fd,
                  colorbuffer_handle[colorbuffer_ix],
                  zbuffer_handle,
                  flush_handle,
                  texturebuffer_handle,
                  textures_length,
                  ib_dwords);

    primary_surface_address(rmmio, colorbuffer_ix);

    // next state
    theta += 0.01f;
    colorbuffer_ix ^= 1;
  }

  // dump the rendered color buffer
  {
    printf("colorbuffer0.data\n");
    int out_fd = open("colorbuffer0.data", O_RDWR|O_CREAT, 0644);
    assert(out_fd >= 0);
    ssize_t written = write(out_fd, colorbuffer_ptr[0], colorbuffer_size);
    assert(written == colorbuffer_size);
    close(out_fd);
  }

  // dump the z buffer (allocated with the same size as a color buffer)
  {
    printf("zbuffer.data\n");
    int out_fd = open("zbuffer.data", O_RDWR|O_CREAT, 0644);
    assert(out_fd >= 0);
    ssize_t written = write(out_fd, zbuffer_ptr, colorbuffer_size);
    assert(written == colorbuffer_size);
    close(out_fd);
  }

  close(fd);
}

574
src/particle_oriented.cpp Normal file
View File

@ -0,0 +1,574 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
#include "../model/plane.h"
// Indices into vertex_shader_paths[] / fragment_shader_paths[] (and into the
// shader_offset arrays that main() loads from them).
#define CLEAR_SHADER 0
#define PLANE_SHADER 1
#define PARTICLE_SHADER 2
#define TEXTURE_TILE_SHADER 3

// Vertex shader binaries, ordered to match the *_SHADER indices above.
const char * vertex_shader_paths[] = {
  "clear.vs.bin",
  "particle_plane.vs.bin",
  "particle_particle.vs.bin",
  "texture_tile.vs.bin",
};
const int vertex_shader_paths_length = sizeof vertex_shader_paths / sizeof vertex_shader_paths[0];

// Fragment shader binaries, ordered to match the *_SHADER indices above.
const char * fragment_shader_paths[] = {
  "clear.fs.bin",
  "particle_plane.fs.bin",
  "particle_particle.fs.bin",
  "texture_tile.fs.bin",
};
const int fragment_shader_paths_length = sizeof fragment_shader_paths / sizeof fragment_shader_paths[0];

// Indices into textures[]; added to TEXTUREBUFFER_RELOC_INDEX when binding.
#define PLANE_TEXTURE 0
#define PARTICLE_TEXTURE 1

// Texture image files, ordered to match the *_TEXTURE indices above.
const char * textures[] = {
  "../texture/plane_32x32_rgba8888.data",
  "../texture/particle_32x32_rgba8888.data",
};
const int textures_length = sizeof textures / sizeof textures[0];
// Loaded shader tables shared by the draw routines.
// main() fills this with designated initializers, so the field order below
// must not change.
struct shaders {
// result of load_shaders(vertex_shader_paths, ...); indexed by the *_SHADER constants
struct shader_offset * vertex;
// result of load_shaders(fragment_shader_paths, ...); indexed by the *_SHADER constants
struct shader_offset * fragment;
// number of entries in `vertex` (passed to load_pvs_shaders)
int vertex_length;
// number of entries in `fragment` (passed to load_us_shaders)
int fragment_length;
};
// Emits the commands for the clear pass: one point, centered at (800, 600),
// drawn with the CLEAR_SHADER vertex/fragment shaders, no texture, and the
// z buffer bound with compare function 7 ("always").
void _3d_clear(struct shaders& shaders)
{
  ib_rs_instructions(0);

  ////////////////////////////////////////////////////////////////////////////
  // VAP OUT: position only
  ////////////////////////////////////////////////////////////////////////////

  T0V(VAP_OUT_VTX_FMT_0, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
  T0V(VAP_OUT_VTX_FMT_1, 0);

  ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always

  ib_texture__0();

  ib_vap_stream_cntl__2();

  // shaders
  T0V(US_PIXSIZE, US_PIXSIZE__PIX_SIZE(1));
  ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
  ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);

  ////////////////////////////////////////////////////////////////////////////
  // VAP INDEX
  ////////////////////////////////////////////////////////////////////////////

  T0V(VAP_INDEX_OFFSET, 0);
  T0V(VAP_VF_MAX_VTX_INDX, VAP_VF_MAX_VTX_INDX__MAX_INDX(0));
  T0V(VAP_VF_MIN_VTX_INDX, VAP_VF_MIN_VTX_INDX__MIN_INDX(0));

  ////////////////////////////////////////////////////////////////////////////
  // VAP
  ////////////////////////////////////////////////////////////////////////////

  T0V(VAP_CLIP_CNTL, VAP_CLIP_CNTL__CLIP_DISABLE(1));
  T0V(VAP_VTE_CNTL,
      VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
      | VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
  );
  T0V(VAP_CNTL_STATUS, VAP_CNTL_STATUS__PVS_BYPASS(0));

  ////////////////////////////////////////////////////////////////////////////
  // GA POINT SIZE
  // NOTE(review): 800x600 is half of the 1600x1200 target -- presumably
  // half-extents in 1/12-pixel units; confirm against the register reference.
  ////////////////////////////////////////////////////////////////////////////

  T0V(GA_POINT_SIZE,
      GA_POINT_SIZE__HEIGHT(600 * 12)
      | GA_POINT_SIZE__WIDTH(800 * 12)
  );

  ////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW: a single immediate-mode point
  ////////////////////////////////////////////////////////////////////////////

  const int dwords_per_vtx = 2;
  T0V(VAP_VTX_SIZE, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx));

  const float point_xy[] = {
    800.0f, 600.0f,
  };
  const int vertex_count = 1;

  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
  );
  for (const float coord : point_xy) {
    TF(coord);
  }
}
// Builds the projection used by indirect_buffer() as the product of two
// matrices. Reading the constructor arguments as four rows of four:
//   - the first matrix is identity except for -low1 in the last column of
//     the third row
//   - the second matrix replaces the third row with (0, 0, scale2, low2) and
//     the fourth row with (0, 0, scale3, low3)
// where scale2 / scale3 are the ratios of the [low2, high2] / [low3, high3]
// ranges to the [low1, high1] range.
// NOTE(review): presumably this remaps z from [low1, high1] into
// [low2, high2] and derives w from [low3, high3]; that depends on mat4x4's
// storage and multiplication conventions, which are not visible here.
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  float z_scale = (high2 - low2) / (high1 - low1);
  float w_scale = (high3 - low3) / (high1 - low1);

  mat4x4 offset_input(1, 0, 0, 0,
                      0, 1, 0, 0,
                      0, 0, 1, -low1,
                      0, 0, 0, 1);

  mat4x4 remap_output(1, 0, 0, 0,
                      0, 1, 0, 0,
                      0, 0, z_scale, low2,
                      0, 0, w_scale, low3);

  return remap_output * offset_input;
}
// Draws the first object of plane_model as an immediate-mode triangle list,
// with 5 dwords per vertex: position x, y, z followed by texture x, y.
//
// `trans` is not read here: in this file the callers upload the shader
// constants themselves before calling. The parameter is kept so existing
// call sites keep compiling.
void _3d_plane_inner(mat4x4 trans)
{
  ////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW
  ////////////////////////////////////////////////////////////////////////////

  const model * mesh = &plane_model;
  const object * obj = mesh->object[0];
  const int triangle_count = obj->triangle_count;
  const int vertex_count = triangle_count * 3;
  const int dwords_per_vtx = 5;

  T0V(VAP_VTX_SIZE, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx));

  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(4)
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
  );

  // walk the vertices in triangle order: vertex v is corner (v % 3) of
  // triangle (v / 3)
  for (int v = 0; v < vertex_count; v++) {
    const int tri = v / 3;
    const int corner = v % 3;

    vec3 pos = mesh->position[obj->triangle[tri][corner].position];
    vec2 uv = mesh->texture[obj->triangle[tri][corner].texture];

    TF(pos.x);
    TF(pos.y);
    TF(pos.z);
    TF(uv.x);
    TF(uv.y);
  }
}
void _3d_plane(struct shaders& shaders,
const mat4x4& world_to_clip,
float theta)
{
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PLANE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__32();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PLANE_SHADER]);
ib_vap_pvs(&shaders.vertex[PLANE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
mat4x4 s = scale(1.0f);
mat4x4 rx = rotate_x(-PI / 2.0f);
mat4x4 local_to_world = s * rx;
mat4x4 trans = world_to_clip * local_to_world;
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// plane_inner
_3d_plane_inner(trans);
}
void _3d_particle(struct shaders& shaders,
const mat4x4& world_to_clip,
const mat4x4& world_to_view,
float theta)
{
// enable blending
T0V(RB3D_BLENDCNTL
, RB3D_BLENDCNTL__ALPHA_BLEND_ENABLE__ENABLE
| RB3D_BLENDCNTL__READ_ENABLE(1)
| RB3D_BLENDCNTL__SRCBLEND__GL_ONE
| RB3D_BLENDCNTL__DESTBLEND__GL_ONE
| RB3D_BLENDCNTL__SRC_ALPHA_0_NO_READ(0)
| RB3D_BLENDCNTL__SRC_ALPHA_1_NO_READ(0)
);
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PARTICLE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__32();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PARTICLE_SHADER]);
ib_vap_pvs(&shaders.vertex[PARTICLE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
mat4x4 s = scale(1.0f);
mat4x4 local_to_world = s;
mat4x4 local_to_view = world_to_view * local_to_world;
mat4x4 trans = world_to_clip * local_to_world;
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
// 4: particle_position
0, 0, 0, 0,
// 5: dx (right)
local_to_view[0][0], local_to_view[0][1], local_to_view[0][2], 0,
// 6: dy (up)
local_to_view[1][0], local_to_view[1][1], local_to_view[1][2], 0,
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// plane_inner
_3d_plane_inner(trans);
}
// Builds the indirect buffer for one 1600x1200 frame: common state, shader
// upload, then the clear, plane and particle passes.
//
// shaders: loaded shader tables (uploaded in full, then bound per pass)
// theta:   animation angle; the camera's Y rotation is theta * 0.8
//
// Resets ib_ix to 0 before emitting. The buffer is padded with 0x80000000
// words until ib_ix is a multiple of 8, and the final ib_ix is returned
// (main() uses it as the dword count of the submission).
int indirect_buffer(shaders& shaders,
                    float theta)
{
  int width = 1600;
  int height = 1200;
  int pitch = width;

  ib_ix = 0;

  ib_generic_initialization();

  // blending off by default; _3d_particle() turns it on for its own draw
  T0V(RB3D_BLENDCNTL, 0);
  T0V(RB3D_ABLENDCNTL, 0);

  ib_viewport(width, height);
  ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);

  T0V(GB_ENABLE, 0);

  T0V(US_OUT_FMT_0
      , US_OUT_FMT__OUT_FMT(0) // C4_8
      | US_OUT_FMT__C0_SEL__BLUE
      | US_OUT_FMT__C1_SEL__GREEN
      | US_OUT_FMT__C2_SEL__RED
      | US_OUT_FMT__C3_SEL__ALPHA
      | US_OUT_FMT__OUT_SIGN(0)
      );
  T0V(US_OUT_FMT_1
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  T0V(US_OUT_FMT_2
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  // Previously US_OUT_FMT_2 was written a second time here, which left
  // US_OUT_FMT_3 unprogrammed.
  T0V(US_OUT_FMT_3
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );

  load_pvs_shaders(shaders.vertex, shaders.vertex_length);
  load_us_shaders(shaders.fragment, shaders.fragment_length);

  ////////////////////////////////////////////////////////////////////////////
  // DRAW
  ////////////////////////////////////////////////////////////////////////////

  // 3/4 X scale compensates for the 1600x1200 (4:3) target
  mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
  mat4x4 p = perspective(0.01f, 3.0f,
                         0.001f, 0.999f,
                         1.0f, 3.0f);
  mat4x4 t = translate(vec3(0, 0, 1));
  mat4x4 rx = rotate_x(-PI / 8.0f);
  mat4x4 ry = rotate_y(theta * 0.8f);
  mat4x4 world_to_view = t * rx * ry;
  mat4x4 world_to_clip = aspect * p * world_to_view;

  _3d_clear(shaders);
  _3d_plane(shaders, world_to_clip, theta);
  // the particle pass also needs world_to_view to derive its right/up vectors
  _3d_particle(shaders, world_to_clip, world_to_view, theta);

  ////////////////////////////////////////////////////////////////////////////
  // padding
  ////////////////////////////////////////////////////////////////////////////

  while ((ib_ix % 8) != 0) {
    TU(0x80000000);
  }

  return ib_ix;
}
// Renders frames with indirect_buffer() into two alternating colorbuffers
// until a command submission fails, then dumps colorbuffer 0 and the z buffer
// to "colorbuffer0.data" and "zbuffer.data".
//
// The loop previously had no exit condition, which made the dump code and
// close(fd) below unreachable; it now stops when drm_radeon_cs() returns -1.
int main()
{
  struct shaders shaders = {
    .vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
    .fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
    .vertex_length = vertex_shader_paths_length,
    .fragment_length = fragment_shader_paths_length,
  };

  void * rmmio = map_pci_resource2();

  int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
  assert(fd != -1);

  // every buffer is sized for a 1600x1200 surface at 4 bytes per pixel
  const int colorbuffer_size = 1600 * 1200 * 4;

  int colorbuffer_handle[2];
  int zbuffer_handle;
  int * texturebuffer_handle;
  int flush_handle;

  void * colorbuffer_ptr[2];
  void * zbuffer_ptr;

  // buffers
  colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
  colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
  zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
  flush_handle = create_flush_buffer(fd);
  texturebuffer_handle = load_textures(fd, textures, textures_length);

  fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
  fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
  fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);

  int colorbuffer_ix = 0;
  float theta = PI * 0.5;

  while (true) {
    int ib_dwords = indirect_buffer(shaders, theta);

    int ret = drm_radeon_cs(fd,
                            colorbuffer_handle[colorbuffer_ix],
                            zbuffer_handle,
                            flush_handle,
                            texturebuffer_handle,
                            textures_length,
                            ib_dwords);
    // stop rendering on a failed submission instead of spinning forever
    if (ret == -1)
      break;

    primary_surface_address(rmmio, colorbuffer_ix);

    // next state
    theta += 0.01f;
    colorbuffer_ix = (colorbuffer_ix + 1) & 1;
  }

  // dump the color buffer
  {
    printf("colorbuffer0.data\n");
    int out_fd = open("colorbuffer0.data", O_RDWR|O_CREAT, 0644);
    assert(out_fd >= 0);
    ssize_t write_length = write(out_fd, colorbuffer_ptr[0], colorbuffer_size);
    assert(write_length == colorbuffer_size);
    close(out_fd);
  }

  // dump the z buffer (allocated with the same size as a color buffer)
  {
    printf("zbuffer.data\n");
    int out_fd = open("zbuffer.data", O_RDWR|O_CREAT, 0644);
    assert(out_fd >= 0);
    ssize_t write_length = write(out_fd, zbuffer_ptr, colorbuffer_size);
    assert(write_length == colorbuffer_size);
    close(out_fd);
  }

  close(fd);
}

View File

@ -0,0 +1,698 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
#include "../model/plane.h"
// Shader indices. Each value is the position of the corresponding entry in
// BOTH vertex_shader_paths and fragment_shader_paths below, so the two arrays
// must stay in this order.
#define CLEAR_SHADER 0
#define PLANE_SHADER 1
#define PARTICLE_SHADER 2
#define TEXTURE_TILE_SHADER 3
// Vertex shader binaries, indexed by the *_SHADER constants above.
const char * vertex_shader_paths[] = {
"clear.vs.bin",
"particle_plane.vs.bin",
"particle_particle_animated.vs.bin",
"texture_tile.vs.bin",
};
// Number of entries in vertex_shader_paths.
const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
// Fragment shader binaries, indexed by the *_SHADER constants above.
const char * fragment_shader_paths[] = {
"clear.fs.bin",
"particle_plane.fs.bin",
"particle_particle.fs.bin",
"texture_tile.fs.bin",
};
// Number of entries in fragment_shader_paths.
const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
// Texture indices; each value is the position of the entry in `textures` and
// is added to TEXTUREBUFFER_RELOC_INDEX when a texture is bound.
#define PLANE_TEXTURE 0
#define PARTICLE_TEXTURE 1
// Texture data files, indexed by the *_TEXTURE constants above.
const char * textures[] = {
"../texture/plane_32x32_rgba8888.data",
"../texture/particle_32x32_rgba8888.data",
};
// Number of entries in textures.
const int textures_length = (sizeof (textures)) / (sizeof (textures[0]));
// Loaded shader tables passed to the drawing functions.
// The field order matters: main() fills this struct with designated
// initializers in exactly this order.
struct shaders {
// result of load_shaders(vertex_shader_paths, ...); indexed by *_SHADER
struct shader_offset * vertex;
// result of load_shaders(fragment_shader_paths, ...); indexed by *_SHADER
struct shader_offset * fragment;
// number of entries in `vertex`
int vertex_length;
// number of entries in `fragment`
int fragment_length;
};
// Advances a 32-bit xorshift generator by one step using the shift triple
// (13, 17, 5) and returns the new state. A state of 0 maps to 0.
static inline uint32_t xorshift32(uint32_t state)
{
  state ^= state << 13;
  state ^= state >> 17;
  return state ^ (state << 5);
}
// Steps the generator `state` in place and maps the low 24 bits of the new
// state to a float in the closed range [0, 1].
static inline float xorshift32f(uint32_t& state)
{
  const uint32_t next = xorshift32(state);
  state = next;

  const uint32_t low24 = next & 0xffffff;
  return (float)low24 * (1.0f / 16777215.0f);
}
// Amount added to particle::time when a particle is recycled in main(), and
// the scale factor used for the initial time in init_particles().
const float max_age = 3.0f;
// CPU-side state of one particle.
struct particle {
// world-space position; uploaded as a vertex shader constant per draw
vec3 position;
// remaining time; main() decrements it by 0.01 per frame and recycles the
// particle once it is <= 0
float time;
// per-particle random factor; reset_particle() uses 2.0f * delta as the new
// vertical velocity
float delta;
// per-frame velocity; main() integrates it into position
vec3 velocity;
};
// Appends the "clear" pass to the indirect buffer: binds the CLEAR shaders,
// sets the z function to 7 ("always"), and draws a single point at
// (800, 600) with GA_POINT_SIZE 800*12 x 600*12.
// NOTE(review): presumably the point size is expressed in 1/12 pixel units so
// the single point covers the whole 1600x1200 target -- confirm against the
// GA_POINT_SIZE register documentation.
//
// shaders: loaded shader tables; only the CLEAR_SHADER entries are used.
void _3d_clear(const shaders& shaders)
{
ib_rs_instructions(0);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// only a position is output by the vertex stage; no texture coordinates
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, 0);
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always
ib_texture__0();
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
);
ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
// no viewport scale/offset bits are set here; the vertex below is given
// directly as (800, 600)
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// GA POINT SIZE
//////////////////////////////////////////////////////////////////////////////
T0V(GA_POINT_SIZE
, GA_POINT_SIZE__HEIGHT(600 * 12)
| GA_POINT_SIZE__WIDTH(800 * 12)
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
// one vertex of two dwords (x, y)
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
const float center[] = {
800.0f, 600.0f,
};
const int vertex_count = 1;
// packet count: one VAP_VF_CNTL dword plus the vertex data, minus one
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
for (int i = 0; i < 2; i++) {
TF(center[i]);
}
}
// Builds a matrix that leaves x and y untouched and remaps z linearly:
// the input range [low1, high1] is mapped to [low2, high2] in the third row
// and to [low3, high3] in the fourth row.
//
// The result is the product of a "shift by -low1" matrix followed by a
// "scale and offset" matrix.
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  const float z_scale = (high2 - low2) / (high1 - low1);
  const float w_scale = (high3 - low3) / (high1 - low1);

  // moves the start of the input range to zero
  const mat4x4 shift = mat4x4(1, 0, 0, 0,
                              0, 1, 0, 0,
                              0, 0, 1, -low1,
                              0, 0, 0, 1
                              );

  // rescales the shifted input into both output ranges
  const mat4x4 remap = mat4x4(1, 0, 0, 0,
                              0, 1, 0, 0,
                              0, 0, z_scale, low2,
                              0, 0, w_scale, low3
                              );

  return remap * shift;
}
// Appends an immediate-mode draw of the first object of plane_model to the
// indirect buffer. Every triangle corner is emitted as five floats:
// position x, y, z followed by texture x, y.
//
// trans: not read by this function.
void _3d_plane_inner(mat4x4 trans)
{
  //////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW
  //////////////////////////////////////////////////////////////////////////////

  const object * mesh = plane_model.object[0];
  const int dwords_per_vtx = 5;
  const int vertex_count = mesh->triangle_count * 3;

  T0V(VAP_VTX_SIZE
      , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
      );

  // packet count: one VAP_VF_CNTL dword plus the vertex data, minus one
  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(4)
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
    );

  for (int tri = 0; tri < mesh->triangle_count; tri++) {
    for (int corner = 0; corner < 3; corner++) {
      const vec3 position = plane_model.position[mesh->triangle[tri][corner].position];
      const vec2 texcoord = plane_model.texture[mesh->triangle[tri][corner].texture];

      TF(position.x);
      TF(position.y);
      TF(position.z);
      TF(texcoord.x);
      TF(texcoord.y);
    }
  }
}
// Appends the textured ground-plane pass to the indirect buffer: z function 1
// ("less"), PLANE texture and shaders, viewport transform with W division,
// one 4x4 matrix of vertex shader constants, then the plane geometry.
//
// shaders:       loaded shader tables; only the PLANE_SHADER entries are used.
// world_to_clip: world-space to clip-space transform; multiplied with the
//                plane's own local_to_world before upload.
// theta:         not read by this function.
void _3d_plane(const shaders& shaders,
const mat4x4& world_to_clip,
float theta)
{
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// position plus one 4-component texture coordinate set
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PLANE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__32();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PLANE_SHADER]);
ib_vap_pvs(&shaders.vertex[PLANE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
// the plane model is rotated by -PI/2 around the x axis before the
// world_to_clip transform is applied
mat4x4 s = scale(1.0f);
mat4x4 rx = rotate_x(-PI / 2.0f);
mat4x4 local_to_world = s * rx;
mat4x4 trans = world_to_clip * local_to_world;
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
const float consts[] = {
// 0: local space to clip space transformation matrix (4 vec4 constants)
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// plane_inner
_3d_plane_inner(trans);
}
// Appends the particle pass to the indirect buffer. Blending is enabled with
// GL_ONE source and destination factors, ZB_CNTL and ZB_ZSTENCILCNTL are
// written as 0, and the PARTICLE texture and shaders are bound. Then, for
// every particle, seven vec4 vertex shader constants are uploaded and the
// plane geometry is drawn once via _3d_plane_inner.
// NOTE(review): writing 0 to ZB_CNTL presumably disables depth testing and
// depth writes for the particles -- confirm against the register docs.
//
// shaders:          loaded shader tables; only PARTICLE_SHADER entries are used.
// world_to_clip:    world-space to clip-space transform.
// world_to_view:    world-space to view-space transform; its first two rows
//                   are uploaded as the "right" and "up" vectors.
// particles:        array of particles to draw.
// particles_length: number of entries in `particles`.
// theta:            not read by this function.
void _3d_particle(const shaders& shaders,
const mat4x4& world_to_clip,
const mat4x4& world_to_view,
const particle * particles,
const int particles_length,
const float theta)
{
// enable blending
T0V(RB3D_BLENDCNTL
, RB3D_BLENDCNTL__ALPHA_BLEND_ENABLE__ENABLE
| RB3D_BLENDCNTL__READ_ENABLE(1)
| RB3D_BLENDCNTL__SRCBLEND__GL_ONE
| RB3D_BLENDCNTL__DESTBLEND__GL_ONE
| RB3D_BLENDCNTL__SRC_ALPHA_0_NO_READ(0)
| RB3D_BLENDCNTL__SRC_ALPHA_1_NO_READ(0)
);
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
//ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
T0V(ZB_CNTL
, 0
);
T0V(ZB_ZSTENCILCNTL
, 0
);
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PARTICLE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__32();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PARTICLE_SHADER]);
ib_vap_pvs(&shaders.vertex[PARTICLE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
// these matrices are the same for every particle; only constant 6 below
// changes per particle
mat4x4 s = scale(1.0f);
mat4x4 local_to_world = s;
mat4x4 local_to_view = world_to_view * local_to_world;
mat4x4 local_to_clip = world_to_clip * local_to_world;
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
for (int i = 0; i < particles_length; i++) {
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
const vec3 position = particles[i].position;
// 6: position
// this local `scale` shadows the scale() function used above
const float scale = 0.005f;
//const float position_consts[] = { position.x, position.y, position.z, scale };
//ib_vap_pvs_const_offset(position_consts, (sizeof (position_consts)), 6);
const float consts[] = {
// 0: local space to clip space transformation matrix
local_to_clip[0][0], local_to_clip[0][1], local_to_clip[0][2], local_to_clip[0][3],
local_to_clip[1][0], local_to_clip[1][1], local_to_clip[1][2], local_to_clip[1][3],
local_to_clip[2][0], local_to_clip[2][1], local_to_clip[2][2], local_to_clip[2][3],
local_to_clip[3][0], local_to_clip[3][1], local_to_clip[3][2], local_to_clip[3][3],
// 4: dx ("right" change of basis vector)
local_to_view[0][0], local_to_view[0][1], local_to_view[0][2], 0,
// 5: dy ("up" change of basis vector)
local_to_view[1][0], local_to_view[1][1], local_to_view[1][2], 0,
// 6: particle position, scale
position.x, position.y, position.z, scale
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// plane_inner
_3d_plane_inner(local_to_clip);
}
}
int indirect_buffer(const shaders& shaders,
const particle * particles,
const int particles_length,
float theta)
{
int width = 1600;
int height = 1200;
int pitch = width;
ib_ix = 0;
ib_generic_initialization();
T0V(RB3D_BLENDCNTL, 0);
T0V(RB3D_ABLENDCNTL, 0);
ib_viewport(width, height);
ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);
T0V(GB_ENABLE, 0);
T0V(US_OUT_FMT_0
, US_OUT_FMT__OUT_FMT(0) // C4_8
| US_OUT_FMT__C0_SEL__BLUE
| US_OUT_FMT__C1_SEL__GREEN
| US_OUT_FMT__C2_SEL__RED
| US_OUT_FMT__C3_SEL__ALPHA
| US_OUT_FMT__OUT_SIGN(0)
);
T0V(US_OUT_FMT_1
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
load_pvs_shaders(shaders.vertex, shaders.vertex_length);
load_us_shaders(shaders.fragment, shaders.fragment_length);
//////////////////////////////////////////////////////////////////////////////
// DRAW
//////////////////////////////////////////////////////////////////////////////
mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
mat4x4 p = perspective(0.01f, 3.0f,
0.001f, 0.999f,
1.0f, 3.0f);
mat4x4 t = translate(vec3(0, 0, 1));
mat4x4 rx = rotate_x(-PI / 8.0f);
mat4x4 ry = rotate_y(theta * 0.8f);
mat4x4 world_to_view = t * rx * ry;
mat4x4 world_to_clip = aspect * p * world_to_view;
_3d_clear(shaders);
_3d_plane(shaders, world_to_clip, theta);
_3d_particle(shaders,
world_to_clip,
world_to_view,
particles,
particles_length,
theta);
//////////////////////////////////////////////////////////////////////////////
// padding
//////////////////////////////////////////////////////////////////////////////
while ((ib_ix % 8) != 0) {
TU(0x80000000);
}
assert((unsigned int)ib_ix < (sizeof (ib)) / (sizeof (ib[0])));
return ib_ix;
}
// Recycles a particle: moves it to the point 20 units from the origin along
// its horizontal (x, z) velocity direction at y = 0, and replaces the vertical
// velocity with 2.0f * p.delta while keeping the horizontal velocity.
void reset_particle(particle& p)
{
  const float vx = p.velocity.x;
  const float vz = p.velocity.z;

  p.position = normalize(vec3(vx, 0, vz)) * 20.0f;
  p.velocity = vec3(vx, 2.0f * p.delta, vz);
}
// Fills `particles` with deterministic pseudo-random initial state (fixed
// seed 0x12345678).
//
// Per particle, six random values are drawn in this order: position x, y, z
// (each in [-1, 1]), delta (in [0.5, 1]), then velocity x and z. The velocity
// is horizontal and normalized. The initial time is
// max_age * sinf(2 * i / particles_length) * 0.5f + 0.5f; note that the
// 0.5f offset is added after the multiplication by max_age.
void init_particles(particle * particles, const int particles_length)
{
  uint32_t rng = 0x12345678;
  const float inverse_length = 1.0f / (float)(particles_length);

  for (int i = 0; i < particles_length; i++) {
    particle& p = particles[i];

    // the order of the random draws determines the generated values
    const float px = xorshift32f(rng) * 2.0f - 1.0f;
    const float py = xorshift32f(rng) * 2.0f - 1.0f;
    const float pz = xorshift32f(rng) * 2.0f - 1.0f;
    const float delta = xorshift32f(rng) * 0.5f + 0.5f;
    const float vx = xorshift32f(rng) * 2.0f - 1.0f;
    const float vz = xorshift32f(rng) * 2.0f - 1.0f;

    const float phase = ((float)i) * inverse_length * 2;

    p.time = max_age * sinf(phase) * 0.5f + 0.5f;
    p.delta = delta;
    p.position.x = px;
    p.position.y = py;
    p.position.z = pz;
    p.velocity = normalize(vec3(vx * 0.5f, 0.0f, vz * 0.5f));
  }
}
// Program entry point.
//
// Loads the shader binaries, opens the DRM device, creates two colorbuffers
// (used alternately), a zbuffer, a flush buffer and the textures, initializes
// 1000 particles, then loops: build the indirect buffer, submit it, point the
// display at the colorbuffer just rendered, and advance the simulation.
// The loop ends when drm_radeon_cs returns -1; afterwards colorbuffer 0 and
// the zbuffer are written to files in the current directory.
int main()
{
struct shaders shaders = {
.vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
.fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
.vertex_length = vertex_shader_paths_length,
.fragment_length = fragment_shader_paths_length,
};
void * rmmio = map_pci_resource2();
int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
assert(fd != -1);
// 1600x1200 pixels, 4 bytes per pixel; the zbuffer reuses the same size
const int colorbuffer_size = 1600 * 1200 * 4;
int colorbuffer_handle[2];
int zbuffer_handle;
int * texturebuffer_handle;
int flush_handle;
void * colorbuffer_ptr[2];
void * zbuffer_ptr;
// colorbuffer
colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
flush_handle = create_flush_buffer(fd);
texturebuffer_handle = load_textures(fd, textures, textures_length);
fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);
// index of the colorbuffer rendered to in the current frame (0 or 1)
int colorbuffer_ix = 0;
float theta = PI * 0.5;
particle particles[1000] = {};
const int particles_length = (sizeof (particles)) / (sizeof (particles[0]));
init_particles(particles, particles_length);
while (true) {
int ib_dwords = indirect_buffer(shaders,
particles,
particles_length,
theta);
int ret = drm_radeon_cs(fd,
colorbuffer_handle[colorbuffer_ix],
zbuffer_handle,
flush_handle,
texturebuffer_handle,
textures_length,
ib_dwords);
// stop rendering on submission failure and fall through to the dumps
if (ret == -1)
break;
primary_surface_address(rmmio, colorbuffer_ix);
// next state
theta += 0.01f;
// alternate between colorbuffer 0 and 1
colorbuffer_ix = (colorbuffer_ix + 1) & 1;
//
// update particles
//
for (int i = 0; i < particles_length; i++) {
if (particles[i].time <= 0) {
// expired: extend the time by max_age and respawn
particles[i].time += max_age;
reset_particle(particles[i]);
} else {
particles[i].time -= 0.01f;
// the vertical velocity component is weighted more heavily (5.0) than
// the horizontal components (0.9) when integrating the position
particles[i].position += vec3(particles[i].velocity.x * 0.9f,
particles[i].velocity.y * 5.0f,
particles[i].velocity.z * 0.9f);
// constant downward acceleration per frame
particles[i].velocity += vec3(0, -0.04, 0);
// bounce at y = 0: reflect the position and reverse the vertical
// velocity, scaled by 0.6
if (particles[i].position.y < 0) {
particles[i].position.y = fabsf(particles[i].position.y);
particles[i].velocity.y *= -0.6f;
}
}
}
}
{
printf("colorbuffer0.data\n");
int out_fd = open("colorbuffer0.data", O_RDWR|O_CREAT, 0644);
assert(out_fd >= 0);
ssize_t write_length = write(out_fd, colorbuffer_ptr[0], colorbuffer_size);
assert(write_length == colorbuffer_size);
close(out_fd);
}
{
printf("zbuffer.data\n");
int out_fd = open("zbuffer.data", O_RDWR|O_CREAT, 0644);
assert(out_fd >= 0);
// the zbuffer was created with colorbuffer_size bytes
ssize_t write_length = write(out_fd, zbuffer_ptr, colorbuffer_size);
assert(write_length == colorbuffer_size);
close(out_fd);
}
close(fd);
}

View File

@ -0,0 +1,774 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
// Shader indices. Each value is the position of the corresponding entry in
// BOTH vertex_shader_paths and fragment_shader_paths below, so the two arrays
// must stay in this order.
#define CLEAR_SHADER 0
#define PLANE_SHADER 1
#define PARTICLE_SHADER 2
#define TEXTURE_TILE_SHADER 3
// Vertex shader binaries, indexed by the *_SHADER constants above.
const char * vertex_shader_paths[] = {
"clear.vs.bin",
"particle_plane_fan.vs.bin",
"particle_particle_animated_fan.vs.bin",
"texture_tile.vs.bin",
};
// Number of entries in vertex_shader_paths.
const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
// Fragment shader binaries, indexed by the *_SHADER constants above.
const char * fragment_shader_paths[] = {
"clear.fs.bin",
"particle_plane.fs.bin",
"particle_particle.fs.bin",
"texture_tile.fs.bin",
};
// Number of entries in fragment_shader_paths.
const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
// Texture indices; each value is the position of the entry in `textures` and
// is added to TEXTUREBUFFER_RELOC_INDEX when a texture is bound.
#define PLANE_TEXTURE 0
#define PARTICLE_TEXTURE 1
// Texture data files, indexed by the *_TEXTURE constants above.
const char * textures[] = {
"../texture/plane_32x32_rgba8888.data",
"../texture/particle_32x32_rgba8888.data",
};
// Number of entries in textures.
const int textures_length = (sizeof (textures)) / (sizeof (textures[0]));
// Loaded shader tables passed to the drawing functions.
// The field order matters: main() fills this struct with designated
// initializers in exactly this order.
struct shaders {
// result of load_shaders(vertex_shader_paths, ...); indexed by *_SHADER
struct shader_offset * vertex;
// result of load_shaders(fragment_shader_paths, ...); indexed by *_SHADER
struct shader_offset * fragment;
// number of entries in `vertex`
int vertex_length;
// number of entries in `fragment`
int fragment_length;
};
// Computes the successor of `state` in a 32-bit xorshift sequence
// (left 13, right 17, left 5). Zero is a fixed point.
static inline uint32_t xorshift32(uint32_t state)
{
  const uint32_t after_left13 = state ^ (state << 13);
  const uint32_t after_right17 = after_left13 ^ (after_left13 >> 17);
  const uint32_t after_left5 = after_right17 ^ (after_right17 << 5);
  return after_left5;
}
// Advances `state` by one xorshift32 step (updating it in place) and returns
// the low 24 bits of the new state scaled into the closed range [0, 1].
static inline float xorshift32f(uint32_t& state)
{
  const float unit = 1.0f / 16777215.0f;

  state = xorshift32(state);
  const uint32_t bits = state & 0xffffff;
  return (float)bits * unit;
}
// Scale factor for the initial particle time computed in init_particles().
const float max_age = 3.0f;
// CPU-side state of one particle.
struct particle {
// world-space position; uploaded as a vertex shader constant per draw
vec3 position;
// time value; initialized in init_particles()
float time;
// per-particle random factor; reset_particle() uses 2.0f * delta as the new
// vertical velocity
float delta;
// velocity; init_particles() makes it horizontal and normalized
vec3 velocity;
};
// Appends the "clear" pass to the indirect buffer: binds the CLEAR shaders,
// sets the z function to 7 ("always"), and draws a single point at
// (800, 600) with GA_POINT_SIZE 800*12 x 600*12.
// NOTE(review): presumably the point size is expressed in 1/12 pixel units so
// the single point covers the whole 1600x1200 target -- confirm against the
// GA_POINT_SIZE register documentation.
//
// shaders: loaded shader tables; only the CLEAR_SHADER entries are used.
void _3d_clear(const shaders& shaders)
{
ib_rs_instructions(0);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// only a position is output by the vertex stage; no texture coordinates
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, 0);
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always
ib_texture__0();
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
);
ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
// no viewport scale/offset bits are set here; the vertex below is given
// directly as (800, 600)
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// GA POINT SIZE
//////////////////////////////////////////////////////////////////////////////
T0V(GA_POINT_SIZE
, GA_POINT_SIZE__HEIGHT(600 * 12)
| GA_POINT_SIZE__WIDTH(800 * 12)
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
// one vertex of two dwords (x, y)
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
const float center[] = {
800.0f, 600.0f,
};
const int vertex_count = 1;
// packet count: one VAP_VF_CNTL dword plus the vertex data, minus one
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
for (int i = 0; i < 2; i++) {
TF(center[i]);
}
}
// Returns a matrix that passes x and y through unchanged and linearly remaps
// z from the range [low1, high1]: the third output row maps it onto
// [low2, high2] and the fourth output row maps it onto [low3, high3].
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  // step 1: translate z so that low1 becomes 0
  const mat4x4 to_origin = mat4x4(1, 0, 0, 0,
                                  0, 1, 0, 0,
                                  0, 0, 1, -low1,
                                  0, 0, 0, 1
                                  );

  // step 2: scale the translated z into each output range and add its start
  const float row2_scale = (high2 - low2) / (high1 - low1);
  const float row3_scale = (high3 - low3) / (high1 - low1);
  const mat4x4 to_ranges = mat4x4(1, 0, 0, 0,
                                  0, 1, 0, 0,
                                  0, 0, row2_scale, low2,
                                  0, 0, row3_scale, low3
                                  );

  return to_ranges * to_origin;
}
// Appends an immediate-mode triangle-fan draw of a unit quad to the indirect
// buffer. Each of the four vertices is two floats: (0,0), (1,0), (1,1), (0,1).
void _3d_plane_inner()
{
  //////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW
  //////////////////////////////////////////////////////////////////////////////

  const int vertex_count = 4;
  const int dwords_per_vtx = 2;

  // quad corners as consecutive x, y pairs, in fan order
  const float corners[vertex_count * dwords_per_vtx] = {
    0.0f, 0.0f,
    1.0f, 0.0f,
    1.0f, 1.0f,
    0.0f, 1.0f,
  };

  T0V(VAP_VTX_SIZE
      , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
      );

  // packet count: one VAP_VF_CNTL dword plus the vertex data, minus one
  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(5) // triangle fan
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
    );

  for (int k = 0; k < vertex_count * dwords_per_vtx; k++) {
    TF(corners[k]);
  }
}
// Appends one particle quad draw that fetches its vertices from the vertex
// buffer instead of embedding them in the command stream: sets the vertex
// index range to 0..3, programs a single vertex array at offset 0 of the
// buffer referenced by VERTEXBUFFER_RELOC_INDEX, and issues a 4-vertex
// triangle-fan draw.
void _3d_particle_inner()
{
  T0V(VAP_VF_MAX_VTX_INDX
      , VAP_VF_MAX_VTX_INDX__MAX_INDX(3)
      );
  T0V(VAP_VF_MIN_VTX_INDX
      , VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
      );

  //////////////////////////////////////////////////////////////////////////////
  // AOS
  //////////////////////////////////////////////////////////////////////////////

  T3(_3D_LOAD_VBPNTR, (3 - 1));
  TU( // VAP_VTX_NUM_ARRAYS
      VAP_VTX_NUM_ARRAYS__VTX_NUM_ARRAYS(1)
    | VAP_VTX_NUM_ARRAYS__VC_FORCE_PREFETCH(1)
    );
  // NOTE(review): COUNT0/STRIDE0 of 2 presumably mean two dwords per vertex
  // (x, y) with a two-dword stride -- confirm against the register docs.
  TU( // VAP_VTX_AOS_ATTR01
      VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(2)
    | VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(2)
    );
  // byte offset 0 into the vertex buffer. A stray ';' inside this TU(...)
  // argument was removed: it only compiled if TU expands its argument
  // without wrapping parentheses.
  TU( // VAP_VTX_AOS_ADDR0
      (4 * 0)
    );
  T3(_NOP, 0);
  TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR0

  //////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW
  //////////////////////////////////////////////////////////////////////////////

  const int vertex_count = 4;
  T3(_3D_DRAW_VBUF_2, (1 - 1));
  TU( VAP_VF_CNTL__PRIM_TYPE(5) // triangle fan
    | VAP_VF_CNTL__PRIM_WALK(2) // vertex list (data fetched from memory)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
    );
}
// Appends the textured ground-plane pass to the indirect buffer: z function 1
// ("less"), PLANE texture and shaders, viewport transform with W division,
// five vec4 vertex shader constants, then the quad drawn by _3d_plane_inner.
//
// shaders:       loaded shader tables; only the PLANE_SHADER entries are used.
// world_to_clip: world-space to clip-space transform; multiplied with the
//                plane's own local_to_world before upload.
// theta:         not read by this function.
void _3d_plane(const shaders& shaders,
const mat4x4& world_to_clip,
float theta)
{
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// position plus one 4-component texture coordinate set
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PLANE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PLANE_SHADER]);
ib_vap_pvs(&shaders.vertex[PLANE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
// the plane is rotated by -PI/2 around the x axis before the world_to_clip
// transform is applied
mat4x4 s = scale(1.0f);
mat4x4 rx = rotate_x(-PI / 2.0f);
mat4x4 local_to_world = s * rx;
mat4x4 trans = world_to_clip * local_to_world;
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
const float consts[] = {
// 0: local space to clip space transformation matrix (4 vec4 constants)
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
// 4: extra constant read by the vertex shader
// NOTE(review): the meaning of -2.0 is defined by
// particle_plane_fan.vs.bin, which is not visible here -- confirm there.
-2.0f, 0, 0, 0,
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// plane_inner
_3d_plane_inner();
}
// Appends the particle pass to the indirect buffer. Blending is enabled with
// GL_ONE source and destination factors, ZB_CNTL and ZB_ZSTENCILCNTL are
// written as 0, and the PARTICLE texture and shaders are bound. Then, for
// every particle, eight vec4 vertex shader constants are uploaded and one
// quad is drawn from the vertex buffer via _3d_particle_inner.
// NOTE(review): writing 0 to ZB_CNTL presumably disables depth testing and
// depth writes for the particles -- confirm against the register docs.
//
// shaders:          loaded shader tables; only PARTICLE_SHADER entries are used.
// world_to_clip:    world-space to clip-space transform.
// world_to_view:    world-space to view-space transform; its first two rows
//                   are uploaded as the "right" and "up" vectors.
// particles:        array of particles to draw.
// particles_length: number of entries in `particles`.
// theta:            not read by this function.
void _3d_particle(const shaders& shaders,
const mat4x4& world_to_clip,
const mat4x4& world_to_view,
const particle * particles,
const int particles_length,
const float theta)
{
// enable blending
T0V(RB3D_BLENDCNTL
, RB3D_BLENDCNTL__ALPHA_BLEND_ENABLE__ENABLE
| RB3D_BLENDCNTL__READ_ENABLE(1)
| RB3D_BLENDCNTL__SRCBLEND__GL_ONE
| RB3D_BLENDCNTL__DESTBLEND__GL_ONE
| RB3D_BLENDCNTL__SRC_ALPHA_0_NO_READ(0)
| RB3D_BLENDCNTL__SRC_ALPHA_1_NO_READ(0)
);
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
//ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
T0V(ZB_CNTL
, 0
);
T0V(ZB_ZSTENCILCNTL
, 0
);
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PARTICLE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PARTICLE_SHADER]);
ib_vap_pvs(&shaders.vertex[PARTICLE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
// these matrices are the same for every particle; only constant 6 below
// changes per particle
mat4x4 s = scale(1.0f);
mat4x4 local_to_world = s;
mat4x4 local_to_view = world_to_view * local_to_world;
mat4x4 trans = world_to_clip * local_to_world;
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
for (int i = 0; i < particles_length; i++) {
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
const vec3 position = particles[i].position;
// 6: position
// this local `scale` shadows the scale() function used above
const float scale = 0.005f;
//const float position_consts[] = { position.x, position.y, position.z, scale };
//ib_vap_pvs_const_offset(position_consts, (sizeof (position_consts)), 6);
const float consts[] = {
// 0: local space to clip space transformation matrix
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
// 4: dx (right)
local_to_view[0][0], local_to_view[0][1], local_to_view[0][2], 0,
// 5: dy (up)
local_to_view[1][0], local_to_view[1][1], local_to_view[1][2], 0,
// 6: xyz:position w:scale
position.x, position.y, position.z, scale,
// 7: extra constant read by the vertex shader
// NOTE(review): the meaning of -2.0 is defined by
// particle_particle_animated_fan.vs.bin, which is not visible here.
-2.0, 0, 0, 0,
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// plane_inner
//_3d_plane_inner();
_3d_particle_inner();
}
}
int indirect_buffer(const shaders& shaders,
const particle * particles,
const int particles_length,
float theta)
{
int width = 1600;
int height = 1200;
int pitch = width;
ib_ix = 0;
ib_generic_initialization();
T0V(RB3D_BLENDCNTL, 0);
T0V(RB3D_ABLENDCNTL, 0);
ib_viewport(width, height);
ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);
T0V(GB_ENABLE, 0);
T0V(US_OUT_FMT_0
, US_OUT_FMT__OUT_FMT(0) // C4_8
| US_OUT_FMT__C0_SEL__BLUE
| US_OUT_FMT__C1_SEL__GREEN
| US_OUT_FMT__C2_SEL__RED
| US_OUT_FMT__C3_SEL__ALPHA
| US_OUT_FMT__OUT_SIGN(0)
);
T0V(US_OUT_FMT_1
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
load_pvs_shaders(shaders.vertex, shaders.vertex_length);
load_us_shaders(shaders.fragment, shaders.fragment_length);
//////////////////////////////////////////////////////////////////////////////
// DRAW
//////////////////////////////////////////////////////////////////////////////
mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
mat4x4 p = perspective(0.01f, 3.0f,
0.001f, 0.999f,
1.0f, 3.0f);
mat4x4 t = translate(vec3(0, 0, 1));
mat4x4 rx = rotate_x(-PI / 8.0f);
mat4x4 ry = rotate_y(theta * 0.8f);
mat4x4 world_to_view = t * rx * ry;
mat4x4 world_to_clip = aspect * p * world_to_view;
_3d_clear(shaders);
_3d_plane(shaders, world_to_clip, theta);
_3d_particle(shaders,
world_to_clip,
world_to_view,
particles,
particles_length,
theta);
//////////////////////////////////////////////////////////////////////////////
// padding
//////////////////////////////////////////////////////////////////////////////
while ((ib_ix % 8) != 0) {
TU(0x80000000);
}
assert((unsigned int)ib_ix < (sizeof (ib)) / (sizeof (ib[0])));
return ib_ix;
}
// Respawns a particle. The new position lies 20 units from the origin in the
// direction of the particle's horizontal (x, z) velocity, at height 0. The
// horizontal velocity is kept and the vertical velocity becomes
// 2.0f * p.delta.
void reset_particle(particle& p)
{
  const vec3 previous = p.velocity;

  const vec3 heading = normalize(vec3(previous.x,
                                      0,
                                      previous.z));
  p.position = heading * 20.0f;

  p.velocity = vec3(previous.x,
                    2.0f * p.delta,
                    previous.z);
}
// Fill `particles[0 .. particles_length)` with deterministic pseudo-random
// initial state (fixed xorshift32 seed).
//
// Per particle: position components in [-1, 1], delta in [0.5, 1], a
// normalized horizontal velocity, and a start time derived from the
// particle's index.
void init_particles(particle * particles, const int particles_length)
{
  uint32_t rng = 0x12345678;
  const float inv_count = 1.0f / (float)(particles_length);

  for (int index = 0; index < particles_length; index++) {
    particle& p = particles[index];

    // the order of the random draws determines the generated values;
    // keep position, delta, velocity in exactly this sequence
    const float px = xorshift32f(rng) * 2.0f - 1.0f;
    const float py = xorshift32f(rng) * 2.0f - 1.0f;
    const float pz = xorshift32f(rng) * 2.0f - 1.0f;
    const float delta = xorshift32f(rng) * 0.5f + 0.5f;
    const float vx = xorshift32f(rng) * 2.0f - 1.0f;
    const float vz = xorshift32f(rng) * 2.0f - 1.0f;

    const float phase = ((float)index) * inv_count * 2;
    p.time = max_age * sinf(phase) * 0.5f + 0.5f;
    p.delta = delta;

    p.position.x = px;
    p.position.y = py;
    p.position.z = pz;

    p.velocity = normalize(vec3(vx * 0.5f, 0.0f, vz * 0.5f));
  }
}
// Create a buffer holding one unit quad (four x/y pairs) per particle,
// fill it, unmap it again and return the buffer handle.
int init_particles_vertexbuffer(int fd, int particles_length)
{
  // corners of the unit quad as interleaved x, y
  const float quad[] = {
    0.0f, 0.0f,
    1.0f, 0.0f,
    1.0f, 1.0f,
    0.0f, 1.0f,
  };
  const int floats_per_quad = (sizeof (quad)) / (sizeof (quad[0]));
  const int size = particles_length * floats_per_quad * (sizeof (float));

  void * mapping;
  int handle = create_buffer(fd, size, &mapping);
  float * out = (float*)mapping;

  int written = 0;
  for (int particle_ix = 0; particle_ix < particles_length; particle_ix++) {
    for (int k = 0; k < floats_per_quad; k++) {
      out[written] = quad[k];
      written += 1;
    }
  }

  munmap(mapping, size);

  printf("init vertexbuffer %d %d\n", written, size);

  return handle;
}
// Program entry point.
//
// Loads the shaders and textures, creates the color/z/vertex buffers, then
// renders frames in a loop until drm_radeon_cs returns -1. After the loop the
// contents of colorbuffer 0 and the z-buffer are written to files in the
// current directory.
int main()
{
struct shaders shaders = {
.vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
.fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
.vertex_length = vertex_shader_paths_length,
.fragment_length = fragment_shader_paths_length,
};
void * rmmio = map_pci_resource2();
int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
assert(fd != -1);
// 1600x1200 pixels, 4 bytes per pixel
const int colorbuffer_size = 1600 * 1200 * 4;
int colorbuffer_handle[2];
int zbuffer_handle;
int * texturebuffer_handle;
//int flush_handle;
int vertexbuffer_handle;
void * colorbuffer_ptr[2];
void * zbuffer_ptr;
// colorbuffer
// two color buffers: colorbuffer_ix alternates between them every frame
colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
// the z-buffer is created with the same byte size as a color buffer
zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
//flush_handle = create_flush_buffer(fd);
texturebuffer_handle = load_textures(fd, textures, textures_length);
fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);
int colorbuffer_ix = 0;
float theta = PI * 0.5;
particle particles[10] = {};
const int particles_length = (sizeof (particles)) / (sizeof (particles[0]));
vertexbuffer_handle = init_particles_vertexbuffer(fd, particles_length);
init_particles(particles, particles_length);
fprintf(stderr, "vertexbuffer handle %d\n", vertexbuffer_handle);
// frame loop: build the indirect buffer, submit it, display the result,
// then advance the simulation by one step
while (true) {
int ib_dwords = indirect_buffer(shaders,
particles,
particles_length,
theta);
int ret = drm_radeon_cs(fd,
colorbuffer_handle[colorbuffer_ix],
zbuffer_handle,
vertexbuffer_handle,
texturebuffer_handle,
textures_length,
ib_dwords);
// a failed submission is the only exit from the frame loop
if (ret == -1)
break;
primary_surface_address(rmmio, colorbuffer_ix);
// next state
theta += 0.01f;
colorbuffer_ix = (colorbuffer_ix + 1) & 1;
//
// update particles
//
for (int i = 0; i < particles_length; i++) {
if (particles[i].time <= 0) {
// expired: extend the lifetime by max_age and respawn
particles[i].time += max_age;
reset_particle(particles[i]);
} else {
particles[i].time -= 0.01f;
particles[i].position += vec3(particles[i].velocity.x * 0.9f,
particles[i].velocity.y * 5.0f,
particles[i].velocity.z * 0.9f);
// constant downward acceleration
particles[i].velocity += vec3(0, -0.04, 0);
// bounce off the y = 0 plane: mirror the position and reverse/damp
// the vertical velocity
if (particles[i].position.y < 0) {
particles[i].position.y = fabsf(particles[i].position.y);
particles[i].velocity.y *= -0.6f;
}
}
}
}
// dump colorbuffer 0
// NOTE(review): opened without O_TRUNC, so a pre-existing longer file would
// keep its old tail
{
printf("colorbuffer0.data\n");
int out_fd = open("colorbuffer0.data", O_RDWR|O_CREAT, 0644);
assert(out_fd >= 0);
ssize_t write_length = write(out_fd, colorbuffer_ptr[0], colorbuffer_size);
assert(write_length == colorbuffer_size);
close(out_fd);
}
// dump the z-buffer (same byte size as a color buffer)
{
printf("zbuffer.data\n");
int out_fd = open("zbuffer.data", O_RDWR|O_CREAT, 0644);
assert(out_fd >= 0);
ssize_t write_length = write(out_fd, zbuffer_ptr, colorbuffer_size);
assert(write_length == colorbuffer_size);
close(out_fd);
}
close(fd);
}

View File

@ -0,0 +1,806 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
// Indices used to select an entry of shaders.vertex / shaders.fragment.
// They follow the order of the path tables below (presumably load_shaders
// returns its results in path order -- confirm).
#define CLEAR_SHADER 0
#define PLANE_SHADER 1
#define PARTICLE_SHADER 2
#define TEXTURE_TILE_SHADER 3
// vertex shader binaries, indexed by the *_SHADER constants
const char * vertex_shader_paths[] = {
"clear.vs.bin",
"particle_plane_fan.vs.bin",
"particle_particle_animated_quad_vbuf.vs.bin",
"texture_tile.vs.bin",
};
const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
// fragment shader binaries, indexed by the *_SHADER constants
const char * fragment_shader_paths[] = {
"clear.fs.bin",
"particle_plane.fs.bin",
"particle_particle.fs.bin",
"texture_tile.fs.bin",
};
const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
// Texture indices; added to TEXTUREBUFFER_RELOC_INDEX when binding a texture.
#define PLANE_TEXTURE 0
#define PARTICLE_TEXTURE 1
// texture data files, indexed by the *_TEXTURE constants
const char * textures[] = {
"../texture/plane_32x32_rgba8888.data",
"../texture/particle_32x32_rgba8888.data",
};
const int textures_length = (sizeof (textures)) / (sizeof (textures[0]));
// The loaded vertex and fragment shaders together with their counts.
// main() initializes the fields with designated initializers in this order.
struct shaders {
struct shader_offset * vertex; // result of load_shaders(vertex_shader_paths, ...)
struct shader_offset * fragment; // result of load_shaders(fragment_shader_paths, ...)
int vertex_length; // number of entries in `vertex`
int fragment_length; // number of entries in `fragment`
};
// One step of a 32-bit xorshift generator (shift triple 13, 17, 5).
// Returns the successor of `state`; a zero state maps to zero.
static inline uint32_t xorshift32(uint32_t state)
{
  state ^= state << 13;
  state ^= state >> 17;
  state ^= state << 5;
  return state;
}
// Advance `state` by one xorshift32 step and map the low 24 bits of the new
// state to a float in [0, 1].
static inline float xorshift32f(uint32_t& state)
{
  const uint32_t next = xorshift32(state);
  state = next;

  const uint32_t low24 = next & 0xffffff;
  return (float)low24 * (1.0f / 16777215.0f);
}
// Amount added to a particle's `time` when it expires and is respawned.
const float max_age = 3.0f;
// Simulation state of a single particle.
struct particle {
vec3 position;
float time; // remaining lifetime; decremented each frame, respawn when <= 0
float delta; // per-particle random factor in [0.5, 1]; scales the respawn vertical velocity
vec3 velocity;
};
// Emit the commands that clear the frame: a single point-list vertex at
// (800, 600) is drawn with the CLEAR_SHADER pair and a GA_POINT_SIZE of
// 800*12 x 600*12 (presumably sized so that one point covers the whole
// 1600x1200 target -- confirm the point size units).
void _3d_clear(const shaders& shaders)
{
ib_rs_instructions(0);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// only the position is output by the vertex shader
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, 0);
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always
ib_texture__0();
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
);
ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// GA POINT SIZE
//////////////////////////////////////////////////////////////////////////////
T0V(GA_POINT_SIZE
, GA_POINT_SIZE__HEIGHT(600 * 12)
| GA_POINT_SIZE__WIDTH(800 * 12)
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
// the single vertex: x, y of the point
const float center[] = {
800.0f, 600.0f,
};
const int vertex_count = 1;
// packet count: one VAP_VF_CNTL dword plus the inline vertex data, minus one
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
for (int i = 0; i < 2; i++) {
TF(center[i]);
}
}
// Build a projection matrix as the product of two steps:
//   1. shift z by -low1
//   2. z' = scale2 * z + low2 and w' = scale3 * z + low3, where scale2 and
//      scale3 are the ratios of the [low2, high2] / [low3, high3] ranges to
//      the [low1, high1] range
// x and y pass through unchanged.
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  const float range1 = high1 - low1;
  const float scale2 = (high2 - low2) / range1;
  const float scale3 = (high3 - low3) / range1;

  const mat4x4 shift = mat4x4(1, 0, 0, 0,
                              0, 1, 0, 0,
                              0, 0, 1, -low1,
                              0, 0, 0, 1
                              );

  const mat4x4 remap = mat4x4(1, 0, 0, 0,
                              0, 1, 0, 0,
                              0, 0, scale2, low2,
                              0, 0, scale3, low3
                              );

  return remap * shift;
}
// Emit the immediate-mode draw of the plane: a four-vertex triangle fan with
// two floats per vertex (the corners of the unit square).
void _3d_plane_inner()
{
  //////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW
  //////////////////////////////////////////////////////////////////////////////

  const int dwords_per_vtx = 2;

  T0V(VAP_VTX_SIZE
      , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
      );

  // unit square corners, interleaved x, y
  const float corners[] = {
    0.0f, 0.0f,
    1.0f, 0.0f,
    1.0f, 1.0f,
    0.0f, 1.0f,
  };
  const int vertex_count = 4;
  const int vertex_dwords = vertex_count * dwords_per_vtx;

  // one VAP_VF_CNTL dword followed by the inline vertex data
  T3(_3D_DRAW_IMMD_2, (1 + vertex_dwords) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(5) // triangle fan
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
    );

  for (int k = 0; k < vertex_dwords; k++) {
    TF(corners[k]);
  }
}
// Emit the vertex-buffer draw of all particles as a quad list.
//
// particles_length: number of particles; four vertices are drawn per particle.
// position_offset:  start of the per-vertex position array inside the vertex
//                   buffer, counted in 4-byte units (it is multiplied by 4 to
//                   form the address written to VAP_VTX_AOS_ADDR0).
//
// Two vertex arrays are bound, both relocated against the vertex buffer:
//   array 0: 3 dwords per vertex, starting at position_offset
//   array 1: 2 dwords per vertex, starting at offset 0
void _3d_particle_inner(int particles_length, int position_offset)
{
  const int vertex_count = 4 * particles_length;
  assert(vertex_count <= 0xffffff);

  //////////////////////////////////////////////////////////////////////////////
  // VF
  //////////////////////////////////////////////////////////////////////////////

  T0V(VAP_VF_MAX_VTX_INDX
      , VAP_VF_MAX_VTX_INDX__MAX_INDX(vertex_count - 1)
      );
  T0V(VAP_VF_MIN_VTX_INDX
      , VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
      );

  //////////////////////////////////////////////////////////////////////////////
  // AOS
  //////////////////////////////////////////////////////////////////////////////

  T3(_3D_LOAD_VBPNTR, (4 - 1));
  TU( // VAP_VTX_NUM_ARRAYS
      VAP_VTX_NUM_ARRAYS__VTX_NUM_ARRAYS(2)
    | VAP_VTX_NUM_ARRAYS__VC_FORCE_PREFETCH(1)
    );
  TU( // VAP_VTX_AOS_ATTR01
      VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(3)
    | VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(3)
    | VAP_VTX_AOS_ATTR__VTX_AOS_COUNT1(2)
    | VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE1(2)
    );
  // No statement terminator inside the macro argument: the argument must be a
  // plain expression so it stays valid however TU() expands it.
  TU( // VAP_VTX_AOS_ADDR0
      (4 * position_offset)
    );
  TU( // VAP_VTX_AOS_ADDR1
      (4 * 0)
    );

  T3(_NOP, 0);
  TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR0

  T3(_NOP, 0);
  TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR1

  //////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW
  //////////////////////////////////////////////////////////////////////////////

  // the vertex count is supplied through VAP_ALT_NUM_VERTICES
  // (USE_ALT_NUM_VERTS = 1), so NUM_VERTICES in VAP_VF_CNTL is left at 0
  T0V(VAP_ALT_NUM_VERTICES,
      vertex_count);

  T3(_3D_DRAW_VBUF_2, (1 - 1));
  TU( VAP_VF_CNTL__PRIM_TYPE(13) // quad list
    | VAP_VF_CNTL__PRIM_WALK(2) // vertex list (data fetched from memory)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(1)
    | VAP_VF_CNTL__NUM_VERTICES(0)
    );
}
// Emit the state and draw commands for the textured ground plane.
//
// The plane is the unit quad emitted by _3d_plane_inner, rotated by -PI/2
// around the x axis and transformed by world_to_clip. It is drawn with the
// PLANE_SHADER pair, the PLANE_TEXTURE texture and a "less" depth test.
// `theta` is not used by this function.
void _3d_plane(const shaders& shaders,
const mat4x4& world_to_clip,
float theta)
{
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// the vertex shader outputs a position and one 4-component texture coordinate
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
// texture dimensions (the texture file is named ..._32x32_...)
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PLANE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PLANE_SHADER]);
ib_vap_pvs(&shaders.vertex[PLANE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
mat4x4 s = scale(1.0f);
mat4x4 rx = rotate_x(-PI / 2.0f);
mat4x4 local_to_world = s * rx;
mat4x4 trans = world_to_clip * local_to_world;
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
// vertex shader constants: four vec4 for the transform followed by one vec4
// whose x component is -2.0
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
// 4
-2.0f, 0, 0, 0,
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// plane_inner
_3d_plane_inner();
}
// Emit the state and draw commands for all particles and upload their
// positions.
//
// Blending is enabled with source and destination factors GL_ONE, the depth
// buffer controls are written as 0, and the PARTICLE_SHADER pair is used with
// the PARTICLE_TEXTURE texture.
//
// The position of every particle is written four times (once per quad
// vertex, xyz each) into vertexbuffer_ptr, starting particles_length * 4 * 2
// floats into the buffer; the draw itself is emitted by _3d_particle_inner.
// `theta` is not used by this function.
void _3d_particle(const shaders& shaders,
const mat4x4& world_to_clip,
const mat4x4& world_to_view,
const particle * particles,
const int particles_length,
const float theta,
float * vertexbuffer_ptr)
{
// enable blending
T0V(RB3D_BLENDCNTL
, RB3D_BLENDCNTL__ALPHA_BLEND_ENABLE__ENABLE
| RB3D_BLENDCNTL__READ_ENABLE(1)
| RB3D_BLENDCNTL__SRCBLEND__GL_ONE
| RB3D_BLENDCNTL__DESTBLEND__GL_ONE
| RB3D_BLENDCNTL__SRC_ALPHA_0_NO_READ(0)
| RB3D_BLENDCNTL__SRC_ALPHA_1_NO_READ(0)
);
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
//ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
// both depth control registers are zeroed instead of configuring a depth test
T0V(ZB_CNTL
, 0
);
T0V(ZB_ZSTENCILCNTL
, 0
);
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PARTICLE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__32();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PARTICLE_SHADER]);
ib_vap_pvs(&shaders.vertex[PARTICLE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
mat4x4 s = scale(1.0f);
mat4x4 local_to_world = s;
mat4x4 local_to_view = world_to_view * local_to_world;
mat4x4 trans = world_to_clip * local_to_world;
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
const float scale = 0.005f;
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
// 4: dx (right)
local_to_view[0][0], local_to_view[0][1], local_to_view[0][2], 0,
// 5: dy (up)
local_to_view[1][0], local_to_view[1][1], local_to_view[1][2], 0,
// 6: xyz:position w:scale
0, 0, 0, scale,
// 7:
-2.0, 0, 0, 0,
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// positions start after the 2-floats-per-vertex array that
// init_particles_vertexbuffer writes at the beginning of the buffer
int offset = particles_length * 4 * 2;
int ix = 0;
for (int i = 0; i < particles_length; i++) {
const vec3& position = particles[i].position;
// the same position is repeated for each of the four quad vertices
for (int j = 0; j < 4; j++) {
vertexbuffer_ptr[offset + ix] = position.x;
ix++;
vertexbuffer_ptr[offset + ix] = position.y;
ix++;
vertexbuffer_ptr[offset + ix] = position.z;
ix++;
};
}
// compiler-level memory barrier: keeps the stores above from being reordered
// by the compiler past this point
asm volatile ("" ::: "memory");
_3d_particle_inner(particles_length, offset);
}
int indirect_buffer(const shaders& shaders,
const particle * particles,
const int particles_length,
float theta,
float * vertexbuffer_ptr)
{
int width = 1600;
int height = 1200;
int pitch = width;
ib_ix = 0;
ib_generic_initialization();
T0V(RB3D_BLENDCNTL, 0);
T0V(RB3D_ABLENDCNTL, 0);
ib_viewport(width, height);
ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);
T0V(GB_ENABLE, 0);
T0V(US_OUT_FMT_0
, US_OUT_FMT__OUT_FMT(0) // C4_8
| US_OUT_FMT__C0_SEL__BLUE
| US_OUT_FMT__C1_SEL__GREEN
| US_OUT_FMT__C2_SEL__RED
| US_OUT_FMT__C3_SEL__ALPHA
| US_OUT_FMT__OUT_SIGN(0)
);
T0V(US_OUT_FMT_1
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
load_pvs_shaders(shaders.vertex, shaders.vertex_length);
load_us_shaders(shaders.fragment, shaders.fragment_length);
//////////////////////////////////////////////////////////////////////////////
// DRAW
//////////////////////////////////////////////////////////////////////////////
mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
mat4x4 p = perspective(0.01f, 3.0f,
0.001f, 0.999f,
1.0f, 3.0f);
mat4x4 t = translate(vec3(0, 0, 1));
mat4x4 rx = rotate_x(-PI / 8.0f);
mat4x4 ry = rotate_y(theta * 0.8f);
mat4x4 world_to_view = t * rx * ry;
mat4x4 world_to_clip = aspect * p * world_to_view;
_3d_clear(shaders);
_3d_plane(shaders, world_to_clip, theta);
_3d_particle(shaders,
world_to_clip,
world_to_view,
particles,
particles_length,
theta,
vertexbuffer_ptr);
//////////////////////////////////////////////////////////////////////////////
// padding
//////////////////////////////////////////////////////////////////////////////
while ((ib_ix % 8) != 0) {
TU(0x80000000);
}
assert((unsigned int)ib_ix < (sizeof (ib)) / (sizeof (ib[0])));
return ib_ix;
}
// Respawn a particle: place it 20 units from the origin in the direction of
// its horizontal (x/z) velocity, and replace the vertical velocity with
// 2 * delta while keeping the horizontal velocity components.
void reset_particle(particle& p)
{
  const float vx = p.velocity.x;
  const float vz = p.velocity.z;

  const vec3 heading = vec3(vx, 0, vz);
  p.position = normalize(heading) * 20.0f;

  p.velocity = vec3(vx, 2.0f * p.delta, vz);
}
// Fill `particles[0 .. particles_length)` with deterministic pseudo-random
// initial state (fixed xorshift32 seed).
//
// Per particle: position components in [-1, 1], delta in [0.5, 1], a
// normalized horizontal velocity, and a start time derived from the
// particle's index.
void init_particles(particle * particles, const int particles_length)
{
  uint32_t rng = 0x12345678;
  const float inv_count = 1.0f / (float)(particles_length);

  for (int index = 0; index < particles_length; index++) {
    particle& p = particles[index];

    // the order of the random draws determines the generated values;
    // keep position, delta, velocity in exactly this sequence
    const float px = xorshift32f(rng) * 2.0f - 1.0f;
    const float py = xorshift32f(rng) * 2.0f - 1.0f;
    const float pz = xorshift32f(rng) * 2.0f - 1.0f;
    const float delta = xorshift32f(rng) * 0.5f + 0.5f;
    const float vx = xorshift32f(rng) * 2.0f - 1.0f;
    const float vz = xorshift32f(rng) * 2.0f - 1.0f;

    const float phase = ((float)index) * inv_count * 2;
    p.time = max_age * sinf(phase) * 0.5f + 0.5f;
    p.delta = delta;

    p.position.x = px;
    p.position.y = py;
    p.position.z = pz;

    p.velocity = normalize(vec3(vx * 0.5f, 0.0f, vz * 0.5f));
  }
}
// Create the particle vertex buffer and return its handle; the CPU mapping
// is returned through *ptr_out and stays mapped.
//
// The buffer has room for two arrays with four vertices per particle:
// 2 floats per vertex (filled here with the unit quad corners) followed by
// 3 floats per vertex (left unwritten by this function).
int init_particles_vertexbuffer(int fd, int particles_length, float ** ptr_out)
{
  // corners of the unit quad as interleaved x, y
  const float quad[] = {
    0.0f, 0.0f,
    1.0f, 0.0f,
    1.0f, 1.0f,
    0.0f, 1.0f,
  };
  const int vertex_count = 4;
  const int floats_per_quad = vertex_count * 2;

  const size_t quad_bytes = particles_length * vertex_count * 2 * (sizeof (float));
  const size_t position_bytes = particles_length * vertex_count * 3 * (sizeof (float));
  const int size = quad_bytes + position_bytes;
  printf("%d size %d\n", particles_length, size);

  void * mapping;
  int handle = create_buffer(fd, size, &mapping);
  float * out = (float*)mapping;

  int written = 0;
  for (int particle_ix = 0; particle_ix < particles_length; particle_ix++) {
    for (int k = 0; k < floats_per_quad; k++) {
      out[written] = quad[k];
      written += 1;
    }
  }

  printf("init vertexbuffer %d %d\n", written, size);

  assert(ptr_out != NULL);
  *ptr_out = out;

  return handle;
}
// Program entry point.
//
// Loads the shaders and textures, creates the color/z/vertex buffers, then
// renders frames in a loop until drm_radeon_cs returns -1. Particle positions
// are simulated on the CPU and written into the mapped vertex buffer each
// frame (by indirect_buffer -> _3d_particle). After the loop the contents of
// colorbuffer 0 and the z-buffer are written to files in the current
// directory.
int main()
{
struct shaders shaders = {
.vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
.fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
.vertex_length = vertex_shader_paths_length,
.fragment_length = fragment_shader_paths_length,
};
void * rmmio = map_pci_resource2();
int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
assert(fd != -1);
// 1600x1200 pixels, 4 bytes per pixel
const int colorbuffer_size = 1600 * 1200 * 4;
int colorbuffer_handle[2];
int zbuffer_handle;
int * texturebuffer_handle;
//int flush_handle;
int vertexbuffer_handle;
void * colorbuffer_ptr[2];
void * zbuffer_ptr;
float * vertexbuffer_ptr;
// colorbuffer
// two color buffers: colorbuffer_ix alternates between them every frame
colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
// the z-buffer is created with the same byte size as a color buffer
zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
//flush_handle = create_flush_buffer(fd);
texturebuffer_handle = load_textures(fd, textures, textures_length);
fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);
int colorbuffer_ix = 0;
float theta = PI * 0.5;
// NOTE(review): 512 * 256 = 131072 particles in an automatic (stack) array;
// confirm this fits the stack limit of the target environment
particle particles[512 * 256] = {};
const int particles_length = (sizeof (particles)) / (sizeof (particles[0]));
vertexbuffer_handle = init_particles_vertexbuffer(fd, particles_length, &vertexbuffer_ptr);
init_particles(particles, particles_length);
fprintf(stderr, "vertexbuffer handle %d\n", vertexbuffer_handle);
// frame loop: build the indirect buffer (which also uploads the particle
// positions), submit it, display the result, then advance the simulation
while (true) {
int ib_dwords = indirect_buffer(shaders,
particles,
particles_length,
theta,
vertexbuffer_ptr);
int ret = drm_radeon_cs(fd,
colorbuffer_handle[colorbuffer_ix],
zbuffer_handle,
vertexbuffer_handle,
texturebuffer_handle,
textures_length,
ib_dwords);
// a failed submission is the only exit from the frame loop
if (ret == -1)
break;
primary_surface_address(rmmio, colorbuffer_ix);
// next state
theta += 0.01f;
colorbuffer_ix = (colorbuffer_ix + 1) & 1;
//
// update particles
//
for (int i = 0; i < particles_length; i++) {
if (particles[i].time <= 0) {
// expired: extend the lifetime by max_age and respawn
particles[i].time += max_age;
reset_particle(particles[i]);
} else {
particles[i].time -= 0.01f;
particles[i].position += vec3(particles[i].velocity.x * 0.9f,
particles[i].velocity.y * 5.0f,
particles[i].velocity.z * 0.9f);
// constant downward acceleration
particles[i].velocity += vec3(0, -0.04, 0);
// bounce off the y = 0 plane: mirror the position and reverse/damp
// the vertical velocity
if (particles[i].position.y < 0) {
particles[i].position.y = fabsf(particles[i].position.y);
particles[i].velocity.y *= -0.6f;
}
}
}
}
// dump colorbuffer 0
// NOTE(review): opened without O_TRUNC, so a pre-existing longer file would
// keep its old tail
{
printf("colorbuffer0.data\n");
int out_fd = open("colorbuffer0.data", O_RDWR|O_CREAT, 0644);
assert(out_fd >= 0);
ssize_t write_length = write(out_fd, colorbuffer_ptr[0], colorbuffer_size);
assert(write_length == colorbuffer_size);
close(out_fd);
}
// dump the z-buffer (same byte size as a color buffer)
{
printf("zbuffer.data\n");
int out_fd = open("zbuffer.data", O_RDWR|O_CREAT, 0644);
assert(out_fd >= 0);
ssize_t write_length = write(out_fd, zbuffer_ptr, colorbuffer_size);
assert(write_length == colorbuffer_size);
close(out_fd);
}
close(fd);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,10 @@
-- Fragment shader: sample tex[0] at the coordinate in temp[0] and write the
-- sampled value to out[0] unchanged.
-- temp[0]: texture coordinate
TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE
temp[0].rgba = LD tex[0].rgba temp[0].rgaa ;
-- MAX of a value with itself acts as a move
OUT TEX_SEM_WAIT
src0.a = temp[0] ,
src0.rgb = temp[0] :
out[0].a = MAX src0.a src0.a ,
out[0].rgb = MAX src0.rgb src0.rgb ;

Binary file not shown.

View File

@ -0,0 +1,30 @@
-- Vertex shader: offset the particle position along dx/dy by the vertex
-- position, then transform the result by the matrix in const[0-3].
-- const[0-3]: transform matrix
-- const[4]: particle_position
-- const[5]: dx
-- const[6]: dy
-- input[0]: position coordinate
-- input[1]: texture coordinate
--
-- dot(m[0], v), dot(m[1], v), dot(m[2], v), dot(m[3], v)
--
-- ppos = particle_position
temp[0].xyz = VE_ADD const[4].xyz_ const[4].000_ ;
-- ppos = position.xxx * dx + ppos
temp[0].xyz = VE_MAD input[0].xxx_ const[5].xyz_ temp[0].xyz_ ;
-- ppos = position.yyy * dy + ppos
temp[0].xyz = VE_MAD input[0].yyy_ const[6].xyz_ temp[0].xyz_ ;
-- scale
-- (the multiplier is the constant 1 here; the instruction also sets w = 1)
temp[0].xyzw = VE_MUL temp[0].xyz1 temp[0].1111 ;
-- ppos = transform_matrix * ppos
temp[1].x = VE_DOT const[0].xyzw temp[0].xyzw ;
temp[1].y = VE_DOT const[1].xyzw temp[0].xyzw ;
temp[1].z = VE_DOT const[2].xyzw temp[0].xyzw ;
temp[1].w = VE_DOT const[3].xyzw temp[0].xyzw ;
-- out[0] = transformed position (x * 1 + 0)
out[0].xyzw = VE_MAD temp[1].xyzw temp[1].1111 temp[1].0000 ;
-- out[1] = texture coordinate, passed through
out[1].xyzw = VE_MAX input[1].xyzw input[1].xyzw ;

Binary file not shown.

View File

@ -0,0 +1,32 @@
-- Vertex shader: offset the particle position along dx/dy by the vertex
-- position, scale the result by const[6].w, then transform it by the matrix
-- in const[0-3].
-- const[0-3] : transform matrix
-- const[4] : dx
-- const[5] : dy
-- const[6].xyz: particle_position
-- const[6].w : scale
-- input[0]: position coordinate
-- input[1]: texture coordinate
--
-- dot(m[0], v), dot(m[1], v), dot(m[2], v), dot(m[3], v)
--
-- ppos = particle_position
temp[0].xyz = VE_ADD const[6].xyz_ const[6].000_ ;
-- ppos = position.xxx * dx + ppos
temp[0].xyz = VE_MAD input[0].xxx_ const[4].xyz_ temp[0].xyz_ ;
-- ppos = position.yyy * dy + ppos
temp[0].xyz = VE_MAD input[0].yyy_ const[5].xyz_ temp[0].xyz_ ;
-- ppos *= scale
-- (xyz are multiplied by const[6].w; w becomes 1 * 1)
temp[0].xyzw = VE_MUL temp[0].xyz1 const[6].www1 ;
-- ppos = transform_matrix * ppos
temp[1].x = VE_DOT const[0].xyzw temp[0].xyzw ;
temp[1].y = VE_DOT const[1].xyzw temp[0].xyzw ;
temp[1].z = VE_DOT const[2].xyzw temp[0].xyzw ;
temp[1].w = VE_DOT const[3].xyzw temp[0].xyzw ;
-- out[0] = transformed position (x * 1 + 0)
out[0].xyzw = VE_MAD temp[1].xyzw temp[1].1111 temp[1].0000 ;
-- out[1] = texture coordinate, passed through
out[1].xyzw = VE_MAX input[1].xyzw input[1].xyzw ;

Binary file not shown.

View File

@ -0,0 +1,37 @@
-- Vertex shader: the only input is the texture coordinate; the quad corner
-- position is derived from it, offset from the particle position along
-- dx/dy, scaled by const[6].w and transformed by the matrix in const[0-3].
-- const[0-3] : transform matrix
-- const[4] : dx
-- const[5] : dy
-- const[6].xyz: particle_position
-- const[6].w : scale
-- const[7].x : -2.0
-- input[0]: texture coordinate
--
-- dot(m[0], v), dot(m[1], v), dot(m[2], v), dot(m[3], v)
--
-- calculate position from texture coordinate
-- x = y * -2 + 1
-- y = x * -2 + 1
temp[2].xy = VE_MAD input[0].yx__ const[7].xx__ input[0].11__ ;
-- ppos = particle_position
temp[0].xyz = VE_ADD const[6].xyz_ const[6].000_ ;
-- ppos = position.xxx * dx + ppos
temp[0].xyz = VE_MAD temp[2].xxx_ const[4].xyz_ temp[0].xyz_ ;
-- ppos = position.yyy * dy + ppos
temp[0].xyz = VE_MAD temp[2].yyy_ const[5].xyz_ temp[0].xyz_ ;
-- ppos *= scale
-- (xyz are multiplied by const[6].w; w becomes 1 * 1)
temp[0].xyzw = VE_MUL temp[0].xyz1 const[6].www1 ;
-- ppos = transform_matrix * ppos
temp[1].x = VE_DOT const[0].xyzw temp[0].xyzw ;
temp[1].y = VE_DOT const[1].xyzw temp[0].xyzw ;
temp[1].z = VE_DOT const[2].xyzw temp[0].xyzw ;
temp[1].w = VE_DOT const[3].xyzw temp[0].xyzw ;
-- out[0] = transformed position (x * 1 + 0)
out[0].xyzw = VE_MAD temp[1].xyzw temp[1].1111 temp[1].0000 ;
-- out[1] = (texcoord.x, texcoord.y, 0, 0)
out[1].xyzw = VE_MAX input[0].xy00 input[0].xy00 ;

Binary file not shown.

View File

@ -0,0 +1,38 @@
-- Vertex shader: the particle position comes from a vertex input instead of
-- a constant. The quad corner is derived from the texture coordinate, offset
-- from the particle position along dx/dy, scaled by const[6].w and
-- transformed by the matrix in const[0-3]. input[0].w (age) is forwarded in
-- out[1].w.
-- const[0-3] : transform matrix
-- const[4] : dx
-- const[5] : dy
---- const[6].xyz: particle_position
-- const[6].w : scale
-- const[7].x : -2.0
-- input[0]: particle position
-- input[1]: texture coordinate
--
-- dot(m[0], v), dot(m[1], v), dot(m[2], v), dot(m[3], v)
--
-- calculate position from texture coordinate
-- x = y * -2 + 1
-- y = x * -2 + 1
temp[2].xy = VE_MAD input[1].yx__ const[7].xx__ input[1].11__ ;
-- ppos = particle_position
temp[0].xyz = VE_ADD input[0].xyz_ input[0].000_ ;
-- ppos = position.xxx * dx + ppos
temp[0].xyz = VE_MAD temp[2].xxx_ const[4].xyz_ temp[0].xyz_ ;
-- ppos = position.yyy * dy + ppos
temp[0].xyz = VE_MAD temp[2].yyy_ const[5].xyz_ temp[0].xyz_ ;
-- ppos = vec4(ppos.xyz * scale.xyz, age)
temp[0].xyzw = VE_MAD temp[0].xyz0 const[6].www0 input[0].000w ;
-- ppos = transform_matrix * ppos
-- (w of the multiplied vector is the constant 1, since temp[0].w holds age)
temp[1].x = VE_DOT const[0].xyzw temp[0].xyz1 ;
temp[1].y = VE_DOT const[1].xyzw temp[0].xyz1 ;
temp[1].z = VE_DOT const[2].xyzw temp[0].xyz1 ;
temp[1].w = VE_DOT const[3].xyzw temp[0].xyz1 ;
out[0].xyzw = VE_ADD temp[1].xyzw temp[1].0000 ;
-- out[1] = (texcoord.x, texcoord.y, ppos.z, age)
out[1].xyzw = VE_ADD input[1].xy00 temp[0].00zw ; -- age

Binary file not shown.

View File

@ -0,0 +1,43 @@
-- Fragment shader: multiply the sampled texture colour by a colour computed
-- from the interpolated input (a cosine palette, see the steps below) and
-- write it with alpha = 1.
-- temp[0].xy: texture coordinate
-- temp[0].a : age
TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE
temp[2].rgb = LD tex[0].rgba temp[0].rgaa ;
-- i = vec3(0.25, 0.40625, 0.5625);
src0.a = float(40) , -- 0.25
src1.a = float(45) , -- 0.40625
src2.rgb = float(49) : -- 0.5625
temp[1].rgb = MAD src0.a10 src1.1a0 src2.00r ;
-- t = i + vec3(td)
-- (td is computed as temp[0].b * temp[0].a)
src0.a = temp[0] ,
src0.rgb = temp[1] ,
src1.rgb = temp[0] :
temp[1].rgb = MAD src1.bbb src0.aaa src0.rgb ;
-- j = fract(t)
src0.rgb = temp[1] :
temp[1].rgb = FRC src0.rgb ;
-- k = cos(j * 2π)
-- (one scalar COS per channel)
src0.rgb = temp[1] :
COS src0.r ,
temp[1].r = SOP ;
src0.rgb = temp[1] :
COS src0.g ,
temp[1].g = SOP ;
src0.rgb = temp[1] :
COS src0.b ,
temp[1].b = SOP ;
-- l = k * vec3(0.5, 0.5, 0.5) + vec3(0.5, 0.5, 0.5)
src0.rgb = temp[1] ,
src1.rgb = float(48) : -- 0.5
temp[1].rgb = MAD src0.rgb src1.rrr src1.rrr ;
-- out.rgb = texture * l, out.a = 1
OUT TEX_SEM_WAIT
src0.rgb = temp[2] ,
src1.rgb = temp[1] :
out[0].a = MAX src0.1 src0.1 ,
out[0].rgb = MAD src0.rgb src1.rgb src0.000 ;

Binary file not shown.

121
src/particle_physics.fs.asm Normal file
View File

@ -0,0 +1,121 @@
-- Fragment shader: one particle simulation step. Reads position/age from
-- tex[0] and velocity/delta from tex[1], computes both the "update" and the
-- "reset" state, and selects between them on the sign of age.
-- temp[0].rgb : position
-- temp[0].a : age
-- temp[1].rgb : velocity
-- temp[1].a : delta
-- temp[2].rgb : reset__position
-- temp[2].a : reset__age
-- temp[3].rgb : reset__velocity
-- temp[3].a : reset__delta
-- temp[4].rgb : update__position
-- temp[4].a : update__age
-- temp[5].rgb : update__velocity
-- temp[5].a : update__delta
-- temp[6].rgb : temp
-- velocity_scale.rgb = vec3(0.003 , 0.01, 0.003)
-- delta_age = 0.01
-- const[0] = { velocity_scale.rgb, delta_age }
-- gravity = -0.05
-- velocity_attenuation = -0.7
-- const[1] = { velocity_attenuation, gravity, max_age, 0 }
-- NOTE(review): const[1].a is read below as reset_radius (it scales
-- reset__position), so the trailing 0 listed above looks stale -- confirm
-- against the host code that uploads the constants
-- out[0].rgb : position
-- out[0].a : age
-- out[1].rgb : velocity
-- out[1].a : delta
TEX
temp[0].rgba = LD tex[0].rgba temp[0].rgaa ;
TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE
temp[1].rgba = LD tex[1].rgba temp[0].rgaa ;
-- update_particle (position)
-- NOTE(review): the age MAD below adds const[0].a; the subtraction described
-- in its comment presumably relies on delta_age being uploaded negated --
-- confirm
TEX_SEM_WAIT
src0.a = temp[0] , -- age
src0.rgb = temp[0] , -- position
src1.a = const[0] , -- delta_age
src1.rgb = const[0] , -- scale
src2.rgb = temp[1] : -- velocity
temp[4].a = MAD src0.a src0.1 src1.a , -- update__age = (age * 1) - delta_age
temp[4].rgb = MAD src2.rgb src1.rgb src0.rgb ; -- update__position = (velocity * scale) + position
-- update_particle (velocity gravity)
-- p.velocity.y += -0.05;
src0.rgb = temp[1] , -- velocity
src1.rgb = const[1] : -- gravity (g)
temp[5].rgb = MAD src0.rgb src1.111 src1.0g0 ;
-- update_particle (velocity bounce)
-- p.velocity.y *= -0.7;
src0.rgb = temp[5] , -- velocity <gravity>
src1.rgb = const[1] : -- velocity_attenuation (r)
temp[6].rgb = MAD src0.rgb src1.1r1 src1.000 ;
-- update_particle (velocity bounce)
-- p.velocity = (p.position.y >= 0) ? temp[5] : temp[6]
src0.rgb = temp[5] , -- velocity <gravity>
src1.rgb = temp[6] , -- velocity <bounce>
src2.rgb = temp[4] : -- position
temp[5].rgb = CMP src0.rgb src1.rgb src2.ggg ;
-- position.y = abs(position.y)
src0.rgb = temp[4] :
temp[4].g = MAX |src0.0g0| |src0.0g0| ;
--
-- reset
--
-- normalize(vec3(velocity.x, 0, velocity.z))
-- (dot product, reciprocal square root, then multiply; the same MAD also
-- places delta in the y component)
src0.rgb = temp[1] : -- velocity
DP3 src0.r0b src0.r0b ,
temp[2].a = DP ;
src0.a = temp[2] :
temp[2].a = RSQ |src0.a| ;
src0.a = temp[2] ,
src0.rgb = temp[1] , -- velocity
src1.a = temp[1] : -- delta
temp[2].rgb = MAD src0.r0b src0.a0a src1.0a0 ;
-- age = age + max_age
-- reset__position = reset__position * 20
-- (the factor is const[1].a; 20 is the value the comment above expects)
src0.a = temp[0] , -- age
src1.rgb = const[1] , -- max_age
src2.a = const[1] , -- reset_radius
src0.rgb = temp[2] : -- reset__position
temp[2].a = MAD src0.a src0.1 src1.b ,
temp[2].rgb = MAD src0.rgb src2.aaa src2.000 ;
-- reset__velocity
-- (p.velocity.x * 0.5 + 0.5)
-- velocity.xz = velocity.xz
src0.rgb = temp[1] , -- velocity.x
src1.a = float(48) : -- 0.5
temp[3].a = MAD src0.r src1.a src1.a ,
temp[3].rgb = MAX src0.r0b src0.r0b ;
-- reset__velocity
-- velocity.y = (temp[3].a * delta + 2.0)
-- NOTE(review): the addend loaded below is float(56), annotated as 1.0, not
-- 2.0 -- confirm which value is intended
src0.a = temp[1] , -- delta
src1.a = float(56) , -- 1.0
src2.a = temp[3] : --
temp[3].g = MAD src2.0a0 src0.0a0 src1.0a0 ;
-- select per the CMP convention noted above: (age >= 0) ? update : reset
OUT
src0.a = temp[4] , -- update__age
src1.a = temp[2] , -- reset__age
src2.a = temp[0] , -- age
src0.rgb = temp[4] , -- update__position
src1.rgb = temp[2] : -- reset__position
out[0].a = CMP src0.a src1.a src2.a ,
out[0].rgb = CMP src0.rgb src1.rgb src2.aaa ;
OUT TEX_SEM_WAIT
src0.a = temp[1] , -- delta
src2.a = temp[0] , -- age
src0.rgb = temp[5] , -- update__velocity
src1.rgb = temp[3] : -- reset__velocity
out[1].a = MAX src0.a src0.a , -- constant
out[1].rgb = CMP src0.rgb src1.rgb src2.aaa ;

BIN
src/particle_physics.fs.bin Normal file

Binary file not shown.

View File

@ -0,0 +1,2 @@
-- input[0] is used both as the output position and as the source of the
-- coordinate written to out[1].
--
-- out[0] = input[0] + 0 (input[0] copied unchanged)
out[0].xyzw = VE_ADD input[0].xyzw input[0].0000 ;
-- out[1] = (input[0].x + const[0].x, input[0].y + const[0].y, 0, 0)
out[1].xyzw = VE_ADD input[0].xy00 const[0].xy00 ;

BIN
src/particle_physics.vs.bin Normal file

Binary file not shown.

10
src/particle_plane.fs.asm Normal file
View File

@ -0,0 +1,10 @@
-- temp[0]: texture coordinate
--
-- Load from tex[0] using temp[0] (read through the .rgaa swizzle) as the
-- coordinate operand; the loaded value overwrites temp[0].
TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE
temp[0].rgba = LD tex[0].rgba temp[0].rgaa ;
-- Copy the loaded value to out[0]: MAX of a value with itself yields that
-- value.
OUT TEX_SEM_WAIT
src0.a = temp[0] ,
src0.rgb = temp[0] :
out[0].a = MAX src0.a src0.a ,
out[0].rgb = MAX src0.rgb src0.rgb ;

BIN
src/particle_plane.fs.bin Normal file

Binary file not shown.

15
src/particle_plane.vs.asm Normal file
View File

@ -0,0 +1,15 @@
-- const[0-3]: transform matrix
-- input[0]: position coordinate
-- input[1]: texture coordinate
--
-- temp[1] = transform matrix * input[0], one row per instruction:
-- dot(m[0], v), dot(m[1], v), dot(m[2], v), dot(m[3], v)
--
temp[1].x = VE_DOT const[0].xyzw input[0].xyzw ;
temp[1].y = VE_DOT const[1].xyzw input[0].xyzw ;
temp[1].z = VE_DOT const[2].xyzw input[0].xyzw ;
temp[1].w = VE_DOT const[3].xyzw input[0].xyzw ;
-- out[0] = temp[1] * 1 + 0 (copy of the transformed position)
out[0].xyzw = VE_MAD temp[1].xyzw temp[1].1111 temp[1].0000 ;
-- out[1] = max(input[1], input[1]) (copy of the texture coordinate)
out[1].xyzw = VE_MAX input[1].xyzw input[1].xyzw ;

BIN
src/particle_plane.vs.bin Normal file

Binary file not shown.

View File

@ -0,0 +1,20 @@
-- const[0-3]: transform matrix
-- const[4].x: -2.0
-- input[0]: texture coordinate
--
-- dot(m[0], v), dot(m[1], v), dot(m[2], v), dot(m[3], v)
--
-- calculate position from texture coordinate (note the x/y exchange):
-- position.x = texcoord.y * -2 + 1
-- position.y = texcoord.x * -2 + 1
-- position.z = 0 * 0 + 0 = 0
-- position.w = 0 * 0 + 1 = 1
temp[1].xyzw = VE_MAD input[0].yx00 const[4].xx00 input[0].1101 ;
-- temp[2] = transform matrix * temp[1], one row per instruction
temp[2].x = VE_DOT const[0].xyzw temp[1].xyzw ;
temp[2].y = VE_DOT const[1].xyzw temp[1].xyzw ;
temp[2].z = VE_DOT const[2].xyzw temp[1].xyzw ;
temp[2].w = VE_DOT const[3].xyzw temp[1].xyzw ;
-- out[0] = temp[2] * 1 + 0 (copy of the transformed position)
out[0].xyzw = VE_MAD temp[2].xyzw temp[2].1111 temp[2].0000 ;
-- out[1] = (texcoord.x, texcoord.y, 0, 0)
out[1].xyzw = VE_MAX input[0].xy00 input[0].xy00 ;

Binary file not shown.

View File

@ -2,8 +2,10 @@
#define _NOP 0x10
#define _3D_LOAD_VBPNTR 0x2f
#define _INDX_BUFFER 0x33
#define _3D_DRAW_VBUF_2 0x34
#define _3D_DRAW_IMMD_2 0x35
#define _3D_DRAW_INDX_2 0x36
#define TYPE_0_COUNT(c) (((c) & 0x3fff) << 16)
#define TYPE_0_ONE_REG (1 << 15)
@ -11,3 +13,11 @@
#define TYPE_3_COUNT(c) (((c) & 0x3fff) << 16)
#define TYPE_3_OPCODE(o) (((o) & 0xff) << 8)
#define INDX_BUFFER__0__ONE_REG_WR(n) (((n) & 1) << 31)
#define INDX_BUFFER__0__SKIP_COUNT(n) (((n) & 0x7) << 16)
#define INDX_BUFFER__0__DESTINATION(n) (((n) & 0x1fff) << 0)
#define INDX_BUFFER__1__BUFFER_BASE(n) ((n) << 0)
#define INDX_BUFFER__2__BUFFER_SIZE(n) ((n) << 0)

View File

@ -9,7 +9,7 @@
#include "3d_registers_undocumented.h"
#include "3d_registers_bits.h"
union u32_f32 ib[16384];
union u32_f32 ib[16384 * 100];
volatile int ib_ix;
void ib_generic_initialization()
@ -54,8 +54,6 @@ void ib_generic_initialization()
, RB3D_CCTL__INDEPENDENT_COLORFORMAT_ENABLE(1)
);
T0V(RB3D_ROPCNTL, 0x00000000);
T0V(RB3D_BLENDCNTL, 0x00000000);
T0V(RB3D_ABLENDCNTL, 0x00000000);
T0V(RB3D_COLOR_CHANNEL_MASK
, RB3D_COLOR_CHANNEL_MASK__BLUE_MASK(1)
| RB3D_COLOR_CHANNEL_MASK__GREEN_MASK(1)
@ -136,11 +134,6 @@ void ib_generic_initialization()
| GA_ROUND_MODE__GEOMETRY_MASK(0)
);
T0Vf(GA_POINT_S0, 0.0f);
T0Vf(GA_POINT_T0, 1.0f);
T0Vf(GA_POINT_S1, 1.0f);
T0Vf(GA_POINT_T1, 0.0f);
T0V(GA_COLOR_CONTROL
, GA_COLOR_CONTROL__RGB0_SHADING(2)
| GA_COLOR_CONTROL__ALPHA0_SHADING(2)
@ -160,7 +153,6 @@ void ib_generic_initialization()
);
T0V(GB_SELECT, 0x00000000);
T0V(GB_ENABLE, 0x00000000);
T0V(GB_MSPOS0
, GB_MSPOS0__MS_X0(6)
@ -278,33 +270,10 @@ void ib_generic_initialization()
T0V(US_W_FMT
, US_W_FMT__W_FMT(0) // W is always zero
);
//////////////////////////////////////////////////////////////////////////////
// SC
//////////////////////////////////////////////////////////////////////////////
T0V(SC_SCISSOR0
, SC_SCISSOR0__XS0(0)
| SC_SCISSOR0__YS0(0)
);
T0V(SC_SCISSOR1
, SC_SCISSOR1__XS1(1600 - 1)
| SC_SCISSOR1__YS1(1200 - 1)
);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0Vf(VAP_VPORT_XSCALE, 800.0f);
T0Vf(VAP_VPORT_XOFFSET, 800.0f);
T0Vf(VAP_VPORT_YSCALE, -600.0f);
T0Vf(VAP_VPORT_YOFFSET, 600.0f);
T0Vf(VAP_VPORT_ZSCALE, 0.5f);
T0Vf(VAP_VPORT_ZOFFSET, 0.5f);
}
void ib_colorbuffer(int reloc_index)
void ib_colorbuffer(int reloc_index, int pitch,
int macrotile, int microtile)
{
//////////////////////////////////////////////////////////////////////////////
@ -318,8 +287,10 @@ void ib_colorbuffer(int reloc_index)
TU(reloc_index * 4); // index into relocs array
T0V(RB3D_COLORPITCH0
, RB3D_COLORPITCH__COLORPITCH(1600 >> 1)
| RB3D_COLORPITCH__COLORFORMAT(6) // ARGB8888
, RB3D_COLORPITCH__COLORPITCH(pitch >> 1)
| RB3D_COLORPITCH__COLORTILE(macrotile)
| RB3D_COLORPITCH__COLORMICROTILE(microtile)
| RB3D_COLORPITCH__COLORFORMAT__ARGB8888
);
// The COLORPITCH NOP is ignored/not applied due to
// RADEON_CS_KEEP_TILING_FLAGS, but is still required.
@ -327,7 +298,102 @@ void ib_colorbuffer(int reloc_index)
TU(reloc_index * 4); // index into relocs array
}
void ib_zbuffer(int reloc_index, int zfunc)
// Program color buffer `buffer_index` with an explicit value for its
// RB3D_COLOROFFSETn register.
//
// Emits RB3D_COLOROFFSETn and RB3D_COLORPITCHn, each followed by a NOP packet
// that carries the relocation index.
//
//   buffer_index  color buffer register set to write; must be 0..3
//   reloc_index   entry of the relocs array that names the buffer object
//   offset        value written to RB3D_COLOROFFSETn
//                 NOTE(review): the kernel command-stream checker is expected
//                 to combine this value with the relocated buffer address
//                 rather than discard it -- confirm against the radeon driver.
//   pitch         written to the COLORPITCH field as (pitch >> 1)
//   macrotile     COLORTILE field value
//   microtile     COLORMICROTILE field value
//   colorformat   COLORFORMAT field value
void ib_colorbuffer3(int buffer_index,
                     int reloc_index,
                     int offset,
                     int pitch,
                     int macrotile, int microtile,
                     int colorformat)
{
  assert(buffer_index >= 0 && buffer_index <= 3);

  // the per-buffer registers are addressed as <register 0> + 4 * buffer_index
  int reg_offset = buffer_index * 4;

  //////////////////////////////////////////////////////////////////////////////
  // CB
  //////////////////////////////////////////////////////////////////////////////

  T0V(RB3D_COLOROFFSET0 + reg_offset
      , offset
      );
  T3(_NOP, 0);
  TU(reloc_index * 4); // index into relocs array

  T0V(RB3D_COLORPITCH0 + reg_offset
      , RB3D_COLORPITCH__COLORPITCH(pitch >> 1)
      | RB3D_COLORPITCH__COLORTILE(macrotile)
      | RB3D_COLORPITCH__COLORMICROTILE(microtile)
      | RB3D_COLORPITCH__COLORFORMAT(colorformat)
      );
  // The COLORPITCH NOP is ignored/not applied due to
  // RADEON_CS_KEEP_TILING_FLAGS, but is still required.
  T3(_NOP, 0);
  TU(reloc_index * 4); // index into relocs array
}

// Program color buffer `buffer_index` (0..3) starting at the beginning of the
// relocated buffer object.
//
// Equivalent to ib_colorbuffer3() with an offset of zero; see that function
// for the meaning of the remaining parameters.
void ib_colorbuffer2(int buffer_index,
                     int reloc_index,
                     int pitch,
                     int macrotile, int microtile,
                     int colorformat)
{
  ib_colorbuffer3(buffer_index,
                  reloc_index,
                  0, // offset
                  pitch,
                  macrotile, microtile,
                  colorformat);
}
// Emit the scissor rectangle and the viewport transform for a render target
// of `width` x `height` pixels.
//
// The scissor spans (0, 0) .. (width - 1, height - 1).  The viewport X/Y scale
// and offset are half the target size (Y scale negated); Z scale and offset
// are both 0.5.
void ib_viewport(int width, int height)
{
  const float half_width = 0.5f * (float)width;
  const float half_height = 0.5f * (float)height;

  // SC: scissor rectangle
  T0V(SC_SCISSOR0, SC_SCISSOR0__XS0(0) | SC_SCISSOR0__YS0(0));
  T0V(SC_SCISSOR1, SC_SCISSOR1__XS1(width - 1) | SC_SCISSOR1__YS1(height - 1));

  // VAP: viewport transform
  T0Vf(VAP_VPORT_XSCALE, half_width);
  T0Vf(VAP_VPORT_XOFFSET, half_width);
  T0Vf(VAP_VPORT_YSCALE, -half_height);
  T0Vf(VAP_VPORT_YOFFSET, half_height);
  T0Vf(VAP_VPORT_ZSCALE, 0.5f);
  T0Vf(VAP_VPORT_ZOFFSET, 0.5f);
}
void ib_zbuffer(int reloc_index, int pitch, int zfunc)
{
//////////////////////////////////////////////////////////////////////////////
// ZB
@ -349,7 +415,7 @@ void ib_zbuffer(int reloc_index, int zfunc)
TU(reloc_index * 4); // index into relocs array
T0V(ZB_DEPTHPITCH
, ZB_DEPTHPITCH__DEPTHPITCH(1600 >> 2)
, ZB_DEPTHPITCH__DEPTHPITCH(pitch >> 2)
| ZB_DEPTHPITCH__DEPTHMACROTILE(1)
| ZB_DEPTHPITCH__DEPTHMICROTILE(1)
);
@ -518,7 +584,10 @@ void ib_texture__0()
T0V(TX_ENABLE, 0x00000000);
}
void ib_texture__1(int reloc_index)
void ib_texture__1(int reloc_index,
int width, int height,
int macrotile, int microtile,
int clamp)
{
//////////////////////////////////////////////////////////////////////////////
// TX
@ -528,17 +597,21 @@ void ib_texture__1(int reloc_index)
T0V(TX_ENABLE
, TX_ENABLE__TEX_0_ENABLE__ENABLE);
T0V(TX_FILTER0_0
, TX_FILTER0__MAG_FILTER__LINEAR
, TX_FILTER0__CLAMP_S(clamp)
| TX_FILTER0__CLAMP_T(clamp)
| TX_FILTER0__MAG_FILTER__LINEAR
| TX_FILTER0__MIN_FILTER__LINEAR
);
T0V(TX_FILTER1_0
, TX_FILTER1__LOD_BIAS(1)
| TX_FILTER1__BORDER_FIX(1)
);
T0V(TX_BORDER_COLOR_0, 0);
T0V(TX_FORMAT0_0
, TX_FORMAT0__TXWIDTH(1024 - 1)
| TX_FORMAT0__TXHEIGHT(1024 - 1)
, TX_FORMAT0__TXWIDTH(width - 1)
| TX_FORMAT0__TXHEIGHT(height - 1)
);
T0V(TX_FORMAT1_0
@ -552,10 +625,110 @@ void ib_texture__1(int reloc_index)
T0V(TX_FORMAT2_0, 0);
T0V(TX_OFFSET_0
//, TX_OFFSET__MACRO_TILE(1)
//| TX_OFFSET__MICRO_TILE(1)
, TX_OFFSET__MACRO_TILE(macrotile)
| TX_OFFSET__MICRO_TILE(microtile)
);
T3(_NOP, 0);
TU(reloc_index * 4); // index into relocs array
}
// Enable texture 0 only and describe it as a point-filtered 2D texture in the
// TX_FMT_32F_32F_32F_32F format, then emit the relocation for its buffer.
//
//   reloc_index  entry of the relocs array that names the texture buffer
//   width        texture width; written as (width - 1)
//   height       texture height; written as (height - 1)
//   macrotile    TX_OFFSET MACRO_TILE field value
//   microtile    TX_OFFSET MICRO_TILE field value
//   clamp        value used for both CLAMP_S and CLAMP_T
void ib_texture__1_float32(int reloc_index,
                           int width, int height,
                           int macrotile, int microtile,
                           int clamp)
{
  // every register value is computed up front; none of these expressions
  // depends on the state of the indirect buffer
  const unsigned int filter0
    = TX_FILTER0__CLAMP_S(clamp)
    | TX_FILTER0__CLAMP_T(clamp)
    | TX_FILTER0__MAG_FILTER__POINT
    | TX_FILTER0__MIN_FILTER__POINT
    | TX_FILTER0__ID(0);
  const unsigned int filter1
    = TX_FILTER1__LOD_BIAS(1)
    | TX_FILTER1__BORDER_FIX(1);
  const unsigned int format0
    = TX_FORMAT0__TXWIDTH(width - 1)
    | TX_FORMAT0__TXHEIGHT(height - 1);
  const unsigned int format1
    = TX_FORMAT1__TXFORMAT__TX_FMT_32F_32F_32F_32F
    | TX_FORMAT1__SEL_ALPHA(3)
    | TX_FORMAT1__SEL_RED(0)
    | TX_FORMAT1__SEL_GREEN(1)
    | TX_FORMAT1__SEL_BLUE(2)
    | TX_FORMAT1__TEX_COORD_TYPE__2D;
  const unsigned int tiling
    = TX_OFFSET__MACRO_TILE(macrotile)
    | TX_OFFSET__MICRO_TILE(microtile);

  //////////////////////////////////////////////////////////////////////////////
  // TX
  //////////////////////////////////////////////////////////////////////////////

  T0V(TX_INVALTAGS, 0x00000000);
  T0V(TX_ENABLE, TX_ENABLE__TEX_0_ENABLE__ENABLE);

  T0V(TX_FILTER0_0, filter0);
  T0V(TX_FILTER1_0, filter1);
  T0V(TX_BORDER_COLOR_0, 0);
  T0V(TX_FORMAT0_0, format0);
  T0V(TX_FORMAT1_0, format1);
  T0V(TX_FORMAT2_0, 0);

  // TX_OFFSET_0 is followed by the NOP packet that carries the relocation
  T0V(TX_OFFSET_0, tiling);
  T3(_NOP, 0);
  TU(reloc_index * 4); // index into relocs array
}
void ib_texture2(int texture_index,
int reloc_index,
int width, int height,
int macrotile, int microtile,
int clamp,
int txformat)
{
assert(texture_index >= 0 && texture_index <= 15);
int texture_offset = texture_index * 4;
T0V(TX_FILTER0_0 + texture_offset
, TX_FILTER0__CLAMP_S(clamp)
| TX_FILTER0__CLAMP_T(clamp)
| TX_FILTER0__MAG_FILTER__POINT
| TX_FILTER0__MIN_FILTER__POINT
| TX_FILTER0__ID(texture_index)
);
T0V(TX_FILTER1_0 + texture_offset
, TX_FILTER1__LOD_BIAS(1)
| TX_FILTER1__BORDER_FIX(0)
);
T0V(TX_BORDER_COLOR_0 + texture_offset
, 0
);
T0V(TX_FORMAT0_0 + texture_offset
, TX_FORMAT0__TXWIDTH(width - 1)
| TX_FORMAT0__TXHEIGHT(height - 1)
);
T0V(TX_FORMAT1_0 + texture_offset
, TX_FORMAT1__TXFORMAT(txformat)
| TX_FORMAT1__SEL_ALPHA(3)
| TX_FORMAT1__SEL_RED(0)
| TX_FORMAT1__SEL_GREEN(1)
| TX_FORMAT1__SEL_BLUE(2)
| TX_FORMAT1__TEX_COORD_TYPE__2D
);
T0V(TX_FORMAT2_0 + texture_offset
, 0
);
T0V(TX_OFFSET_0 + texture_offset
, TX_OFFSET__MACRO_TILE(macrotile)
| TX_OFFSET__MICRO_TILE(microtile)
);
T3(_NOP, 0);
TU(reloc_index * 4); // index into relocs array
@ -617,7 +790,7 @@ void ib_vap_pvs_const_cntl(const float * consts, int size)
T0V(VAP_PVS_CONST_CNTL
, VAP_PVS_CONST_CNTL__PVS_CONST_BASE_OFFSET(0)
| VAP_PVS_CONST_CNTL__PVS_MAX_CONST_ADDR((size / 4) - 1)
| VAP_PVS_CONST_CNTL__PVS_MAX_CONST_ADDR(consts_length - 1)
);
T0V(VAP_PVS_VECTOR_INDX_REG
@ -629,6 +802,34 @@ void ib_vap_pvs_const_cntl(const float * consts, int size)
TF(consts[i]);
}
// Upload vertex shader constants starting at constant slot `offset`.
//
//   consts  `size / 4` floats to upload
//   size    size of `consts` in bytes; must be a non-negative multiple of 16
//           (one four-float constant)
//   offset  added to 1024 to form the OCTWORD_OFFSET written to
//           VAP_PVS_VECTOR_INDX_REG
//           NOTE(review): 1024 is presumably the base of the constant region
//           in PVS vector memory -- confirm against the register reference.
//
// Nothing is emitted when `size` is zero: the packet header encodes
// (count - 1), so an empty upload would otherwise produce a header that
// announces data which never follows.
void ib_vap_pvs_const_offset(const float * consts, int size, int offset)
{
  assert(size >= 0);
  assert(size % 16 == 0);

  const int consts_length = size / 4;
  if (consts_length == 0)
    return;

  T0V(VAP_PVS_VECTOR_INDX_REG
      , VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(1024 + offset)
      );

  T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, (consts_length - 1));
  for (int i = 0; i < consts_length; i++)
    TF(consts[i]);
}
// Upload fragment shader constants through GA_US_VECTOR_INDEX /
// GA_US_VECTOR_DATA.
//
//   consts         `consts_length` floats to upload
//   consts_length  number of floats; must be a non-negative multiple of 4
//   index          value for the INDEX field of GA_US_VECTOR_INDEX
//
// Nothing is emitted when `consts_length` is zero: the packet header encodes
// (count - 1), so an empty upload would otherwise produce a header that
// announces data which never follows.
void ib_ga_consts(const float * consts, int consts_length, int index)
{
  assert(consts_length >= 0);
  assert(consts_length % 4 == 0);

  if (consts_length == 0)
    return;

  T0V(GA_US_VECTOR_INDEX
      , GA_US_VECTOR_INDEX__INDEX(index)
      | GA_US_VECTOR_INDEX__TYPE(1)
      );

  T0_ONE_REG(GA_US_VECTOR_DATA, (consts_length - 1));
  for (int i = 0; i < consts_length; i++)
    TF(consts[i]);
}
void ib_vap_stream_cntl__2()
{
//////////////////////////////////////////////////////////////////////////////
@ -671,6 +872,66 @@ void ib_vap_stream_cntl__3()
);
}
// Describe a vertex stream of two elements: FLOAT_3 into vector location 0
// followed by FLOAT_2 into vector location 1 (LAST_VEC is set on element 1).
//
// Element 0 is swizzled to (x, y, z, 1.0); element 1 to (x, y, 0.0, 1.0).
void ib_vap_stream_cntl__32()
{
  const unsigned int element_0
    = VAP_PROG_STREAM_CNTL__DATA_TYPE_0__FLOAT_3
    | VAP_PROG_STREAM_CNTL__SKIP_DWORDS_0(0)
    | VAP_PROG_STREAM_CNTL__DST_VEC_LOC_0(0)
    | VAP_PROG_STREAM_CNTL__LAST_VEC_0(0);
  const unsigned int element_1
    = VAP_PROG_STREAM_CNTL__DATA_TYPE_1__FLOAT_2
    | VAP_PROG_STREAM_CNTL__SKIP_DWORDS_1(0)
    | VAP_PROG_STREAM_CNTL__DST_VEC_LOC_1(1)
    | VAP_PROG_STREAM_CNTL__LAST_VEC_1(1);

  const unsigned int swizzle_0
    = VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X
    | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0__SELECT_Y
    | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_Z
    | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0__SELECT_FP_ONE
    | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111); // XYZW
  const unsigned int swizzle_1
    = VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_1__SELECT_X
    | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_1__SELECT_Y
    | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_1__SELECT_FP_ZERO
    | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_1__SELECT_FP_ONE
    | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_1(0b1111); // XYZW

  //////////////////////////////////////////////////////////////////////////////
  // VAP_PROG_STREAM_CNTL
  //////////////////////////////////////////////////////////////////////////////

  T0V(VAP_PROG_STREAM_CNTL_0, element_0 | element_1);
  T0V(VAP_PROG_STREAM_CNTL_EXT_0, swizzle_0 | swizzle_1);
}
// Describe a vertex stream of two elements: FLOAT_4 into vector location 0
// followed by FLOAT_2 into vector location 1 (LAST_VEC is set on element 1).
//
// Element 0 is passed through as (x, y, z, w); element 1 is swizzled to
// (x, y, 0.0, 1.0).
void ib_vap_stream_cntl__42()
{
  const unsigned int element_0
    = VAP_PROG_STREAM_CNTL__DATA_TYPE_0__FLOAT_4
    | VAP_PROG_STREAM_CNTL__SKIP_DWORDS_0(0)
    | VAP_PROG_STREAM_CNTL__DST_VEC_LOC_0(0)
    | VAP_PROG_STREAM_CNTL__LAST_VEC_0(0);
  const unsigned int element_1
    = VAP_PROG_STREAM_CNTL__DATA_TYPE_1__FLOAT_2
    | VAP_PROG_STREAM_CNTL__SKIP_DWORDS_1(0)
    | VAP_PROG_STREAM_CNTL__DST_VEC_LOC_1(1)
    | VAP_PROG_STREAM_CNTL__LAST_VEC_1(1);

  const unsigned int swizzle_0
    = VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X
    | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0__SELECT_Y
    | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_Z
    | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0__SELECT_W
    | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111); // XYZW
  const unsigned int swizzle_1
    = VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_1__SELECT_X
    | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_1__SELECT_Y
    | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_1__SELECT_FP_ZERO
    | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_1__SELECT_FP_ONE
    | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_1(0b1111); // XYZW

  //////////////////////////////////////////////////////////////////////////////
  // VAP_PROG_STREAM_CNTL
  //////////////////////////////////////////////////////////////////////////////

  T0V(VAP_PROG_STREAM_CNTL_0, element_0 | element_1);
  T0V(VAP_PROG_STREAM_CNTL_EXT_0, swizzle_0 | swizzle_1);
}
void ib_vap_stream_cntl__323()
{
//////////////////////////////////////////////////////////////////////////////

View File

@ -7,23 +7,23 @@
#define T0(address, count) \
do { \
ib[ib_ix++].u32 = TYPE_0_COUNT(count) | TYPE_0_BASE_INDEX(address >> 2); \
ib[ib_ix++].u32 = TYPE_0_COUNT(count) | TYPE_0_BASE_INDEX((address) >> 2); \
} while (0);
#define T0_ONE_REG(address, count) \
do { \
ib[ib_ix++].u32 = TYPE_0_COUNT(count) | TYPE_0_ONE_REG | TYPE_0_BASE_INDEX(address >> 2); \
ib[ib_ix++].u32 = TYPE_0_COUNT(count) | TYPE_0_ONE_REG | TYPE_0_BASE_INDEX((address) >> 2); \
} while (0);
#define T0V(address, value) \
do { \
ib[ib_ix++].u32 = TYPE_0_COUNT(0) | TYPE_0_BASE_INDEX(address >> 2); \
ib[ib_ix++].u32 = TYPE_0_COUNT(0) | TYPE_0_BASE_INDEX((address) >> 2); \
ib[ib_ix++].u32 = value; \
} while (0);
#define T0Vf(address, value) \
do { \
ib[ib_ix++].u32 = TYPE_0_COUNT(0) | TYPE_0_BASE_INDEX(address >> 2); \
ib[ib_ix++].u32 = TYPE_0_COUNT(0) | TYPE_0_BASE_INDEX((address) >> 2); \
ib[ib_ix++].f32 = value; \
} while (0);
@ -51,20 +51,49 @@ union u32_f32 {
float f32;
};
extern union u32_f32 ib[16384];
extern union u32_f32 ib[16384 * 100];
extern volatile int ib_ix;
void ib_generic_initialization();
void ib_colorbuffer(int reloc_index);
void ib_zbuffer(int reloc_index, int zfunc);
void ib_viewport(int width, int height);
void ib_colorbuffer(int reloc_index, int pitch, int macrotile, int microtile);
void ib_colorbuffer2(int buffer_index,
int reloc_index,
int pitch,
int macrotile, int microtile,
int colorformat);
void ib_colorbuffer3(int buffer_index,
int reloc_index,
int offset,
int pitch,
int macrotile, int microtile,
int colorformat);
void ib_zbuffer(int reloc_index, int pitch, int zfunc);
void ib_rs_instructions(int count);
void ib_texture__0();
void ib_texture__1(int reloc_index);
void ib_texture__1(int reloc_index,
int width, int height,
int macrotile, int microtile,
int clamp);
void ib_texture__1_float32(int reloc_index,
int width, int height,
int macrotile, int microtile,
int clamp);
void ib_texture2(int texture_index,
int reloc_index,
int width, int height,
int macrotile, int microtile,
int clamp,
int txformat);
void ib_vap_pvs(struct shader_offset * offset);
void ib_ga_us(struct shader_offset * offset);
void ib_vap_pvs_const_cntl(const float * consts, int size);
void ib_vap_pvs_const_offset(const float * consts, int size, int offset);
void ib_ga_consts(const float * consts, int consts_length, int index);
void ib_vap_stream_cntl__2();
void ib_vap_stream_cntl__3();
void ib_vap_stream_cntl__32();
void ib_vap_stream_cntl__42();
void ib_vap_stream_cntl__323();
#ifdef __cplusplus

8
src/texture_tile.fs.asm Normal file
View File

@ -0,0 +1,8 @@
-- Load from tex[0] using temp[0] (read through the .rgaa swizzle) as the
-- coordinate operand; the loaded value overwrites temp[0].
TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE
temp[0].rgba = LD tex[0].rgba temp[0].rgaa ;
-- Copy the loaded value to out[0]: MAX of a value with itself yields that
-- value.
OUT TEX_SEM_WAIT
src0.a = temp[0],
src0.rgb = temp[0] :
out[0].a = MAX src0.a src0.a ,
out[0].rgb = MAX src0.rgb src0.rgb ;

BIN
src/texture_tile.fs.bin Normal file

Binary file not shown.

1
src/texture_tile.vs.asm Normal file
View File

@ -0,0 +1 @@
-- out[0] = input[0] + 0 (input[0] copied unchanged)
out[0].xyzw = VE_ADD input[0].xyzw input[0].0000 ;

BIN
src/texture_tile.vs.bin Normal file

Binary file not shown.

293
src/tx_rt.cpp Normal file
View File

@ -0,0 +1,293 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
#include "../model/plane.h"
// index of the only entry in vertex_shader_paths / fragment_shader_paths
#define TEXTURE_TILE_SHADER 0
// vertex shader binaries handed to load_shaders()
const char * vertex_shader_paths[] = {
"tx_rt.vs.bin",
};
const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
// fragment shader binaries handed to load_shaders()
const char * fragment_shader_paths[] = {
"tx_rt.fs.bin",
};
const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
// index of the only entry in textures
#define PARTICLE_TEXTURE 0
// texture files handed to load_textures()
const char * textures[] = {
"../texture/butterfly_1024x1024_rgba8888.data",
};
const int textures_length = (sizeof (textures)) / (sizeof (textures[0]));
// Loaded shader tables together with their entry counts.
struct shaders {
struct shader_offset * vertex; // result of load_shaders(vertex_shader_paths, ...)
struct shader_offset * fragment; // result of load_shaders(fragment_shader_paths, ...)
int vertex_length; // number of entries in `vertex`
int fragment_length; // number of entries in `fragment`
};
// Build, in the global `ib` array, the command stream that draws one textured
// four-vertex triangle fan into a 1600x1200 color buffer while sampling a
// 1024x1024 texture.
//
// shaders: loaded shader tables; entry TEXTURE_TILE_SHADER of each is selected
// input_reloc_index: relocation index passed to ib_texture__1 (the texture)
// output_reloc_index: relocation index passed to ib_colorbuffer (the target)
//
// Resets ib_ix to 0 before writing and returns the final ib_ix, which main()
// passes on as the dword count of the command stream.
int _tile_texture(const shaders& shaders,
int input_reloc_index,
int output_reloc_index)
{
// both sizes are hard-coded; they are not derived from the buffers themselves
int viewport_width = 1600;
int viewport_height = 1200;
int texture_width = 1024;
int texture_height = 1024;
// half of the viewport size; only used for GA_POINT_SIZE below
float vx = ((float)viewport_width) * 0.5f;
float vy = ((float)viewport_height) * 0.5f;
// 0.5 / texture size; uploaded as fragment constant 0 below
float tx = 0.5f / ((float)texture_width);
float ty = 0.5f / ((float)texture_height);
ib_ix = 0;
ib_generic_initialization();
// scissor covers the whole viewport
T0V(SC_SCISSOR0
, SC_SCISSOR0__XS0(0)
| SC_SCISSOR0__YS0(0)
);
T0V(SC_SCISSOR1
, SC_SCISSOR1__XS1(viewport_width - 1)
| SC_SCISSOR1__YS1(viewport_height - 1)
);
// X/Y scale equal the full viewport size; no X/Y offset is written here and
// VAP_VTE_CNTL below enables only the X and Y scale, so the [0, 1] vertex
// coordinates used by the draw are presumably mapped to [0, width] x
// [0, height] -- TODO confirm
T0Vf(VAP_VPORT_XSCALE, (float)viewport_width);
T0Vf(VAP_VPORT_YSCALE, (float)viewport_height);
ib_colorbuffer(output_reloc_index, viewport_width, 0, 0); // macrotile, microtile
T0V(US_OUT_FMT_0
, US_OUT_FMT__OUT_FMT(0) // C4_8
| US_OUT_FMT__C0_SEL__BLUE
| US_OUT_FMT__C1_SEL__GREEN
| US_OUT_FMT__C2_SEL__RED
| US_OUT_FMT__C3_SEL__ALPHA
| US_OUT_FMT__OUT_SIGN(0)
);
T0V(US_OUT_FMT_1
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_3
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
// shaders
load_pvs_shaders(shaders.vertex, shaders.vertex_length);
load_us_shaders(shaders.fragment, shaders.fragment_length);
// GA
T0V(GB_ENABLE
, 0
);
//////////////////////////////////////////////////////////////////////////////
// RS
//////////////////////////////////////////////////////////////////////////////
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)
);
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)
);
// both Z buffer control registers are written as zero
T0V(ZB_CNTL, 0);
T0V(ZB_ZSTENCILCNTL, 0);
// texture 0: untiled, same clamp mode for S and T
int macrotile = 0;
int microtile = 0;
int clamp = 2; // clamp to [0.0, 1.0]
ib_texture__1(input_reloc_index,
texture_width, texture_height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__2();
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
);
ib_ga_us(&shaders.fragment[TEXTURE_TILE_SHADER]);
ib_vap_pvs(&shaders.vertex[TEXTURE_TILE_SHADER]);
// fragment constants
const float fragment_consts[] = {
tx, ty, 0, 0,
};
int fragment_consts_length = (sizeof (fragment_consts)) / (sizeof (fragment_consts[0]));
// upload starting at index 0; the packet count field is (number of dwords - 1)
T0V(GA_US_VECTOR_INDEX
, GA_US_VECTOR_INDEX__INDEX(0)
| GA_US_VECTOR_INDEX__TYPE(1)
);
T0_ONE_REG(GA_US_VECTOR_DATA, (fragment_consts_length - 1));
for (int i = 0; i < fragment_consts_length; i++)
TF(fragment_consts[i]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// GA POINT SIZE
//////////////////////////////////////////////////////////////////////////////
// NOTE(review): the draw below is a triangle fan; it is not evident from this
// function that the point size has any effect on it -- confirm
T0V(GA_POINT_SIZE
, GA_POINT_SIZE__HEIGHT((int)(vy * 12.0f))
| GA_POINT_SIZE__WIDTH((int)(vx * 12.0f))
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
// four corners of the unit square, two floats (x, y) per vertex
const float vertices[] = {
0.0f, 0.0f,
1.0f, 0.0f,
1.0f, 1.0f,
0.0f, 1.0f,
};
const int vertex_count = 4;
// packet body: one VAP_VF_CNTL dword followed by the inline vertex data; the
// count argument is (body dwords - 1)
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(5) // triangle fan
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
// the literal 2 matches dwords_per_vtx
for (int i = 0; i < vertex_count * 2; i++) {
TF(vertices[i]);
}
return ib_ix;
}
// Submit a single command stream that renders the input texture into
// colorbuffer 0, then pass that buffer index to primary_surface_address().
//
// Returns EXIT_SUCCESS, or EXIT_FAILURE when the DRM device cannot be opened
// or drm_radeon_cs() reports failure (-1).
int main()
{
  struct shaders shaders = {
    .vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
    .fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
    .vertex_length = vertex_shader_paths_length,
    .fragment_length = fragment_shader_paths_length,
  };

  // NOTE(review): the failure convention of map_pci_resource2() is not
  // visible here, so its result is used unchecked as before.
  void * rmmio = map_pci_resource2();

  // checked explicitly rather than with assert(), which disappears when
  // NDEBUG is defined
  int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
  if (fd == -1) {
    perror("open /dev/dri/card0");
    return EXIT_FAILURE;
  }

  const int colorbuffer_size = 1600 * 1200 * 4;

  int colorbuffer_handle[2];
  int zbuffer_handle;
  int * texturebuffer_handle;
  int flush_handle;

  void * colorbuffer_ptr[2];
  void * zbuffer_ptr;

  // colorbuffer
  colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
  colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
  // the Z buffer is allocated with the same byte size as a color buffer
  zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
  flush_handle = create_flush_buffer(fd);
  texturebuffer_handle = load_textures(fd, textures, textures_length);

  fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
  fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
  fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);

  // exactly one frame is rendered, always into colorbuffer 0
  const int colorbuffer_ix = 0;

  int ib_dwords = _tile_texture(shaders,
                                TEXTUREBUFFER_RELOC_INDEX, // input
                                COLORBUFFER_RELOC_INDEX);  // output

  int ret = drm_radeon_cs(fd,
                          colorbuffer_handle[colorbuffer_ix],
                          zbuffer_handle,
                          flush_handle,
                          texturebuffer_handle,
                          textures_length,
                          ib_dwords);

  int status = EXIT_SUCCESS;
  if (ret == -1) {
    fprintf(stderr, "drm_radeon_cs failed\n");
    status = EXIT_FAILURE;
  } else {
    primary_surface_address(rmmio, colorbuffer_ix);
  }

  close(fd);

  return status;
}

8
src/tx_rt.fs.asm Normal file
View File

@ -0,0 +1,8 @@
-- Load from tex[0] using temp[0] (read through the .rgaa swizzle) as the
-- coordinate operand; the loaded value overwrites temp[0].
TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE
temp[0].rgba = LD tex[0].rgba temp[0].rgaa ;
-- Copy the loaded value to out[0]: MAX of a value with itself yields that
-- value.
OUT TEX_SEM_WAIT
src0.a = temp[0],
src0.rgb = temp[0] :
out[0].a = MAX src0.a src0.a ,
out[0].rgb = MAX src0.rgb src0.rgb ;

BIN
src/tx_rt.fs.bin Normal file

Binary file not shown.

2
src/tx_rt.vs.asm Normal file
View File

@ -0,0 +1,2 @@
-- Both outputs receive input[0] unchanged (input[0] + 0).
out[0].xyzw = VE_ADD input[0].xyzw input[0].0000 ;
out[1].xyzw = VE_ADD input[0].xyzw input[0].0000 ;

BIN
src/tx_rt.vs.bin Normal file

Binary file not shown.

335
src/tx_rt_float.cpp Normal file
View File

@ -0,0 +1,335 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
#include "../model/plane.h"
// index of the only entry in vertex_shader_paths / fragment_shader_paths
#define TEXTURE_TILE_SHADER 0
// vertex shader binaries handed to load_shaders()
const char * vertex_shader_paths[] = {
"tx_rt_float.vs.bin",
};
const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
// fragment shader binaries handed to load_shaders()
const char * fragment_shader_paths[] = {
"tx_rt_float.fs.bin",
};
const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
// index of the only entry in textures
#define PARTICLE_TEXTURE 0
// texture files handed to load_textures()
const char * textures[] = {
"../texture/butterfly_1024x1024_rgba8888.data",
};
const int textures_length = (sizeof (textures)) / (sizeof (textures[0]));
// Loaded shader tables together with their entry counts.
struct shaders {
struct shader_offset * vertex; // result of load_shaders(vertex_shader_paths, ...)
struct shader_offset * fragment; // result of load_shaders(fragment_shader_paths, ...)
int vertex_length; // number of entries in `vertex`
int fragment_length; // number of entries in `fragment`
};
// dimensions of the float buffers, in four-float elements per row / column;
// main() sizes each buffer as width * height * 4 * 4 bytes
const int floatbuffer_width = 64;
const int floatbuffer_height = 64;
// Build, in the global `ib` array, the command stream that draws one
// four-vertex triangle fan into a floatbuffer_width x floatbuffer_height
// ARGB32323232 color buffer while sampling a 32-bit float texture of the same
// size.
//
// shaders: loaded shader tables; entry TEXTURE_TILE_SHADER of each is selected
// input_reloc_index: relocation index passed to ib_texture__1_float32
// output_reloc_index: relocation index passed to ib_colorbuffer
//
// Resets ib_ix to 0 before writing and returns the final ib_ix, which main()
// passes on as the dword count of the command stream.
int _floatbuffer(const shaders& shaders,
int input_reloc_index,
int output_reloc_index)
{
int viewport_width = floatbuffer_width;
int viewport_height = floatbuffer_height;
int texture_width = floatbuffer_width;
int texture_height = floatbuffer_height;
// NOTE(review): vx and vy are not referenced anywhere below in this function
float vx = ((float)viewport_width) * 0.5f;
float vy = ((float)viewport_height) * 0.5f;
// 0.5 / texture size; uploaded as vertex constant 0 below
float tx = 0.5f / ((float)texture_width);
float ty = 0.5f / ((float)texture_height);
ib_ix = 0;
ib_generic_initialization();
// diagnostic output on stdout, printed every time the stream is built
printf("vp %d %d \n", viewport_width, viewport_height);
// scissor covers the whole viewport
T0V(SC_SCISSOR0
, SC_SCISSOR0__XS0(0)
| SC_SCISSOR0__YS0(0)
);
T0V(SC_SCISSOR1
, SC_SCISSOR1__XS1(viewport_width - 1)
| SC_SCISSOR1__YS1(viewport_height - 1)
);
// X/Y scale equal the full viewport size; no X/Y offset is written here and
// VAP_VTE_CNTL below enables only the X and Y scale
T0Vf(VAP_VPORT_XSCALE, (float)viewport_width);
T0Vf(VAP_VPORT_YSCALE, (float)viewport_height);
ib_colorbuffer(output_reloc_index, viewport_width, 0, 0); // macrotile, microtile
// RB3D_COLORPITCH0 is written a second time so that the color format becomes
// ARGB32323232 instead of the format chosen inside ib_colorbuffer.
// NOTE(review): unlike the write inside ib_colorbuffer, this one is not
// followed by a NOP relocation packet -- confirm that this is intended
T0V(RB3D_COLORPITCH0
, RB3D_COLORPITCH__COLORPITCH(viewport_width >> 1)
| RB3D_COLORPITCH__COLORTILE(0)
| RB3D_COLORPITCH__COLORMICROTILE(0)
| RB3D_COLORPITCH__COLORFORMAT__ARGB32323232
);
T0V(US_OUT_FMT_0
, US_OUT_FMT__OUT_FMT(21) // C4_32_FP
| US_OUT_FMT__C0_SEL__RED
| US_OUT_FMT__C1_SEL__GREEN
| US_OUT_FMT__C2_SEL__BLUE
| US_OUT_FMT__C3_SEL__ALPHA
| US_OUT_FMT__OUT_SIGN(0)
);
T0V(US_OUT_FMT_1
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_3
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
// shaders
load_pvs_shaders(shaders.vertex, shaders.vertex_length);
load_us_shaders(shaders.fragment, shaders.fragment_length);
// GA
T0V(GB_ENABLE
, 0
);
//////////////////////////////////////////////////////////////////////////////
// RS
//////////////////////////////////////////////////////////////////////////////
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)
);
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)
);
// both Z buffer control registers are written as zero
T0V(ZB_CNTL, 0);
T0V(ZB_ZSTENCILCNTL, 0);
// texture 0: untiled, same clamp mode for S and T
int macrotile = 0;
int microtile = 0;
int clamp = 2; // clamp to [0.0, 1.0]
ib_texture__1_float32(input_reloc_index,
texture_width, texture_height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__2();
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
);
ib_ga_us(&shaders.fragment[TEXTURE_TILE_SHADER]);
ib_vap_pvs(&shaders.vertex[TEXTURE_TILE_SHADER]);
// vertex constants; the size passed on is in bytes
const float vertex_consts[] = {
tx, ty, 0, 0,
};
const int vertex_consts_size = (sizeof (vertex_consts));
ib_vap_pvs_const_cntl(vertex_consts, vertex_consts_size);
// fragment constants
const float fragment_consts[] = {
1234.0f, 0, 0, 0,
};
int fragment_consts_length = (sizeof (fragment_consts)) / (sizeof (fragment_consts[0]));
ib_ga_consts(fragment_consts, fragment_consts_length, 0);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
// four corners of the unit square, two floats (x, y) per vertex
const float vertices[] = {
0.0f, 0.0f,
1.0f, 0.0f,
1.0f, 1.0f,
0.0f, 1.0f,
};
const int vertex_count = 4;
// packet body: one VAP_VF_CNTL dword followed by the inline vertex data; the
// count argument is (body dwords - 1)
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(5) // triangle fan
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
// the literal 2 matches dwords_per_vtx
for (int i = 0; i < vertex_count * 2; i++) {
TF(vertices[i]);
}
return ib_ix;
}
// Fill `ptr` with a descending ramp of floats: 16384, 16383, 16382, ...
//
// `size` is the buffer size in bytes.  Only whole four-float vectors (16
// bytes each) are written; any trailing bytes are left untouched.
void floatbuffer_data(void * ptr, int size)
{
  float * out = (float *)ptr;
  const int component_count = (size / (4 * 4)) * 4;

  for (int k = 0; k < component_count; k++) {
    out[k] = (float)(16384 - k);
  }
}
// Print every four-float vector at which buffers `a` and `b` differ.
//
// `size` is the size of each buffer in bytes; only whole four-float vectors
// (16 bytes each) are compared.  Components are compared with exact float
// equality.  One line is written to stdout per differing vector, showing the
// vector index and the four components from each buffer; nothing is printed
// for matching vectors.
void floatbuffer_compare(void * a, void * b, int size)
{
  const float * a_f32 = (const float *)a;
  const float * b_f32 = (const float *)b;
  const int vector_length = size / (4 * 4);

  for (int i = 0; i < vector_length; i++) {
    const float * va = &a_f32[i * 4];
    const float * vb = &b_f32[i * 4];

    if (va[0] == vb[0] && va[1] == vb[1] && va[2] == vb[2] && va[3] == vb[3])
      continue;

    printf("a[%d] = [% 2.02f % 2.02f % 2.02f % 2.02f] ; ", i,
           va[0], va[1], va[2], va[3]);
    printf("b[%d] = [% 2.02f % 2.02f % 2.02f % 2.02f] \n", i,
           vb[0], vb[1], vb[2], vb[3]);
  }
}
// Renders floatbuffer[0] into floatbuffer[1] with a single command submission
// and then reports every vector that differs between the two buffers.
//
// Returns 0 after the comparison has run, 1 when the command submission
// fails (the comparison is skipped in that case because the output buffer
// was never rendered).
int main()
{
  struct shaders shaders = {
    .vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
    .fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
    .vertex_length = vertex_shader_paths_length,
    .fragment_length = fragment_shader_paths_length,
  };

  // The mapping itself is not used below; the call is kept as in the
  // original in case it has side effects.
  void * rmmio = map_pci_resource2();
  (void)rmmio;

  int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
  assert(fd != -1);

  const int colorbuffer_size = 1600 * 1200 * 4;
  // width * height texels, 4 components per texel, 4 bytes per component
  const int floatbuffer_size = floatbuffer_width * floatbuffer_height * 4 * 4;
  const int floatbuffer_count = 2;

  int colorbuffer_handle[2];
  int floatbuffer_handle[floatbuffer_count];
  void * colorbuffer_ptr[2];
  void * floatbuffer_ptr[floatbuffer_count];
  void * zbuffer_ptr;

  // Buffers are created in the same order as before.
  colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
  colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
  int zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
  int flush_handle = create_flush_buffer(fd);
  // The loaded textures are not bound by the submission below.
  int * texturebuffer_handle = load_textures(fd, textures, textures_length);
  (void)texturebuffer_handle;
  floatbuffer_handle[0] = create_buffer(fd, floatbuffer_size, &floatbuffer_ptr[0]);
  floatbuffer_handle[1] = create_buffer(fd, floatbuffer_size, &floatbuffer_ptr[1]);

  fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
  fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
  fprintf(stderr, "floatbuffer handle[0] %d\n", floatbuffer_handle[0]);
  fprintf(stderr, "floatbuffer handle[1] %d\n", floatbuffer_handle[1]);
  fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);

  // Source data: a descending ramp in floatbuffer[0].
  floatbuffer_data(floatbuffer_ptr[0], floatbuffer_size);

  int ib_dwords = _floatbuffer(shaders,
                               TEXTUREBUFFER_RELOC_INDEX + 0, // input
                               COLORBUFFER_RELOC_INDEX);      // output

  // floatbuffer[1] is passed in the render-target slot; both floatbuffers
  // are passed as the texture list.
  int ret = drm_radeon_cs(fd,
                          floatbuffer_handle[1],
                          zbuffer_handle,
                          flush_handle,
                          floatbuffer_handle,
                          floatbuffer_count,
                          ib_dwords);
  if (ret == -1) {
    fprintf(stderr, "drm_radeon_cs failed; skipping floatbuffer comparison\n");
    close(fd);
    return 1;
  }

  floatbuffer_compare(floatbuffer_ptr[0], floatbuffer_ptr[1], floatbuffer_size);

  close(fd);
  return 0;
}

8
src/tx_rt_float.fs.asm Normal file
View File

@ -0,0 +1,8 @@
-- temp[0]: texture coordinate
-- TEX: presumably fetches from tex[0] at the coordinate in temp[0]; the
--      result overwrites temp[0]
-- OUT: MAX of a value with itself is that value, so out[0] receives
--      temp[0] unchanged
TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE
temp[0].rgba = LD tex[0].rgba temp[0].rgaa ;
OUT TEX_SEM_WAIT
src0.a = temp[0],
src0.rgb = temp[0] :
out[0].a = MAX src0.a src0.a ,
out[0].rgb = MAX src0.rgb src0.rgb ;

BIN
src/tx_rt_float.fs.bin Normal file

Binary file not shown.

2
src/tx_rt_float.vs.asm Normal file
View File

@ -0,0 +1,2 @@
out[0].xyzw = VE_ADD input[0].xyzw input[0].0000 ;
out[1].xyzw = VE_ADD input[0].xy00 const[0].0000 ;

BIN
src/tx_rt_float.vs.bin Normal file

Binary file not shown.

343
src/tx_rt_float_4x.cpp Normal file
View File

@ -0,0 +1,343 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
#include "../model/plane.h"
// Index into shaders.vertex / shaders.fragment of the shader pair that
// _floatbuffer binds.
#define TEXTURE_TILE_SHADER 0
// Vertex shader binaries handed to load_shaders in main.
const char * vertex_shader_paths[] = {
"tx_rt_float.vs.bin",
};
const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
// Fragment shader binaries handed to load_shaders in main.
const char * fragment_shader_paths[] = {
"tx_rt_float.fs.bin",
};
const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
// Index of the butterfly texture in `textures`.
// NOTE(review): not referenced in the visible part of this file.
#define PARTICLE_TEXTURE 0
// Texture files handed to load_textures in main.
const char * textures[] = {
"../texture/butterfly_1024x1024_rgba8888.data",
};
const int textures_length = (sizeof (textures)) / (sizeof (textures[0]));
// Loaded shader tables; main fills the pointers from load_shaders and the
// lengths from the corresponding *_shader_paths_length constants.
struct shaders {
struct shader_offset * vertex; // passed to load_pvs_shaders
struct shader_offset * fragment; // passed to load_us_shaders
int vertex_length; // number of entries in `vertex`
int fragment_length; // number of entries in `fragment`
};
// Dimensions, in texels, of the source float texture. main sizes the source
// buffer as width * height * 4 * 4 bytes; _floatbuffer renders into a
// viewport that is four times as wide.
const int floatbuffer_width = 64;
const int floatbuffer_height = 64;
// Builds the command stream for one draw that samples the
// floatbuffer_width x floatbuffer_height float texture and renders it into a
// render target that is four times as wide.
//
// shaders:            loaded vertex/fragment shader tables
// input_reloc_index:  relocation index passed to ib_texture__1_float32
// output_reloc_index: relocation index passed to ib_colorbuffer
//
// Returns ib_ix after all commands have been emitted; main passes this value
// to drm_radeon_cs as the dword count.
int _floatbuffer(const shaders& shaders,
int input_reloc_index,
int output_reloc_index)
{
// The render target is 4x as wide as the source texture, same height.
int viewport_width = floatbuffer_width * 4;
int viewport_height = floatbuffer_height;
int texture_width = floatbuffer_width;
int texture_height = floatbuffer_height;
// NOTE(review): vx and vy are computed but not used below.
float vx = ((float)viewport_width) * 0.5f;
float vy = ((float)viewport_height) * 0.5f;
// Half of one texel in normalized texture coordinates; uploaded below as
// vertex shader constant 0.
float tx = 0.5f / ((float)texture_width);
float ty = 0.5f / ((float)texture_height);
// Start a fresh indirect buffer.
ib_ix = 0;
ib_generic_initialization();
printf("vp %d %d \n", viewport_width, viewport_height);
// Scissor rectangle covering the whole viewport.
T0V(SC_SCISSOR0
, SC_SCISSOR0__XS0(0)
| SC_SCISSOR0__YS0(0)
);
T0V(SC_SCISSOR1
, SC_SCISSOR1__XS1(viewport_width - 1)
| SC_SCISSOR1__YS1(viewport_height - 1)
);
// Vertex positions below are in [0, 1]; scaling by the full viewport size
// stretches the quad over the whole render target.
T0Vf(VAP_VPORT_XSCALE, (float)viewport_width);
T0Vf(VAP_VPORT_YSCALE, (float)viewport_height);
ib_colorbuffer(output_reloc_index, viewport_width, 0, 0); // macrotile, microtile
// NOTE(review): the pitch is written as viewport_width >> 1; the reason for
// the halving is not visible here -- confirm against the register reference.
T0V(RB3D_COLORPITCH0
, RB3D_COLORPITCH__COLORPITCH(viewport_width >> 1)
| RB3D_COLORPITCH__COLORTILE(0)
| RB3D_COLORPITCH__COLORMICROTILE(0)
| RB3D_COLORPITCH__COLORFORMAT__ARGB32323232
);
T0V(US_OUT_FMT_0
, US_OUT_FMT__OUT_FMT(21) // C4_32_FP
| US_OUT_FMT__C0_SEL__RED
| US_OUT_FMT__C1_SEL__GREEN
| US_OUT_FMT__C2_SEL__BLUE
| US_OUT_FMT__C3_SEL__ALPHA
| US_OUT_FMT__OUT_SIGN(0)
);
T0V(US_OUT_FMT_1
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_3
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
// shaders
load_pvs_shaders(shaders.vertex, shaders.vertex_length);
load_us_shaders(shaders.fragment, shaders.fragment_length);
// GA
T0V(GB_ENABLE
, 0
);
//////////////////////////////////////////////////////////////////////////////
// RS
//////////////////////////////////////////////////////////////////////////////
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)
);
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)
);
// Both depth/stencil control registers are written as zero.
T0V(ZB_CNTL, 0);
T0V(ZB_ZSTENCILCNTL, 0);
// Source texture: untiled float texture at the source dimensions.
int macrotile = 0;
int microtile = 0;
int clamp = 2; // clamp to [0.0, 1.0]
ib_texture__1_float32(input_reloc_index,
texture_width, texture_height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__2();
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
);
// Bind the shader pair selected by TEXTURE_TILE_SHADER.
ib_ga_us(&shaders.fragment[TEXTURE_TILE_SHADER]);
ib_vap_pvs(&shaders.vertex[TEXTURE_TILE_SHADER]);
// vertex constants: one vec4 holding the half-texel offsets
const float vertex_consts[] = {
tx, ty, 0, 0,
};
const int vertex_consts_size = (sizeof (vertex_consts));
ib_vap_pvs_const_cntl(vertex_consts, vertex_consts_size);
// fragment constants
// NOTE(review): 1234.0f looks like a placeholder value -- confirm whether
// the fragment shader reads it.
const float fragment_consts[] = {
1234.0f, 0, 0, 0,
};
int fragment_consts_length = (sizeof (fragment_consts)) / (sizeof (fragment_consts[0]));
ib_ga_consts(fragment_consts, fragment_consts_length, 0);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
// Each vertex is two floats (x, y).
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
// Unit square, listed in order around its perimeter for the triangle fan.
const float vertices[] = {
0.0f, 0.0f,
1.0f, 0.0f,
1.0f, 1.0f,
0.0f, 1.0f,
};
const int vertex_count = 4;
// Packet payload: one VAP_VF_CNTL dword followed by the inline vertex data;
// the count argument is the payload size minus one.
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(5) // triangle fan
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
// The literal 2 matches dwords_per_vtx.
for (int i = 0; i < vertex_count * 2; i++) {
TF(vertices[i]);
}
return ib_ix;
}
// Writes a descending float ramp (16384, 16383, 16382, ...) into `ptr`.
//
// ptr:  destination buffer, written as an array of float
// size: buffer size in bytes; only whole 16-byte (4 x float) vectors are
//       written, any trailing remainder is left untouched
void floatbuffer_data(void * ptr, int size)
{
  float * out = (float *)ptr;
  const int vec4_count = size / (4 * 4);
  int value = 16384;

  for (int vec = 0; vec < vec4_count; vec++) {
    float * dst = &out[vec * 4];
    for (int component = 0; component < 4; component++) {
      dst[component] = value--;
    }
  }
}
// Checks that every 4-float vector of `a` appears four times in a row in
// `b`: vector i of `a` is compared against vectors 4*i .. 4*i+3 of `b`.
//
// a:    source buffer, read as an array of float
// b:    buffer expected to hold the 4x replicated data; it must contain four
//       times as many vectors as `a`
// size: size of `a` in bytes; only whole 16-byte vectors are compared
//
// Each mismatching pair is printed with the index of the vector in `a` and
// the index of the vector in `b`, followed by a final summary line.
// `matches` counts matching pairs, so its maximum is 4 * vector_length.
void floatbuffer_compare(void * a, void * b, int size)
{
  const float * a_f32 = (const float *)a;
  const float * b_f32 = (const float *)b;
  int vector_length = size / (4 * 4);
  int matches = 0;
  int ix = 0; // index of the current vector in b

  for (int i = 0; i < vector_length; i++) {
    const float * va = &a_f32[i * 4];

    for (int j = 0; j < 4; j++) {
      const float * vb = &b_f32[ix * 4];

      if (va[0] != vb[0] || va[1] != vb[1] || va[2] != vb[2] || va[3] != vb[3]) {
        printf("a[%d] = [% 2.02f % 2.02f % 2.02f % 2.02f] ; ", i,
               va[0], va[1], va[2], va[3]);
        // Label the b vector with its own index (ix), not the a index.
        printf("b[%d] = [% 2.02f % 2.02f % 2.02f % 2.02f] \n", ix,
               vb[0], vb[1], vb[2], vb[3]);
      } else {
        matches += 1;
      }
      ix++;
    }
  }

  printf("vector_length %d matches %d\n", vector_length, matches);
}
// Renders floatbuffer[0] into the 4x larger floatbuffer[1] with a single
// command submission and then checks that every source vector appears four
// times in the output.
//
// Returns 0 after the comparison has run, 1 when the command submission
// fails (the comparison is skipped in that case because the output buffer
// was never rendered).
int main()
{
  struct shaders shaders = {
    .vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
    .fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
    .vertex_length = vertex_shader_paths_length,
    .fragment_length = fragment_shader_paths_length,
  };

  // The mapping itself is not used below; the call is kept as in the
  // original in case it has side effects.
  void * rmmio = map_pci_resource2();
  (void)rmmio;

  int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
  assert(fd != -1);

  const int colorbuffer_size = 1600 * 1200 * 4;
  // width * height texels, 4 components per texel, 4 bytes per component
  const int floatbuffer_size = floatbuffer_width * floatbuffer_height * 4 * 4;
  const int floatbuffer_count = 2;

  int colorbuffer_handle[2];
  int floatbuffer_handle[floatbuffer_count];
  void * colorbuffer_ptr[2];
  void * floatbuffer_ptr[floatbuffer_count];
  void * zbuffer_ptr;

  // Buffers are created in the same order as before.
  colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
  colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
  int zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
  int flush_handle = create_flush_buffer(fd);
  // The loaded textures are not bound by the submission below.
  int * texturebuffer_handle = load_textures(fd, textures, textures_length);
  (void)texturebuffer_handle;
  floatbuffer_handle[0] = create_buffer(fd, floatbuffer_size, &floatbuffer_ptr[0]);
  // The output buffer is four times the size of the source buffer.
  floatbuffer_handle[1] = create_buffer(fd, floatbuffer_size * 4, &floatbuffer_ptr[1]);

  fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
  fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
  fprintf(stderr, "floatbuffer handle[0] %d\n", floatbuffer_handle[0]);
  fprintf(stderr, "floatbuffer handle[1] %d\n", floatbuffer_handle[1]);
  fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);

  // Source data: a descending ramp in floatbuffer[0].
  floatbuffer_data(floatbuffer_ptr[0], floatbuffer_size);

  int ib_dwords = _floatbuffer(shaders,
                               TEXTUREBUFFER_RELOC_INDEX + 0, // input
                               COLORBUFFER_RELOC_INDEX);      // output

  // floatbuffer[1] is passed in the render-target slot; both floatbuffers
  // are passed as the texture list.
  int ret = drm_radeon_cs(fd,
                          floatbuffer_handle[1],
                          zbuffer_handle,
                          flush_handle,
                          floatbuffer_handle,
                          floatbuffer_count,
                          ib_dwords);
  if (ret == -1) {
    fprintf(stderr, "drm_radeon_cs failed; skipping floatbuffer comparison\n");
    close(fd);
    return 1;
  }

  // `size` is the source size; the comparison reads four output vectors per
  // source vector.
  floatbuffer_compare(floatbuffer_ptr[0], floatbuffer_ptr[1], floatbuffer_size);

  close(fd);
  return 0;
}

View File

@ -0,0 +1,10 @@
-- temp[0]: texture coordinate
-- TEX: presumably fetches from tex[0] at the coordinate in temp[0]; the
--      result overwrites temp[0]
-- OUT: MAX of a value with itself is that value, so out[0] receives
--      temp[0] unchanged
TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE
temp[0].rgba = LD tex[0].rgba temp[0].rgaa ;
OUT TEX_SEM_WAIT
src0.a = temp[0] ,
src0.rgb = temp[0] :
out[0].a = MAX src0.a src0.a ,
out[0].rgb = MAX src0.rgb src0.rgb ;

Binary file not shown.

View File

@ -0,0 +1,2 @@
out[0].xyzw = VE_ADD input[0].xyzw input[0].0000 ;
out[1].xyzw = VE_ADD input[0].xy00 input[0].0000 ;

Some files were not shown because too many files have changed in this diff Show More