Compare commits

..

35 Commits
wip ... main

Author SHA1 Message Date
614a1d4172 src: add matrix_cubesphere_indexed 2025-11-20 15:09:37 -06:00
443f0f4785 parse_packets: add support for printing the content of type 3 packets 2025-11-20 14:46:18 -06:00
687bc734d9 regs: add display registers, memory controller, pcie 2025-11-15 13:03:09 -06:00
bed4b640ad particle_oriented_animated: improve vertex shader constant comments 2025-11-12 23:19:15 -06:00
fdff78f1ad shadertoy: implement shadertoy_palette_fractal 2025-11-11 16:02:39 -06:00
9e281cba58 assembler/lexer: add support for #include directive 2025-11-11 15:06:34 -06:00
90b486e744 assembler.fs: add support for omod 2025-11-11 14:22:35 -06:00
399cd6aaf9 particle: add colored particle fragment shader 2025-11-10 18:27:46 -06:00
872f0c31a8 particle_oriented_animated_quad_vbuf*: increase particle count 2025-11-09 23:12:20 -06:00
3ebdfda196 particle_oriented_animated_quad_vbuf_pixel_shader: implement particle reset 2025-11-09 22:30:27 -06:00
314267afe1 particle_oriented_animated_quad_vbuf_pixel_shader: use vertex_buffer_copy shader 2025-11-09 21:33:35 -06:00
e622d769a4 particle_oriented_animated_quad_vbuf_pixel_shader: use drm_radeon_cs2 2025-11-09 21:02:37 -06:00
1ec48e6323 add tx_rt_float_4x 2025-11-09 20:39:19 -06:00
69904efe3f particle_oriented_animated_quad_vbuf*: correct vertex buffer size calculation 2025-11-09 19:44:28 -06:00
a4c6f29cb4 add particle_oriented_animated_quad_vbuf_pixel_shader (partially working) 2025-11-09 17:44:48 -06:00
57a62859f3 add particle_oriented_animated_quad_vbuf 2025-11-08 23:55:10 -06:00
e7d571fe6a particle_oriented_animated_fan: use AOS for particle drawing 2025-11-08 23:10:04 -06:00
36cd56a51a add particle_oriented_animated_fan 2025-11-08 21:04:48 -06:00
940b0cd43d add tx_rt_float 2025-11-08 17:45:35 -06:00
0272ee93d0 add tx_rt 2025-11-08 15:46:18 -06:00
c864717deb matrix_cubesphere_tiled: enable tiling 2025-11-08 14:43:10 -06:00
23cafcdb23 rename argb -> rgba 2025-11-08 14:28:49 -06:00
089b126523 matrix_cubesphere: check drm_radeon_cs return value 2025-11-08 14:18:39 -06:00
26800a6d40 src: add particle_oriented_animated 2025-11-06 19:52:10 -06:00
ef291567b8 src: add particle_oriented 2025-11-06 18:03:08 -06:00
6bfb5bdb63 src: add plane 2025-11-06 17:12:40 -06:00
38fa29ca10 matrix_cubesphere_tiled: draw macrotiled texture 2025-11-04 18:42:26 -06:00
0d4e80b03e add matrix_cubesphere_tiled (partially incomplete) 2025-11-04 18:40:05 -06:00
1c1e7483a7 matrix_cubesphere: use separate light shader 2025-11-04 12:57:27 -06:00
ae38604ba3 matrix_cubesphere: rearrange vap_out/ib_rs_instructions 2025-11-04 12:20:08 -06:00
a335a221a4 rename drm2 to src 2025-11-04 12:13:14 -06:00
182a82c306 drm2: more generalized texture loading 2025-11-04 12:12:29 -06:00
e3519d2797 add drm2 2025-11-04 00:00:47 -06:00
fbe23d3506 assembler: add support for binary output 2025-11-03 22:04:20 -06:00
2c62869ccd drm: add matrix_cubesphere_cubemap 2025-11-03 11:14:16 -06:00
155 changed files with 20791 additions and 1041 deletions

5
drm/math/constants.hpp Normal file
View File

@ -0,0 +1,5 @@
#pragma once
// Single-precision circle constants.

// pi, spelled out as the decimal expansion of the float value nearest to pi.
#define PI (3.14159274101257324219f)
// NOTE: PI_2 is 2*pi (one full turn in radians), not pi/2.
#define PI_2 (2.0f * PI)
// Reciprocal of PI_2, i.e. 1 / (2*pi).
#define I_PI_2 (1.0f / PI_2)

View File

@ -1018,7 +1018,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -1105,7 +1105,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -1,77 +0,0 @@
0x00007807,
0x02400000,
0xe403f403,
0x00000000,
0x00000000,
0x00000000,
0x00004000,
0x08020001,
0x08020080,
0x00440220,
0x00000011,
0x00000001,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0004c01b,
0x00000000,
0x00003800,
0x08020001,
0x08020001,
0x006d8220,
0x00000000,
0x00490010,
0x00004000,
0xc8000800,
0x08020080,
0x00446223,
0x00000021,
0x00000001,
0x00004000,
0x08020080,
0x08020002,
0x00000000,
0x0004c02b,
0x00000000,
0x00003800,
0x08020002,
0x08020002,
0x006d8220,
0x00000000,
0x00490020,
0x00004000,
0x08000402,
0x08020080,
0x00442220,
0x00000041,
0x00000001,
0x00004000,
0x08020080,
0x08020004,
0x00000000,
0x0080c043,
0x00000000,
0x00004000,
0x08020080,
0x08028004,
0x00000000,
0x00c0c040,
0x1a000000,
0x00078005,
0x08020003,
0x08001003,
0x006da220,
0x0060c003,
0x00492000,

View File

@ -1,16 +0,0 @@
0x00102001, 0x00d10002, 0x00d10001, 0x01ffe001,
0x00202001, 0x00d10022, 0x00d10001, 0x01ffe001,
0x00402001, 0x00d10042, 0x00d10001, 0x01ffe001,
0x00802001, 0x00d10062, 0x00d10001, 0x01ffe001,
0x00104001, 0x00d10082, 0x00d10001, 0x01ffe001,
0x00204001, 0x00d100a2, 0x00d10001, 0x01ffe001,
0x00404001, 0x00d100c2, 0x00d10001, 0x01ffe001,
0x00804001, 0x00d100e2, 0x00d10001, 0x01ffe001,
0x00106001, 0x01110082, 0x01110041, 0x01ffe041,
0x00206001, 0x011100a2, 0x01110041, 0x01ffe041,
0x00406001, 0x011100c2, 0x01110041, 0x01ffe041,
0x00f00203, 0x00d10020, 0x01248002, 0x01ffe002,
0x00f02203, 0x00d10040, 0x01248002, 0x01ffe002,
0x00f04203, 0x01110060, 0x01248002, 0x01ffe002,
0x00f06203, 0x00d10102, 0x01248102, 0x01ffe102,
0x00f08203, 0x01210021, 0x01248002, 0x01ffe002,

View File

@ -7,7 +7,6 @@
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <fcntl.h>
#include <sys/mman.h>
@ -23,26 +22,13 @@
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "../model/blender_model.h"
#include "../model/pumpkin/pumpkin.h"
#include "../model/model2.h"
#include "../model/cube.h"
#define PI (3.14159274101257324219f)
#define PI_2 (PI * 2.0f)
#define I_PI_2 (1.0f / (PI_2))
struct reloc_indices {
int colorbuffer;
int zbuffer;
int vertexbuffer;
int texturebuffer;
int flush;
};
struct vb_object_offsets {
int start;
int material_index;
};
static inline uint32_t rreg(void * rmmio, uint32_t offset)
{
uint32_t value = *((volatile uint32_t *)(((uintptr_t)rmmio) + offset));
@ -56,7 +42,7 @@ static inline void wreg(void * rmmio, uint32_t offset, uint32_t value)
asm volatile ("" ::: "memory");
}
static void * read_file(const char * filename, int * out_size)
static void * read_file(const char * filename)
{
int fd = open(filename, O_RDONLY);
if (fd == -1) {
@ -87,21 +73,18 @@ static void * read_file(const char * filename, int * out_size)
close(fd);
if (out_size != NULL)
*out_size = size;
return buf;
}
static const uint32_t fragment_shader[] = {
#include "matrix_cubesphere_specular.fs.inc"
#include "matrix_cubesphere_cubemap.fs.inc"
#include "clear.fs.inc"
};
static const int fragment_shader_length = (sizeof (fragment_shader)) / (sizeof (fragment_shader[0]));
static const int fragment_shader_instructions = (fragment_shader_length / 6) - 1;
static const uint32_t vertex_shader[] = {
#include "matrix_cubesphere_specular.vs.inc"
#include "matrix_cubesphere_cubemap.vs.inc"
#include "clear_nop.vs.inc"
};
static const int vertex_shader_length = (sizeof (vertex_shader)) / (sizeof (vertex_shader[0]));
@ -114,7 +97,10 @@ union u32_f32 {
static union u32_f32 ib[16384];
int _3d_clear(int ix, const struct reloc_indices * reloc_indices)
const int triangle_count = cube_model.object[0]->triangle_count;
const int vertex_count = triangle_count * 3;
int _3d_clear(int ix)
{
//////////////////////////////////////////////////////////////////////////////
// ZB
@ -134,7 +120,7 @@ int _3d_clear(int ix, const struct reloc_indices * reloc_indices)
T0V(ZB_DEPTHOFFSET, 0);
T3(_NOP, 0);
ib[ix++].u32 = reloc_indices->zbuffer * 4; // index into relocs array
ib[ix++].u32 = 1 * 4; // index into relocs array
T0V(ZB_DEPTHPITCH
, ZB_DEPTHPITCH__DEPTHPITCH(1600 >> 2)
@ -142,7 +128,7 @@ int _3d_clear(int ix, const struct reloc_indices * reloc_indices)
//| ZB_DEPTHPITCH__DEPTHMICROTILE(1)
);
T3(_NOP, 0);
ib[ix++].u32 = reloc_indices->zbuffer * 4; // index into relocs array
ib[ix++].u32 = 1 * 4; // index into relocs array
//////////////////////////////////////////////////////////////////////////////
// RS
@ -293,59 +279,13 @@ mat4x4 perspective(float low1, float high1,
return m2 * m1;
}
int texture(int ix,
const struct reloc_indices * reloc_indices,
int texture_index)
int _3d_cube_inner(int ix, mat4x4 trans, mat4x4 world_trans, vec4 light_pos)
{
//////////////////////////////////////////////////////////////////////////////
// TX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
T0V(TX_INVALTAGS, 0x00000000);
T0V(TX_ENABLE
, TX_ENABLE__TEX_0_ENABLE__ENABLE);
T0V(TX_FILTER0_0
, TX_FILTER0__MAG_FILTER__LINEAR
| TX_FILTER0__MIN_FILTER__LINEAR
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(8)
);
T0V(TX_FILTER1_0
, TX_FILTER1__LOD_BIAS(1)
);
T0V(TX_BORDER_COLOR_0, 0);
T0V(TX_FORMAT0_0
, TX_FORMAT0__TXWIDTH(1024 - 1)
| TX_FORMAT0__TXHEIGHT(1024 - 1)
);
T0V(TX_FORMAT1_0
, TX_FORMAT1__TXFORMAT__TX_FMT_8_8_8_8
| TX_FORMAT1__SEL_ALPHA(5)
| TX_FORMAT1__SEL_RED(0)
| TX_FORMAT1__SEL_GREEN(1)
| TX_FORMAT1__SEL_BLUE(2)
| TX_FORMAT1__TEX_COORD_TYPE__2D
);
T0V(TX_FORMAT2_0, 0);
T0V(TX_OFFSET_0
//, TX_OFFSET__MACRO_TILE(1)
//| TX_OFFSET__MICRO_TILE(1)
, 0
);
T3(_NOP, 0);
ib[ix++].u32 = (reloc_indices->texturebuffer + texture_index) * 4; // index into relocs array
return ix;
}
int aos(int ix,
const struct reloc_indices * reloc_indices,
int start,
int vertex_count)
{
T0V(VAP_INDEX_OFFSET, 0x00000000);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(vertex_count - 1)
@ -354,56 +294,9 @@ int aos(int ix,
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// AOS
//////////////////////////////////////////////////////////////////////////////
T3(_3D_LOAD_VBPNTR, (6 - 1));
ib[ix++].u32 // VAP_VTX_NUM_ARRAYS
= VAP_VTX_NUM_ARRAYS__VTX_NUM_ARRAYS(3)
| VAP_VTX_NUM_ARRAYS__VC_FORCE_PREFETCH(1)
;
ib[ix++].u32 // VAP_VTX_AOS_ATTR01
= VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(3)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(8)
| VAP_VTX_AOS_ATTR__VTX_AOS_COUNT1(2)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE1(8)
;
ib[ix++].u32 // VAP_VTX_AOS_ADDR0
= (4 * (start + 0));
ib[ix++].u32 // VAP_VTX_AOS_ADDR1
= (4 * (start + 3));
ib[ix++].u32 // VAP_VTX_AOS_ATTR23
= VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(3)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(8)
;
ib[ix++].u32 // VAP_VTX_AOS_ADDR2
= (4 * (start + 5));
// VAP_VTX_AOS_ADDR is an absolute address in VRAM. However, DRM_RADEON_CS
// modifies this to be an offset relative to the GEM buffer handles given via
// NOP:
T3(_NOP, 0);
ib[ix++].u32 = reloc_indices->vertexbuffer * 4; // index into relocs array for VAP_VTX_AOS_ADDR0
T3(_NOP, 0);
ib[ix++].u32 = reloc_indices->vertexbuffer * 4; // index into relocs array for VAP_VTX_AOS_ADDR1
T3(_NOP, 0);
ib[ix++].u32 = reloc_indices->vertexbuffer * 4; // index into relocs array for VAP_VTX_AOS_ADDR2
return ix;
}
int _3d_cube_inner(int ix,
const struct reloc_indices * reloc_indices,
mat4x4 trans, mat4x4 world_trans, vec4 light_pos, vec4 view_pos,
struct vb_object_offsets * object_offsets,
int object_count)
{
//////////////////////////////////////////////////////////////////////////////
// VAP_PVS
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
const float consts[] = {
// 0
@ -420,9 +313,6 @@ int _3d_cube_inner(int ix,
// 8
light_pos.x, light_pos.y, light_pos.z, light_pos.w,
// 9
view_pos.x, view_pos.y, view_pos.z, view_pos.w,
};
const int consts_length = (sizeof (consts)) / (sizeof (consts[0]));
assert(consts_length % 4 == 0);
@ -449,43 +339,47 @@ int _3d_cube_inner(int ix,
, VAP_PVS_CODE_CNTL_1__PVS_LAST_VTX_SRC_INST((vertex_shader_instructions - 1))
);
for (int object_ix = 0; object_ix < object_count; object_ix++) {
int start = object_offsets[object_ix].start;
int material_index = object_offsets[object_ix].material_index;
int end = object_offsets[object_ix+1].start;
assert(end > 0 && end > start);
int size = end - start;
int vertex_count = size / 5;
//printf("obj_ix %d material_ix %d\n", object_ix, material_index);
ix = texture(ix, reloc_indices, material_index);
ix = aos(ix, reloc_indices, start, vertex_count);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
T3(_3D_DRAW_VBUF_2, (1 - 1));
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * 8) - 1);
ib[ix++].u32
= VAP_VF_CNTL__PRIM_TYPE(4) // triangle list
| VAP_VF_CNTL__PRIM_WALK(2) // vertex list (data fetched from memory)
= VAP_VF_CNTL__PRIM_TYPE(4)
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
;
const model * model = &cube_model;
const object * obj = model->object[0];
for (int i = 0; i < triangle_count; i++) {
for (int j = 0; j < 3; j++) {
vec3 p = model->position[obj->triangle[i][j].position];
vec2 t = model->texture[obj->triangle[i][j].texture];
vec3 n = model->normal[obj->triangle[i][j].normal];
ib[ix++].f32 = p.x;
ib[ix++].f32 = p.y;
ib[ix++].f32 = p.z;
ib[ix++].f32 = t.x;
ib[ix++].f32 = t.y;
ib[ix++].f32 = n.x;
ib[ix++].f32 = n.y;
ib[ix++].f32 = n.z;
}
}
return ix;
}
int _3d_cube(int ix,
const struct reloc_indices * reloc_indices,
float theta,
struct vb_object_offsets * object_offsets,
int object_count)
int _3d_cube(int ix, float theta)
{
printf("faces length %d\n", vertex_count);
//////////////////////////////////////////////////////////////////////////////
// ZB
//////////////////////////////////////////////////////////////////////////////
@ -504,7 +398,7 @@ int _3d_cube(int ix,
T0V(ZB_DEPTHOFFSET, 0);
T3(_NOP, 0);
ib[ix++].u32 = reloc_indices->zbuffer * 4; // index into relocs array
ib[ix++].u32 = 1 * 4; // index into relocs array
T0V(ZB_DEPTHPITCH
, ZB_DEPTHPITCH__DEPTHPITCH(1600 >> 2)
@ -512,14 +406,14 @@ int _3d_cube(int ix,
//| ZB_DEPTHPITCH__DEPTHMICROTILE(1)
);
T3(_NOP, 0);
ib[ix++].u32 = reloc_indices->zbuffer * 4; // index into relocs array
ib[ix++].u32 = 1 * 4; // index into relocs array
//////////////////////////////////////////////////////////////////////////////
// RS
//////////////////////////////////////////////////////////////////////////////
T0V(RS_COUNT
, RS_COUNT__IT_COUNT(20)
, RS_COUNT__IT_COUNT(16)
| RS_COUNT__IC_COUNT(0)
| RS_COUNT__W_ADDR(0)
| RS_COUNT__HIRES_EN(1)
@ -552,16 +446,9 @@ int _3d_cube(int ix,
| RS_IP__TEX_PTR_Q(15)
| RS_IP__OFFSET_EN(0)
);
T0V(RS_IP_4
, RS_IP__TEX_PTR_S(16)
| RS_IP__TEX_PTR_T(17)
| RS_IP__TEX_PTR_R(18)
| RS_IP__TEX_PTR_Q(19)
| RS_IP__OFFSET_EN(0)
);
T0V(RS_INST_COUNT
, RS_INST_COUNT__INST_COUNT(4));
, RS_INST_COUNT__INST_COUNT(3));
T0V(RS_INST_0
, RS_INST__TEX_ID(0)
| RS_INST__TEX_CN(1)
@ -582,11 +469,49 @@ int _3d_cube(int ix,
| RS_INST__TEX_CN(1)
| RS_INST__TEX_ADDR(3)
);
T0V(RS_INST_4
, RS_INST__TEX_ID(4)
| RS_INST__TEX_CN(1)
| RS_INST__TEX_ADDR(4)
//////////////////////////////////////////////////////////////////////////////
// TX
//////////////////////////////////////////////////////////////////////////////
T0V(TX_INVALTAGS, 0x00000000);
T0V(TX_ENABLE
, TX_ENABLE__TEX_0_ENABLE__ENABLE);
T0V(TX_FILTER0_0
, TX_FILTER0__CLAMP_S(2)
| TX_FILTER0__CLAMP_T(2)
| TX_FILTER0__MAG_FILTER__LINEAR
| TX_FILTER0__MIN_FILTER__LINEAR
);
T0V(TX_FILTER1_0
, TX_FILTER1__LOD_BIAS(1)
| TX_FILTER1__BORDER_FIX(1)
);
T0V(TX_BORDER_COLOR_0, 0);
T0V(TX_FORMAT0_0
, TX_FORMAT0__TXWIDTH(1024 - 1)
| TX_FORMAT0__TXHEIGHT(1024 - 1)
);
T0V(TX_FORMAT1_0
, TX_FORMAT1__TXFORMAT__TX_FMT_8_8_8_8
| TX_FORMAT1__SEL_ALPHA(5)
| TX_FORMAT1__SEL_RED(0)
| TX_FORMAT1__SEL_GREEN(1)
| TX_FORMAT1__SEL_BLUE(2)
| TX_FORMAT1__TEX_COORD_TYPE__CUBE
);
T0V(TX_FORMAT2_0, 0);
T0V(TX_OFFSET_0
//, TX_OFFSET__MACRO_TILE(1)
//| TX_OFFSET__MICRO_TILE(1)
, 0
);
T3(_NOP, 0);
ib[ix++].u32 = 2 * 4; // index into relocs array
//////////////////////////////////////////////////////////////////////////////
// VAP
@ -648,14 +573,15 @@ int _3d_cube(int ix,
| VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111) // XYZW
);
T0V(VAP_INDEX_OFFSET, 0x00000000);
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_1_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_2_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_3_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_4_COMP_CNT(4));
| VAP_OUT_VTX_FMT_1__TEX_3_COMP_CNT(4));
//////////////////////////////////////////////////////////////////////////////
// GA_US
@ -682,19 +608,17 @@ int _3d_cube(int ix,
mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
mat4x4 p = perspective(0.01f, 5.0f,
0.001f, 0.999f,
0.5f, 2.0f);
mat4x4 p = perspective(-2.0f, 2.0f,
-0.999f, 0.999f,
-1.999f, 1.999f);
vec4 light_pos = vec4(0, 0, 0, 1.0f);
vec4 view_pos = vec4(0, 0, -3, 1.0f);
// light
if (1) {
if (0) {
mat4x4 t = translate(vec3(0, 0, 3));
mat4x4 t1 = translate(vec3(1, 0, 0));
mat4x4 s = scale(0.05f);
mat4x4 s = scale(0.1f);
mat4x4 rz = rotate_y(theta * 2.f);
mat4x4 world_trans = rz * t1 * s;
@ -705,23 +629,15 @@ int _3d_cube(int ix,
light_pos = world_trans * light_pos;
/*
ix = _3d_cube_inner(ix,
reloc_indices,
trans, world_trans, light_pos, view_pos,
object_offsets,
object_count);
*/
ix = _3d_cube_inner(ix, trans, world_trans, light_pos);
}
// object
if (1) {
mat4x4 t = translate(vec3(0, 0, 3));
mat4x4 rx = rotate_x(1 * theta1 * 0.5f);
//mat4x4 ry = rotate_y(0 * theta2 * 0.8f + 1.4f);
mat4x4 ry = rotate_y(3.1415f * 1.0f);
mat4x4 s = scale(0.5f);
mat4x4 t = translate(vec3(0, 0, 0));
mat4x4 rx = rotate_x(0 * theta1 * 0.5f);
mat4x4 ry = rotate_y(1 * theta2 * 0.8f + 1.4f);
mat4x4 s = scale(1.0f);
mat4x4 world_trans = rx * ry * s;
@ -729,20 +645,13 @@ int _3d_cube(int ix,
mat4x4 trans = aspect * p * t * world_trans;
ix = _3d_cube_inner(ix,
reloc_indices,
trans, world_trans, light_pos, view_pos,
object_offsets,
object_count);
ix = _3d_cube_inner(ix, trans, world_trans, light_pos);
}
return ix;
}
int indirect_buffer(const struct reloc_indices * reloc_indices,
float theta,
struct vb_object_offsets * object_offsets,
int object_count)
int indirect_buffer(float theta)
{
int ix = 0;
@ -960,7 +869,7 @@ int indirect_buffer(const struct reloc_indices * reloc_indices,
, US_CONFIG__ZERO_TIMES_ANYTHING_EQUALS_ZERO(1)
);
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(6)
, US_PIXSIZE__PIX_SIZE(4)
);
T0V(US_FC_CTRL, 0);
@ -986,7 +895,7 @@ int indirect_buffer(const struct reloc_indices * reloc_indices,
, 0x00000000 // value replaced by kernel from relocs
);
T3(_NOP, 0);
ib[ix++].u32 = reloc_indices->colorbuffer * 4; // index into relocs array
ib[ix++].u32 = 0 * 4; // index into relocs array
T0V(RB3D_COLORPITCH0
, RB3D_COLORPITCH__COLORPITCH(1600 >> 1)
@ -995,7 +904,7 @@ int indirect_buffer(const struct reloc_indices * reloc_indices,
// The COLORPITCH NOP is ignored/not applied due to
// RADEON_CS_KEEP_TILING_FLAGS, but is still required.
T3(_NOP, 0);
ib[ix++].u32 = reloc_indices->colorbuffer * 4; // index into relocs array
ib[ix++].u32 = 0 * 4; // index into relocs array
//////////////////////////////////////////////////////////////////////////////
// SC
@ -1028,9 +937,9 @@ int indirect_buffer(const struct reloc_indices * reloc_indices,
// VAP_PVS
//////////////////////////////////////////////////////////////////////////////
//printf("vs length %d\n", vertex_shader_length);
printf("vs length %d\n", vertex_shader_length);
assert(vertex_shader_length % 4 == 0);
//printf("vs instructions %d\n", vertex_shader_instructions);
printf("vs instructions %d\n", vertex_shader_instructions);
T0V(VAP_PVS_VECTOR_INDX_REG
, VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(0)
@ -1044,9 +953,9 @@ int indirect_buffer(const struct reloc_indices * reloc_indices,
// GA_US
//////////////////////////////////////////////////////////////////////////////
//printf("fs length %d\n", fragment_shader_length);
printf("fs length %d\n", fragment_shader_length);
assert(fragment_shader_length % 6 == 0);
//printf("fs instructions %d\n", fragment_shader_instructions);
printf("fs instructions %d\n", fragment_shader_instructions);
T0V(GA_US_VECTOR_INDEX, 0x00000000);
T0_ONE_REG(GA_US_VECTOR_DATA, fragment_shader_length - 1);
@ -1058,12 +967,8 @@ int indirect_buffer(const struct reloc_indices * reloc_indices,
// DRAW
//////////////////////////////////////////////////////////////////////////////
ix = _3d_clear(ix, reloc_indices);
ix = _3d_cube(ix,
reloc_indices,
theta,
object_offsets,
object_count);
ix = _3d_clear(ix);
ix = _3d_cube(ix, theta);
//////////////////////////////////////////////////////////////////////////////
// padding
@ -1127,138 +1032,6 @@ int create_colorbuffer(int fd, int colorbuffer_size, void ** out_ptr)
return args.handle;
}
int fill_vertexbuffer(void * ptr, int size,
struct vb_object_offsets ** offsets_out)
{
float * fptr = (float *)ptr;
int ix = 0;
const int object_count = (sizeof (objects)) / (sizeof (objects[0]));
// FIXME: iterate through meshes, not objects
struct vb_object_offsets * offsets =
(struct vb_object_offsets *)calloc((sizeof (struct vb_object_offsets)), object_count + 1);
for (int object_ix = 0; object_ix < object_count; object_ix++) {
const struct mesh * mesh = objects[object_ix].mesh;
offsets[object_ix].start = ix;
printf("fill vertexbuffer: object_ix %d polygons %d\n",
object_ix, mesh->polygons_length);
assert(mesh->uv_layers_length == 1);
const vec2 * uvmap = mesh->uv_layers[0];
const vec3 * position = mesh->position;
const vec3 * normal = mesh->normal;
int last_mat_ix = -1;
for (int polygon_ix = 0; polygon_ix < mesh->polygons_length; polygon_ix++) {
int uv_ix = polygon_ix * 3;
const struct polygon * polygon = &mesh->polygons[polygon_ix];
const vec3 * ap = &position[polygon->a];
const vec3 * bp = &position[polygon->b];
const vec3 * cp = &position[polygon->c];
const vec2 * at = &uvmap[uv_ix + 0];
const vec2 * bt = &uvmap[uv_ix + 1];
const vec2 * ct = &uvmap[uv_ix + 2];
const vec3 * an = &normal[polygon->a];
const vec3 * bn = &normal[polygon->b];
const vec3 * cn = &normal[polygon->c];
if (last_mat_ix != polygon->material_index) {
printf("new material: object_ix %d material_index %d\n",
object_ix, polygon->material_index);
offsets[object_ix].material_index = polygon->material_index;
last_mat_ix = polygon->material_index;
}
//assert(polygon->material_index == object_ix);
assert((ix + (8 * 3)) < (size / 4));
fptr[ix++] = ap->x;
fptr[ix++] = ap->y;
fptr[ix++] = ap->z;
fptr[ix++] = at->x;
fptr[ix++] = 1.0f - at->y;
fptr[ix++] = an->x;
fptr[ix++] = an->y;
fptr[ix++] = an->z;
fptr[ix++] = bp->x;
fptr[ix++] = bp->y;
fptr[ix++] = bp->z;
fptr[ix++] = bt->x;
fptr[ix++] = 1.0f - bt->y;
fptr[ix++] = bn->x;
fptr[ix++] = bn->y;
fptr[ix++] = bn->z;
fptr[ix++] = cp->x;
fptr[ix++] = cp->y;
fptr[ix++] = cp->z;
fptr[ix++] = ct->x;
fptr[ix++] = 1.0f - ct->y;
fptr[ix++] = cn->x;
fptr[ix++] = cn->y;
fptr[ix++] = cn->z;
}
}
offsets[object_count].start = ix;
printf("fill vertexbuffer: dwords %d size %d\n", ix, (int)(ix * (sizeof (float))));
*offsets_out = offsets;
asm volatile("" ::: "memory");
return object_count;
}
static int * load_textures(int fd, int * length_out)
{
int * texture_handles = NULL;
const int materials_length = (sizeof (materials)) / (sizeof (materials[0]));
texture_handles = (int *)calloc((sizeof (int)), materials_length);
char buf[512];
for (int i = 0; i < materials_length; i++) {
assert(strlen(materials[i].name) < 300);
snprintf(buf, (sizeof (buf)), "../model/pumpkin/textures/%s.data", materials[i].name);
int size = 0;
void * file_ptr = read_file(buf, &size);
assert(file_ptr != NULL);
assert(size > 0);
printf("load %s %d\n", buf, size);
assert(materials[i].texture_id < materials_length);
assert(texture_handles[materials[i].texture_id] == 0);
void * buffer_ptr;
int handle = create_colorbuffer(fd, size, &buffer_ptr);
for (int i = 0; i < size / 4; i++) {
((uint32_t *)buffer_ptr)[i] = ((uint32_t *)file_ptr)[i];
}
asm volatile ("" ::: "memory");
free(file_ptr);
munmap(buffer_ptr, size);
texture_handles[materials[i].texture_id] = handle;
}
assert(length_out != NULL);
*length_out = materials_length;
return texture_handles;
}
int main()
{
//////////////////////////////////////////////////////////////////////////////
@ -1281,34 +1054,55 @@ int main()
int ret;
int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
const int vertexbuffer_size = 4 * 1024 * 1024;
const int colorbuffer_size = 1600 * 1200 * 4;
int colorbuffer_handle[2];
int zbuffer_handle;
int vertexbuffer_handle;
int texturebuffer_handle;
int flush_handle;
void * colorbuffer_ptr[2];
void * zbuffer_ptr;
void * vertexbuffer_ptr;
void * texturebuffer_ptr;
// colorbuffer
colorbuffer_handle[0] = create_colorbuffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
colorbuffer_handle[1] = create_colorbuffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
zbuffer_handle = create_colorbuffer(fd, colorbuffer_size, &zbuffer_ptr);
vertexbuffer_handle = create_colorbuffer(fd, vertexbuffer_size, &vertexbuffer_ptr);
fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);
struct vb_object_offsets * object_offsets = NULL;
int object_count = fill_vertexbuffer(vertexbuffer_ptr, vertexbuffer_size, &object_offsets);
munmap(vertexbuffer_ptr, vertexbuffer_size);
fprintf(stderr, "object count %d\n", object_count);
// texture
{
const int texture_size = 1024 * 1024 * 4;
int texture_handles_length = 0;
int * texture_handles = load_textures(fd, &texture_handles_length);
// copy cubemap textures
const char * textures[] = {
"../texture/cubemap_right_1024x1024_rgba8888.data",
"../texture/cubemap_left_1024x1024_rgba8888.data",
"../texture/cubemap_top_1024x1024_rgba8888.data",
"../texture/cubemap_bottom_1024x1024_rgba8888.data",
"../texture/cubemap_front_1024x1024_rgba8888.data",
"../texture/cubemap_back_1024x1024_rgba8888.data",
};
const int texturebuffer_size = texture_size * 6;
texturebuffer_handle = create_colorbuffer(fd, texturebuffer_size, &texturebuffer_ptr);
for (int i = 0; i < 6; i++) {
void * texture_buf = read_file(textures[i]);
assert(texture_buf != NULL);
for (int j = 0; j < texture_size / 4; j++) {
int texture_ix = i * (texture_size / 4) + j;
((uint32_t*)texturebuffer_ptr)[texture_ix] = ((uint32_t*)texture_buf)[j];
}
asm volatile ("" ::: "memory");
free(texture_buf);
}
munmap(texturebuffer_ptr, texture_size);
}
// flush
{
@ -1336,61 +1130,36 @@ int main()
int colorbuffer_ix = 0;
float theta = 3.14 / 4.0;
int relocs_length = 4 + texture_handles_length;
struct drm_radeon_cs_reloc * relocs =
(struct drm_radeon_cs_reloc *)calloc((sizeof (struct drm_radeon_cs_reloc)), relocs_length);
int relocs_size = (sizeof (struct drm_radeon_cs_reloc)) * relocs_length;
const struct reloc_indices reloc_indices = {
.colorbuffer = 0,
.zbuffer = 1,
.vertexbuffer = 2,
.texturebuffer = 3,
.flush = relocs_length - 1,
};
relocs[reloc_indices.zbuffer] = (struct drm_radeon_cs_reloc){
.handle = zbuffer_handle,
.read_domains = 4, // RADEON_GEM_DOMAIN_VRAM
.write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
.flags = 8,
};
relocs[reloc_indices.vertexbuffer] = (struct drm_radeon_cs_reloc){
.handle = vertexbuffer_handle,
.read_domains = 4, // RADEON_GEM_DOMAIN_VRAM
.write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
.flags = 8,
};
relocs[reloc_indices.flush] = (struct drm_radeon_cs_reloc){
.handle = flush_handle,
.read_domains = 2, // RADEON_GEM_DOMAIN_GTT
.write_domain = 2, // RADEON_GEM_DOMAIN_GTT
.flags = 0,
};
for (int i = 0; i < texture_handles_length; i++) {
relocs[reloc_indices.texturebuffer + i] = (struct drm_radeon_cs_reloc){
.handle = texture_handles[i],
.read_domains = 4, // RADEON_GEM_DOMAIN_VRAM
.write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
.flags = 8,
};
}
int ib_dwords = indirect_buffer(theta);
while (true) {
relocs[reloc_indices.colorbuffer] = (struct drm_radeon_cs_reloc){
struct drm_radeon_cs_reloc relocs[] = {
{
.handle = colorbuffer_handle[colorbuffer_ix],
.read_domains = 4, // RADEON_GEM_DOMAIN_VRAM
.write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
.flags = 8,
},
{
.handle = zbuffer_handle,
.read_domains = 4, // RADEON_GEM_DOMAIN_VRAM
.write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
.flags = 8,
},
{
.handle = texturebuffer_handle,
.read_domains = 4, // RADEON_GEM_DOMAIN_VRAM
.write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
.flags = 8,
},
{
.handle = flush_handle,
.read_domains = 2, // RADEON_GEM_DOMAIN_GTT
.write_domain = 2, // RADEON_GEM_DOMAIN_GTT
.flags = 0,
}
};
int ib_dwords = indirect_buffer(&reloc_indices,
theta,
object_offsets,
object_count);
struct drm_radeon_cs_chunk chunks[3] = {
{
.chunk_id = RADEON_CHUNK_ID_IB,
@ -1399,7 +1168,7 @@ int main()
},
{
.chunk_id = RADEON_CHUNK_ID_RELOCS,
.length_dw = relocs_size / (sizeof (uint32_t)),
.length_dw = (sizeof (relocs)) / (sizeof (uint32_t)),
.chunk_data = (uint64_t)(uintptr_t)relocs,
},
{
@ -1440,15 +1209,9 @@ int main()
#define D1GRPH_UPDATE__D1GRPH_SURFACE_UPDATE_PENDING (1 << 2)
uint32_t d1crtc_double_buffer_control = rreg(rmmio, D1CRTC_DOUBLE_BUFFER_CONTROL);
//printf("D1CRTC_DOUBLE_BUFFER_CONTROL: %08x\n", d1crtc_double_buffer_control);
printf("D1CRTC_DOUBLE_BUFFER_CONTROL: %08x\n", d1crtc_double_buffer_control);
assert(d1crtc_double_buffer_control == (1 << 8));
const struct timespec duration = {
.tv_sec = 0,
.tv_nsec = 16670000
};
nanosleep(&duration, NULL);
// addresses were retrieved from /sys/kernel/debug/radeon_vram_mm
//
// This assumes GEM buffer allocation always starts from the lowest
@ -1462,8 +1225,12 @@ int main()
while ((rreg(rmmio, D1GRPH_UPDATE) & D1GRPH_UPDATE__D1GRPH_SURFACE_UPDATE_PENDING) != 0);
// next state
theta += 0.005f;
theta += 0.01f;
colorbuffer_ix = (colorbuffer_ix + 1) & 1;
// next indirect buffer
ib_dwords = indirect_buffer(theta);
}
{

View File

@ -0,0 +1,15 @@
-- temp[0] -- position (world space)
-- temp[1] -- normal
-- temp[2] -- light pos (world space)
-- temp[3] -- texture
-- PIXSIZE 4
TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE
temp[3].rgba = LD tex[0].rgba temp[0].rgba ;
OUT TEX_SEM_WAIT
src0.rgb = temp[3] ,
src0.a = temp[3] :
out[0].a = MAX src0.a src0.a ,
out[0].rgb = MAX src0.rgb src0.rgb ;

View File

@ -0,0 +1,42 @@
--
-- dot(m[0], v), dot(m[1], v), dot(m[2], v), dot(m[3], v)
--
-- input[0] -- position
-- input[1] -- texture
-- input[2] -- normal
-- consts[0] -- trans
-- consts[4] -- world_trans
-- consts[8] -- light position (world space)
-- out[0] -- position clip space
-- out[1] -- object position world space
-- out[2] -- normal
-- out[3] -- light position world space
-- out[4] -- texture
-- position clip space
temp[1].x = VE_DOT const[0].xyzw input[0].xyzw ;
temp[1].y = VE_DOT const[1].xyzw input[0].xyzw ;
temp[1].z = VE_DOT const[2].xyzw input[0].xyzw ;
temp[1].w = VE_DOT const[3].xyzw input[0].xyzw ;
-- position world space
temp[2].xyzw = VE_ADD input[0].xyzw input[0].0000 ;
-- normal world space
temp[3].x = VE_DOT const[4].xyz0 input[2].xyz0 ;
temp[3].y = VE_DOT const[5].xyz0 input[2].xyz0 ;
temp[3].z = VE_DOT const[6].xyz0 input[2].xyz0 ;
-- position (clip space)
out[0].xyzw = VE_ADD temp[1].xyzw const[0].0000 ;
-- position (world space)
out[1].xyzw = VE_ADD temp[2].xyzw const[0].0000 ;
-- normal
out[2].xyzw = VE_ADD temp[3].xyz0 const[0].0000 ;
-- light pos (world space)
out[3].xyzw = VE_ADD const[8].xyzw const[8].0000 ;
-- texture
out[4].xyzw = VE_ADD input[1].xy00 const[0].0000 ;

View File

@ -1126,7 +1126,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -121,5 +121,4 @@ OUT TEX_SEM_WAIT
src0.rgb = temp[4] ,
src1.a = temp[6] :
out[0].a = MAX src1.1 src0.1 ,
--out[0].rgb = MAD src0.rgb src1.aaa src2.000 ;
out[0].rgb = MAD src0.111 src1.111 src2.000 ;
out[0].rgb = MAD src0.rgb src1.aaa src2.000 ;

View File

@ -1,168 +0,0 @@
0x00007807,
0x02400000,
0xe404f404,
0x00000000,
0x00000000,
0x00000000,
0x00004000,
0x08020001,
0x08020080,
0x00440220,
0x00000011,
0x00000001,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0004c01b,
0x00000000,
0x00003800,
0x08020001,
0x08020001,
0x006d8220,
0x00000000,
0x00490010,
0x00003800,
0x08000002,
0x08020080,
0x004406d8,
0x00000000,
0x00a21020,
0x00004000,
0x08020002,
0x08020080,
0x00440220,
0x00000021,
0x00000001,
0x00004000,
0x08020080,
0x08020002,
0x00000000,
0x0004c02b,
0x00000000,
0x00003800,
0x08020002,
0x08020002,
0x006d8220,
0x00000000,
0x00490020,
0x00003800,
0x08000003,
0x08020080,
0x004406d8,
0x00000000,
0x00a21030,
0x00004000,
0x08020003,
0x08020080,
0x00440220,
0x00000031,
0x00000001,
0x00004000,
0x08020080,
0x08020003,
0x00000000,
0x0004c03b,
0x00000000,
0x00003800,
0x08020003,
0x08020003,
0x006d8220,
0x00000000,
0x00490030,
0x00004000,
0x08000402,
0x08020080,
0x00442a20,
0x00000051,
0x00000001,
0x00004000,
0x08020080,
0x08030005,
0x00000000,
0x0068c050,
0x20000000,
0x00003800,
0x00200480,
0x08020005,
0x00442b6c,
0x00000000,
0x00a22050,
0x00004000,
0x08001403,
0x08020080,
0x00442220,
0x00000051,
0x00000001,
0x00004000,
0x08020080,
0x08020005,
0x00000000,
0x0080c053,
0x00000000,
0x00004000,
0x08020080,
0x08020005,
0x00000000,
0x0000c059,
0x00000000,
0x00004000,
0x08020080,
0x08038005,
0x00000000,
0x0068c050,
0x22000000,
0x00004000,
0x08020080,
0x08020005,
0x00000000,
0x0000c058,
0x00000000,
0x00004000,
0x08000402,
0x08020080,
0x00442220,
0x00000061,
0x00000001,
0x00004000,
0x08020080,
0x08020006,
0x00000000,
0x0080c063,
0x00000000,
0x00004000,
0x08020080,
0x80624005,
0x00000000,
0x00c8f060,
0x1c000000,
0x00078005,
0x08020004,
0x08001880,
0x006da220,
0x00c19003,
0x00492000,

View File

@ -1,17 +0,0 @@
0x00102001, 0x00d10002, 0x00d10001, 0x01ffe001,
0x00202001, 0x00d10022, 0x00d10001, 0x01ffe001,
0x00402001, 0x00d10042, 0x00d10001, 0x01ffe001,
0x00802001, 0x00d10062, 0x00d10001, 0x01ffe001,
0x00104001, 0x00d10082, 0x00d10001, 0x01ffe001,
0x00204001, 0x00d100a2, 0x00d10001, 0x01ffe001,
0x00404001, 0x00d100c2, 0x00d10001, 0x01ffe001,
0x00804001, 0x00d100e2, 0x00d10001, 0x01ffe001,
0x00106001, 0x01110082, 0x01110041, 0x01ffe041,
0x00206001, 0x011100a2, 0x01110041, 0x01ffe041,
0x00406001, 0x011100c2, 0x01110041, 0x01ffe041,
0x00f00203, 0x00d10020, 0x01248002, 0x01ffe002,
0x00f02203, 0x00d10040, 0x01248002, 0x01ffe002,
0x00f04203, 0x01110060, 0x01248002, 0x01ffe002,
0x00f06203, 0x00d10102, 0x01248102, 0x01ffe102,
0x00f08203, 0x00d10122, 0x01248122, 0x01ffe122,
0x00f0a203, 0x01210021, 0x01248002, 0x01ffe002,

View File

@ -1188,7 +1188,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -19,9 +19,7 @@
#include "3d_registers_bits.h"
#include "command_processor.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "../model/vec.h"
#include "../model/blender_model.h"
#include "../model/pumpkin/pumpkin.h"
@ -93,14 +91,14 @@ static void * read_file(const char * filename, int * out_size)
}
static const uint32_t fragment_shader[] = {
#include "matrix_cubesphere_specular.fs.inc"
#include "texture_cube.fs.inc"
#include "clear.fs.inc"
};
static const int fragment_shader_length = (sizeof (fragment_shader)) / (sizeof (fragment_shader[0]));
static const int fragment_shader_instructions = (fragment_shader_length / 6) - 1;
static const uint32_t vertex_shader[] = {
#include "matrix_cubesphere_specular.vs.inc"
#include "pumpkin_man.vs.inc"
#include "clear_nop.vs.inc"
};
static const int vertex_shader_length = (sizeof (vertex_shader)) / (sizeof (vertex_shader[0]));
@ -208,7 +206,7 @@ int _3d_clear(int ix, const struct reloc_indices * reloc_indices)
T0V(VAP_PROG_STREAM_CNTL_EXT_0
, VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0__SELECT_Y
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_FP_ONE
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_FP_ZERO
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0__SELECT_FP_ONE
| VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111)
);
@ -323,31 +321,38 @@ int aos(int ix,
int start,
int vertex_count)
{
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(5)
);
T0V(VAP_INDEX_OFFSET, 0x00000000);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(vertex_count - 1)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// AOS
//////////////////////////////////////////////////////////////////////////////
T3(_3D_LOAD_VBPNTR, (6 - 1));
T3(_3D_LOAD_VBPNTR, (4 - 1));
ib[ix++].u32 // VAP_VTX_NUM_ARRAYS
= VAP_VTX_NUM_ARRAYS__VTX_NUM_ARRAYS(3)
= VAP_VTX_NUM_ARRAYS__VTX_NUM_ARRAYS(2)
| VAP_VTX_NUM_ARRAYS__VC_FORCE_PREFETCH(1)
;
ib[ix++].u32 // VAP_VTX_AOS_ATTR01
= VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(3)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(8)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(5)
| VAP_VTX_AOS_ATTR__VTX_AOS_COUNT1(2)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE1(8)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE1(5)
;
ib[ix++].u32 // VAP_VTX_AOS_ADDR0
= (4 * (start + 0));
ib[ix++].u32 // VAP_VTX_AOS_ADDR1
= (4 * (start + 3));
ib[ix++].u32 // VAP_VTX_AOS_ATTR23
= VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(3)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(8)
;
ib[ix++].u32 // VAP_VTX_AOS_ADDR2
= (4 * (start + 5));
// VAP_VTX_AOS_ADDR is an absolute address in VRAM. However, DRM_RADEON_CS
// modifies this to be an offset relative to the GEM buffer handles given via
@ -357,90 +362,15 @@ int aos(int ix,
ib[ix++].u32 = reloc_indices->vertexbuffer * 4; // index into relocs array for VAP_VTX_AOS_ADDR0
T3(_NOP, 0);
ib[ix++].u32 = reloc_indices->vertexbuffer * 4; // index into relocs array for VAP_VTX_AOS_ADDR1
T3(_NOP, 0);
ib[ix++].u32 = reloc_indices->vertexbuffer * 4; // index into relocs array for VAP_VTX_AOS_ADDR2
return ix;
}
// Appends the vertex shader constant upload to `ib`, starting at index `ix`.
//
// Constant layout, one vec4 (4 floats) per slot:
//   0..3  trans[0]..trans[3]
//   4..7  world_trans[0]..world_trans[3]
//   8     light_pos
//   9     view_pos
//
// Returns the index following the last value written by the loop below.
// NOTE(review): T0V / T0_ONE_REG are defined outside this function and
// presumably also write to `ib` and advance `ix` -- confirm.
int load_consts(int ix,
const mat4x4& trans,
const mat4x4& world_trans,
const vec4& light_pos,
const vec4& view_pos)
{
//////////////////////////////////////////////////////////////////////////////
// VAP_PVS
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
const float consts[] = {
// 0..3: trans
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
// 4..7: world_trans
world_trans[0][0], world_trans[0][1], world_trans[0][2], world_trans[0][3],
world_trans[1][0], world_trans[1][1], world_trans[1][2], world_trans[1][3],
world_trans[2][0], world_trans[2][1], world_trans[2][2], world_trans[2][3],
world_trans[3][0], world_trans[3][1], world_trans[3][2], world_trans[3][3],
// 8: light_pos
light_pos.x, light_pos.y, light_pos.z, light_pos.w,
// 9: view_pos
view_pos.x, view_pos.y, view_pos.z, view_pos.w,
};
const int consts_length = (sizeof (consts)) / (sizeof (consts[0]));
// the table must consist of whole vec4 slots
assert(consts_length % 4 == 0);
// MAX_CONST_ADDR is given the index of the last vec4 slot (slot count - 1)
T0V(VAP_PVS_CONST_CNTL
, VAP_PVS_CONST_CNTL__PVS_CONST_BASE_OFFSET(0)
| VAP_PVS_CONST_CNTL__PVS_MAX_CONST_ADDR((consts_length / 4) - 1)
);
// NOTE(review): offset 1024 presumably selects the constant area of the
// PVS vector memory -- confirm against the register documentation.
T0V(VAP_PVS_VECTOR_INDX_REG
, VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(1024)
);
// header announcing the data writes; the second argument is the number of
// values minus one, and exactly consts_length values follow
T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, (consts_length - 1));
for (int i = 0; i < consts_length; i++)
ib[ix++].f32 = consts[i];
return ix;
}
// Builds a matrix that linearly remaps the z coordinate of a point with
// w = 1: an input z in [low1, high1] produces an output z in [low2, high2]
// and an output w in [low3, high3]. x and y pass through unchanged.
// (Assumes the mat4x4 constructor takes its arguments row by row and that
// matrices multiply column vectors -- confirm against math/transform.hpp.)
//
// high1 must differ from low1; the input range is used as a divisor.
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  const float in_range = high1 - low1;
  const float z_scale = (high2 - low2) / in_range;
  const float w_scale = (high3 - low3) / in_range;

  // move the start of the input range to z = 0
  mat4x4 shift(1, 0, 0, 0,
               0, 1, 0, 0,
               0, 0, 1, -low1,
               0, 0, 0, 1);

  // scale the shifted z into the output z and w ranges
  mat4x4 remap(1, 0, 0, 0,
               0, 1, 0, 0,
               0, 0, z_scale, low2,
               0, 0, w_scale, low3);

  // shift is applied first, then remap
  return remap * shift;
}
int _3d_object(int ix,
const struct reloc_indices * reloc_indices,
float theta,
struct vb_object_offsets * object_offsets,
int object_count,
int vertex_shader_instructions)
int object_count)
{
//////////////////////////////////////////////////////////////////////////////
// ZB
@ -451,7 +381,7 @@ int _3d_object(int ix,
| ZB_CNTL__ZWRITEENABLE__ENABLE // 1
);
T0V(ZB_ZSTENCILCNTL
, ZB_ZSTENCILCNTL__ZFUNC__ALWAYS//(1) // less than
, ZB_ZSTENCILCNTL__ZFUNC(5) // greater than
);
T0V(ZB_FORMAT
@ -474,74 +404,63 @@ int _3d_object(int ix,
// RS
//////////////////////////////////////////////////////////////////////////////
T0V(RS_COUNT
, RS_COUNT__IT_COUNT(20)
| RS_COUNT__IC_COUNT(0)
| RS_COUNT__W_ADDR(0)
| RS_COUNT__HIRES_EN(1)
);
T0V(RS_IP_0
, RS_IP__TEX_PTR_S(0)
| RS_IP__TEX_PTR_T(1)
| RS_IP__TEX_PTR_R(2)
| RS_IP__TEX_PTR_Q(3)
| RS_IP__COL_PTR(0)
| RS_IP__COL_FMT(0)
| RS_IP__OFFSET_EN(0)
);
T0V(RS_IP_1
, RS_IP__TEX_PTR_S(4)
| RS_IP__TEX_PTR_T(5)
| RS_IP__TEX_PTR_R(6)
| RS_IP__TEX_PTR_Q(7)
| RS_IP__OFFSET_EN(0)
T0V(RS_COUNT
, RS_COUNT__IT_COUNT(4)
| RS_COUNT__IC_COUNT(0)
| RS_COUNT__W_ADDR(0)
| RS_COUNT__HIRES_EN(1)
);
T0V(RS_IP_2
, RS_IP__TEX_PTR_S(8)
| RS_IP__TEX_PTR_T(9)
| RS_IP__TEX_PTR_R(10)
| RS_IP__TEX_PTR_Q(11)
| RS_IP__OFFSET_EN(0)
);
T0V(RS_IP_3
, RS_IP__TEX_PTR_S(12)
| RS_IP__TEX_PTR_T(13)
| RS_IP__TEX_PTR_R(14)
| RS_IP__TEX_PTR_Q(15)
| RS_IP__OFFSET_EN(0)
);
T0V(RS_IP_4
, RS_IP__TEX_PTR_S(16)
| RS_IP__TEX_PTR_T(17)
| RS_IP__TEX_PTR_R(18)
| RS_IP__TEX_PTR_Q(19)
| RS_IP__OFFSET_EN(0)
);
T0V(RS_INST_COUNT
, RS_INST_COUNT__INST_COUNT(4));
T0V(RS_INST_COUNT, 0x00000000);
T0V(RS_INST_0
, RS_INST__TEX_ID(0)
| RS_INST__TEX_CN(1)
| RS_INST__TEX_ADDR(0)
);
T0V(RS_INST_1
, RS_INST__TEX_ID(1)
| RS_INST__TEX_CN(1)
| RS_INST__TEX_ADDR(1)
//////////////////////////////////////////////////////////////////////////////
// VAP_PVS
//////////////////////////////////////////////////////////////////////////////
float theta1 = 3.2 * 3.14f / 2;
//float theta2 = 3.14f * theta;
float theta2 = 2 * 3.14f / 2 + theta;
const float consts[] = {
I_PI_2, 0.5f, PI_2, -PI,
theta1, theta2, 0.1f, 0.5f,
3.0f/4.0f, 0.0f, 0.0f, 0.0f,
};
int consts_length = (sizeof (consts)) / (sizeof (consts[0]));
assert((consts_length % 4) == 0);
T0V(VAP_PVS_CONST_CNTL
, VAP_PVS_CONST_CNTL__PVS_CONST_BASE_OFFSET(0)
| VAP_PVS_CONST_CNTL__PVS_MAX_CONST_ADDR(consts_length / 4)
);
T0V(RS_INST_2
, RS_INST__TEX_ID(2)
| RS_INST__TEX_CN(1)
| RS_INST__TEX_ADDR(2)
T0V(VAP_PVS_VECTOR_INDX_REG
, VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(1024)
);
T0V(RS_INST_3
, RS_INST__TEX_ID(3)
| RS_INST__TEX_CN(1)
| RS_INST__TEX_ADDR(3)
T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, (consts_length - 1));
for (int i = 0; i < consts_length; i++)
ib[ix++].f32 = consts[i];
T0V(VAP_PVS_CODE_CNTL_0
, VAP_PVS_CODE_CNTL_0__PVS_FIRST_INST(0)
| VAP_PVS_CODE_CNTL_0__PVS_XYZW_VALID_INST((vertex_shader_instructions - 1))
| VAP_PVS_CODE_CNTL_0__PVS_LAST_INST((vertex_shader_instructions - 1))
);
T0V(RS_INST_4
, RS_INST__TEX_ID(4)
| RS_INST__TEX_CN(1)
| RS_INST__TEX_ADDR(4)
T0V(VAP_PVS_CODE_CNTL_1
, VAP_PVS_CODE_CNTL_1__PVS_LAST_VTX_SRC_INST((vertex_shader_instructions - 1))
);
//////////////////////////////////////////////////////////////////////////////
@ -557,10 +476,10 @@ int _3d_object(int ix,
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0)
| VAP_VTE_CNTL__VTX_Z_FMT(1)
| VAP_VTE_CNTL__VTX_Z_FMT(0)
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
@ -575,7 +494,7 @@ int _3d_object(int ix,
| VAP_PROG_STREAM_CNTL__DATA_TYPE_1__FLOAT_2
| VAP_PROG_STREAM_CNTL__SKIP_DWORDS_1(0)
| VAP_PROG_STREAM_CNTL__DST_VEC_LOC_1(1)
| VAP_PROG_STREAM_CNTL__LAST_VEC_1(0)
| VAP_PROG_STREAM_CNTL__LAST_VEC_1(1)
);
T0V(VAP_PROG_STREAM_CNTL_EXT_0
, VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X
@ -590,28 +509,10 @@ int _3d_object(int ix,
| VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_1(0b1111) // XYZW
);
T0V(VAP_PROG_STREAM_CNTL_1
, VAP_PROG_STREAM_CNTL__DATA_TYPE_0__FLOAT_3
| VAP_PROG_STREAM_CNTL__SKIP_DWORDS_0(0)
| VAP_PROG_STREAM_CNTL__DST_VEC_LOC_0(2)
| VAP_PROG_STREAM_CNTL__LAST_VEC_0(1)
);
T0V(VAP_PROG_STREAM_CNTL_EXT_1
, VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0__SELECT_Y
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_Z
| VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0__SELECT_FP_ONE
| VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111) // XYZW
);
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_1_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_2_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_3_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_4_COMP_CNT(4));
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//////////////////////////////////////////////////////////////////////////////
// GA_US
@ -629,62 +530,6 @@ int _3d_object(int ix,
| US_CODE_ADDR__END_ADDR(fragment_shader_instructions - 1)
);
float theta1 = theta;
float theta2 = theta;
mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
mat4x4 p = perspective(0.01f, 5.0f,
0.001f, 0.999f,
0.5f, 2.0f);
vec4 light_pos = vec4(0, 0, 0, 1.0f);
{
//mat4x4 t = translate(vec3(0, 0, 3));
mat4x4 t1 = translate(vec3(1, 0, 0));
mat4x4 s = scale(0.05f);
mat4x4 rz = rotate_y(theta * 2.f);
mat4x4 world_trans = rz * t1 * s;
//mat3x3 normal_trans = transpose(inverse(submatrix(world_trans, 3, 3)));
//mat4x4 trans = aspect * p * t * world_trans;
light_pos = world_trans * light_pos;
}
vec4 view_pos = vec4(0, 0, -3, 1.0f);
mat4x4 t = translate(vec3(0, 0, 3));
mat4x4 rx = rotate_x(1 * theta1 * 0.5f);
//mat4x4 ry = rotate_y(0 * theta2 * 0.8f + 1.4f);
mat4x4 ry = rotate_y(3.1415f * 1.0f);
mat4x4 s = scale(0.5f);
mat4x4 world_trans = rx * ry * s;
//mat3x3 normal_trans = transpose(inverse(submatrix(world_trans, 3, 3)));
mat4x4 trans = aspect * p * t * world_trans;
printf("load consts %d\n", vertex_shader_instructions);
ix = load_consts(ix,
trans,
world_trans,
light_pos,
view_pos);
T0V(VAP_PVS_CODE_CNTL_0
, VAP_PVS_CODE_CNTL_0__PVS_FIRST_INST(0)
| VAP_PVS_CODE_CNTL_0__PVS_XYZW_VALID_INST((vertex_shader_instructions - 1))
| VAP_PVS_CODE_CNTL_0__PVS_LAST_INST((vertex_shader_instructions - 1))
);
T0V(VAP_PVS_CODE_CNTL_1
, VAP_PVS_CODE_CNTL_1__PVS_LAST_VTX_SRC_INST((vertex_shader_instructions - 1))
);
for (int object_ix = 0; object_ix < object_count; object_ix++) {
int start = object_offsets[object_ix].start;
int material_index = object_offsets[object_ix].material_index;
@ -693,8 +538,8 @@ int _3d_object(int ix,
int size = end - start;
int vertex_count = size / 5;
//printf("object_ix %d start %d end %d vertex_count %d\n",
//object_ix, start, end, vertex_count);
printf("object_ix %d start %d end %d vertex_count %d\n",
object_ix, start, end, vertex_count);
ix = texture(ix, reloc_indices, material_index);
ix = aos(ix, reloc_indices, start, vertex_count);
@ -939,7 +784,7 @@ int indirect_buffer(const struct reloc_indices * reloc_indices,
, US_CONFIG__ZERO_TIMES_ANYTHING_EQUALS_ZERO(1)
);
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(6)
, US_PIXSIZE__PIX_SIZE(1)
);
T0V(US_FC_CTRL, 0);
@ -1007,9 +852,9 @@ int indirect_buffer(const struct reloc_indices * reloc_indices,
// VAP_PVS
//////////////////////////////////////////////////////////////////////////////
//printf("vs length %d\n", vertex_shader_length);
printf("vs length %d\n", vertex_shader_length);
assert(vertex_shader_length % 4 == 0);
//printf("vs instructions %d\n", vertex_shader_instructions);
printf("vs instructions %d\n", vertex_shader_instructions);
T0V(VAP_PVS_VECTOR_INDX_REG
, VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(0)
@ -1023,9 +868,9 @@ int indirect_buffer(const struct reloc_indices * reloc_indices,
// GA_US
//////////////////////////////////////////////////////////////////////////////
//printf("fs length %d\n", fragment_shader_length);
printf("fs length %d\n", fragment_shader_length);
assert(fragment_shader_length % 6 == 0);
//printf("fs instructions %d\n", fragment_shader_instructions);
printf("fs instructions %d\n", fragment_shader_instructions);
T0V(GA_US_VECTOR_INDEX, 0x00000000);
T0_ONE_REG(GA_US_VECTOR_DATA, fragment_shader_length - 1);
@ -1040,8 +885,7 @@ int indirect_buffer(const struct reloc_indices * reloc_indices,
ix = _3d_clear(ix, reloc_indices);
ix = _3d_object(ix, reloc_indices, theta,
object_offsets,
object_count,
vertex_shader_instructions);
object_count);
//////////////////////////////////////////////////////////////////////////////
// padding
@ -1114,8 +958,7 @@ int fill_vertexbuffer(void * ptr, int size,
const int object_count = (sizeof (objects)) / (sizeof (objects[0]));
// FIXME: iterate through meshes, not objects
struct vb_object_offsets * offsets =
(struct vb_object_offsets *)calloc((sizeof (struct vb_object_offsets)), object_count + 1);
struct vb_object_offsets * offsets = calloc((sizeof (struct vb_object_offsets)), object_count + 1);
for (int object_ix = 0; object_ix < object_count; object_ix++) {
const struct mesh * mesh = objects[object_ix].mesh;
@ -1128,15 +971,11 @@ int fill_vertexbuffer(void * ptr, int size,
assert(mesh->uv_layers_length == 1);
const vec2 * uvmap = mesh->uv_layers[0];
const vec3 * position = mesh->position;
const vec3 * normal = mesh->normal;
int last_mat_ix = -1;
for (int polygon_ix = 0; polygon_ix < mesh->polygons_length; polygon_ix++) {
printf("[%d] position_length %d normal_length %d\n",
polygon_ix, mesh->position_length, mesh->normal_length);
int uv_ix = polygon_ix * 3;
const struct polygon * polygon = &mesh->polygons[polygon_ix];
const vec3 * ap = &position[polygon->a];
@ -1145,9 +984,6 @@ int fill_vertexbuffer(void * ptr, int size,
const vec2 * at = &uvmap[uv_ix + 0];
const vec2 * bt = &uvmap[uv_ix + 1];
const vec2 * ct = &uvmap[uv_ix + 2];
const vec3 * an = &normal[polygon->a];
const vec3 * bn = &normal[polygon->b];
const vec3 * cn = &normal[polygon->c];
if (last_mat_ix != polygon->material_index) {
printf("new material: object_ix %d material_index %d\n",
@ -1157,34 +993,25 @@ int fill_vertexbuffer(void * ptr, int size,
}
//assert(polygon->material_index == object_ix);
assert((ix + (8 * 3)) < (size / 4));
assert((ix + (5 * 3)) < (size / 4));
fptr[ix++] = ap->x;
fptr[ix++] = ap->y;
fptr[ix++] = ap->z;
fptr[ix++] = at->x;
fptr[ix++] = 1.0f - at->y;
fptr[ix++] = an->x;
fptr[ix++] = an->y;
fptr[ix++] = an->z;
fptr[ix++] = bp->x;
fptr[ix++] = bp->y;
fptr[ix++] = bp->z;
fptr[ix++] = bt->x;
fptr[ix++] = 1.0f - bt->y;
fptr[ix++] = bn->x;
fptr[ix++] = bn->y;
fptr[ix++] = bn->z;
fptr[ix++] = cp->x;
fptr[ix++] = cp->y;
fptr[ix++] = cp->z;
fptr[ix++] = ct->x;
fptr[ix++] = 1.0f - ct->y;
fptr[ix++] = cn->x;
fptr[ix++] = cn->y;
fptr[ix++] = cn->z;
}
}
@ -1264,7 +1091,7 @@ int main()
int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
const int colorbuffer_size = 1600 * 1200 * 4;
const int vertexbuffer_size = 4 * 1024 * 1024;
const int vertexbuffer_size = 1 * 1024 * 1024;
int colorbuffer_handle[2];
int zbuffer_handle;
@ -1322,8 +1149,7 @@ int main()
float theta = 0;
int relocs_length = 4 + texture_handles_length;
struct drm_radeon_cs_reloc * relocs =
(struct drm_radeon_cs_reloc *)calloc((sizeof (struct drm_radeon_cs_reloc)), relocs_length);
struct drm_radeon_cs_reloc * relocs = calloc((sizeof (struct drm_radeon_cs_reloc)), relocs_length);
int relocs_size = (sizeof (struct drm_radeon_cs_reloc)) * relocs_length;
const struct reloc_indices reloc_indices = {
@ -1424,8 +1250,7 @@ int main()
#define D1GRPH_UPDATE__D1GRPH_SURFACE_UPDATE_PENDING (1 << 2)
uint32_t d1crtc_double_buffer_control = rreg(rmmio, D1CRTC_DOUBLE_BUFFER_CONTROL);
//printf("D1CRTC_DOUBLE_BUFFER_CONTROL: %08x\n", d1crtc_double_buffer_control);
//printf("here\n");
printf("D1CRTC_DOUBLE_BUFFER_CONTROL: %08x\n", d1crtc_double_buffer_control);
assert(d1crtc_double_buffer_control == (1 << 8));
// addresses were retrieved from /sys/kernel/debug/radeon_vram_mm
@ -1443,6 +1268,8 @@ int main()
// next state
theta += 0.01f;
colorbuffer_ix = (colorbuffer_ix + 1) & 1;
break;
}
{

View File

@ -304,18 +304,18 @@ int indirect_buffer(float time)
| SC_SCISSOR0__YS0(0)
);
T0V(SC_SCISSOR1
, SC_SCISSOR1__XS1(1600 - 1)
| SC_SCISSOR1__YS1(1200 - 1)
, SC_SCISSOR1__XS1(800 - 1)
| SC_SCISSOR1__YS1(600 - 1)
);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0Vf(VAP_VPORT_XSCALE, 800.0f);
T0Vf(VAP_VPORT_XOFFSET, 800.0f);
T0Vf(VAP_VPORT_YSCALE, -600.0f);
T0Vf(VAP_VPORT_YOFFSET, 600.0f);
T0Vf(VAP_VPORT_XSCALE, 400.0f);
T0Vf(VAP_VPORT_XOFFSET, 400.0f);
T0Vf(VAP_VPORT_YSCALE, -300.0f);
T0Vf(VAP_VPORT_YOFFSET, 300.0f);
T0Vf(VAP_VPORT_ZSCALE, 0.5f);
T0Vf(VAP_VPORT_ZOFFSET, 0.5f);
@ -409,7 +409,7 @@ int indirect_buffer(float time)
};
const int vertex_shader_length = (sizeof (vertex_shader)) / (sizeof (vertex_shader[0]));
assert(vertex_shader_length % 4 == 0);
printf("vs length %d\n", vertex_shader_length);
//printf("vs length %d\n", vertex_shader_length);
T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, vertex_shader_length - 1);
for (int i = 0; i < vertex_shader_length; i++) {
@ -449,10 +449,17 @@ int indirect_buffer(float time)
// fragment constants
#define PI (3.14159274101257324219f)
#define PI_2 (PI * 2.0f)
#define I_PI_2 (1.0f / (PI_2))
const float fragment_consts[] = {
time, 0, 0, 0,
time, 1.2, 0.01, 0.4,
PI_2, I_PI_2, 0, 0,
0.25, 0.40625, 0.5625, 0,
};
int fragment_consts_length = (sizeof (fragment_consts)) / (sizeof (fragment_consts[0]));
assert(fragment_consts_length % 4 == 0);
T0V(GA_US_VECTOR_INDEX
, GA_US_VECTOR_INDEX__INDEX(0)
@ -465,16 +472,16 @@ int indirect_buffer(float time)
// fragment code
const uint32_t fragment_shader[] = {
#include "shadertoy_palette.fs.inc"
#include "shadertoy_palette_fractal.fs.inc"
};
const int fragment_shader_length = (sizeof (fragment_shader)) / (sizeof (fragment_shader[0]));
assert(fragment_shader_length % 6 == 0);
printf("fs length %d\n", fragment_shader_length);
//printf("fs length %d\n", fragment_shader_length);
const int fragment_shader_instructions = fragment_shader_length / 6;
printf("fs instructions %d\n", fragment_shader_instructions);
//printf("fs instructions %d\n", fragment_shader_instructions);
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(2) // pixel shader stack frame size
, US_PIXSIZE__PIX_SIZE(3) // pixel shader stack frame size
);
T0V(US_CODE_RANGE
@ -513,7 +520,7 @@ int indirect_buffer(float time)
-1.0f, 1.0f, 0.0f
};
const int vertices_length = (sizeof (vertices)) / (sizeof (vertices[0]));
printf("vtx length %d\n", vertices_length);
//printf("vtx length %d\n", vertices_length);
T3(_3D_DRAW_IMMD_2, (1 + vertices_length) - 1);
ib[ix++].u32
= VAP_VF_CNTL__PRIM_TYPE(4)
@ -708,7 +715,7 @@ int main()
#define D1GRPH_UPDATE__D1GRPH_SURFACE_UPDATE_PENDING (1 << 2)
uint32_t d1crtc_double_buffer_control = rreg(rmmio, D1CRTC_DOUBLE_BUFFER_CONTROL);
printf("D1CRTC_DOUBLE_BUFFER_CONTROL: %08x\n", d1crtc_double_buffer_control);
//printf("D1CRTC_DOUBLE_BUFFER_CONTROL: %08x\n", d1crtc_double_buffer_control);
assert(d1crtc_double_buffer_control == (1 << 8));
// addresses were retrieved from /sys/kernel/debug/radeon_vram_mm

View File

@ -0,0 +1,37 @@
-- Fragment shader entry point for the "palette fractal" effect: initialises
-- the working registers, runs the loop body four times, and writes the
-- accumulated color to out[0].
--
-- CONST[0] = { time, 1.2, 0.01, 0.4 }
-- CONST[1] = { PI_2, I_PI_2, 0, 0 },
-- CONST[2] = { 0.25, 0.40625, 0.5625, 0 },
-- temp[0] : { uv0.xy , _, l }
-- temp[1] : { uv.xy , _, d }
-- temp[2] : final_color.xyzw
-- temp[3] : {col.xyz , i }
--
-- "MAX a a" is used throughout as a move (max of a value with itself).
-- vec2 uv = uv0; // temp[1]
src0.rgb = temp[0] : -- uv0
temp[1].rg = MAX src0.rg_ src0.rg_ ;
-- vec4 final_color = vec4(0, 0, 0, 1);
-- (no source register is assigned: only the constant 0 / 1 swizzles are read)
:
temp[2].a = MAX src0.1 src0.1 ,
temp[2].rgb = MAX src0.000 src0.000 ;
-- i = 0;
:
temp[3].a = MAX src0.0 src0.0 ;
--------------------------------------------------------------------------------
-- loop start
-- The loop is unrolled by including the same body four times.
-- NOTE(review): any update of i (temp[3].a) and of final_color (temp[2])
-- presumably happens inside the included file -- not visible here.
--------------------------------------------------------------------------------
#include "shadertoy_palette_fractal_loop_inner.fs.asm"
#include "shadertoy_palette_fractal_loop_inner.fs.asm"
#include "shadertoy_palette_fractal_loop_inner.fs.asm"
#include "shadertoy_palette_fractal_loop_inner.fs.asm"
--------------------------------------------------------------------------------
-- loop end
--------------------------------------------------------------------------------
-- out[0].rgb = final_color.rgb; out[0].a is written from the constant 1
-- swizzle, not from temp[2].a
OUT TEX_SEM_WAIT
src0.rgb = temp[2] :
out[0].a = MAX src0.1 src0.1 ,
out[0].rgb = MAX src0.rgb src0.rgb ;

View File

@ -0,0 +1,924 @@
0x00001800,
0x08020000,
0x08020080,
0x00e40720,
0x00000000,
0x00000015,
0x00007800,
0x08020080,
0x08020080,
0x00920490,
0x00c18023,
0x00000025,
0x00004000,
0x08020080,
0x08020080,
0x00000000,
0x00810033,
0x00000000,
0x00001800,
0x08020001,
0x080200bc,
0x00ed8720,
0x00000000,
0x00790010,
0x00001800,
0x08020001,
0x08020080,
0x00000720,
0x00000000,
0x00000019,
0x00001800,
0x08020001,
0x0802c080,
0x00fb0720,
0x00000000,
0x00f6d010,
0x00004000,
0x08020000,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x00300100,
0x00000000,
0x0060e010,
0x1a000000,
0x00004000,
0x08020100,
0x08000500,
0x00000000,
0x00600010,
0x1a000000,
0x00003800,
0x8802c102,
0x08020001,
0x006d86d8,
0x00000000,
0x00223030,
0x00003800,
0x08020003,
0x08020080,
0x00000220,
0x00000000,
0x00000039,
0x00003800,
0x0802c003,
0x08020080,
0x004406d8,
0x00000000,
0x00a21030,
0x00000800,
0x08020003,
0x08020080,
0x00000000,
0x0000000d,
0x0000003a,
0x00001000,
0x08020003,
0x08020080,
0x00000000,
0x0000400d,
0x0000003a,
0x00002000,
0x08020003,
0x08020080,
0x00000000,
0x0000800d,
0x0000003a,
0x00003800,
0x0802c003,
0x08020080,
0x00440221,
0x00000000,
0x00221030,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0002c018,
0x00000000,
0x00004000,
0x08020001,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x08000400,
0x00000000,
0x0068c010,
0x20000000,
0x00004000,
0x10020080,
0x08034001,
0x00000000,
0x0068c010,
0x04000000,
0x00004000,
0x08040480,
0x0b020001,
0x00000000,
0x0028c010,
0x1c000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c017,
0x00000000,
0x00004000,
0x08020080,
0x0802c001,
0x00000000,
0x00618010,
0x5a000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x1800c01c,
0x00000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0004c01a,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0048c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c019,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0028c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c018,
0x00000000,
0x00003800,
0x00220003,
0x08000480,
0x006da220,
0x00000000,
0x00222020,
0x00004000,
0x08020080,
0x08020003,
0x00000000,
0x00618030,
0x30000000,
0x00001800,
0x08020001,
0x080200bc,
0x00ed8720,
0x00000000,
0x00790010,
0x00001800,
0x08020001,
0x08020080,
0x00000720,
0x00000000,
0x00000019,
0x00001800,
0x08020001,
0x0802c080,
0x00fb0720,
0x00000000,
0x00f6d010,
0x00004000,
0x08020000,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x00300100,
0x00000000,
0x0060e010,
0x1a000000,
0x00004000,
0x08020100,
0x08000500,
0x00000000,
0x00600010,
0x1a000000,
0x00003800,
0x8802c102,
0x08020001,
0x006d86d8,
0x00000000,
0x00223030,
0x00003800,
0x08020003,
0x08020080,
0x00000220,
0x00000000,
0x00000039,
0x00003800,
0x0802c003,
0x08020080,
0x004406d8,
0x00000000,
0x00a21030,
0x00000800,
0x08020003,
0x08020080,
0x00000000,
0x0000000d,
0x0000003a,
0x00001000,
0x08020003,
0x08020080,
0x00000000,
0x0000400d,
0x0000003a,
0x00002000,
0x08020003,
0x08020080,
0x00000000,
0x0000800d,
0x0000003a,
0x00003800,
0x0802c003,
0x08020080,
0x00440221,
0x00000000,
0x00221030,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0002c018,
0x00000000,
0x00004000,
0x08020001,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x08000400,
0x00000000,
0x0068c010,
0x20000000,
0x00004000,
0x10020080,
0x08034001,
0x00000000,
0x0068c010,
0x04000000,
0x00004000,
0x08040480,
0x0b020001,
0x00000000,
0x0028c010,
0x1c000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c017,
0x00000000,
0x00004000,
0x08020080,
0x0802c001,
0x00000000,
0x00618010,
0x5a000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x1800c01c,
0x00000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0004c01a,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0048c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c019,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0028c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c018,
0x00000000,
0x00003800,
0x00220003,
0x08000480,
0x006da220,
0x00000000,
0x00222020,
0x00004000,
0x08020080,
0x08020003,
0x00000000,
0x00618030,
0x30000000,
0x00001800,
0x08020001,
0x080200bc,
0x00ed8720,
0x00000000,
0x00790010,
0x00001800,
0x08020001,
0x08020080,
0x00000720,
0x00000000,
0x00000019,
0x00001800,
0x08020001,
0x0802c080,
0x00fb0720,
0x00000000,
0x00f6d010,
0x00004000,
0x08020000,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x00300100,
0x00000000,
0x0060e010,
0x1a000000,
0x00004000,
0x08020100,
0x08000500,
0x00000000,
0x00600010,
0x1a000000,
0x00003800,
0x8802c102,
0x08020001,
0x006d86d8,
0x00000000,
0x00223030,
0x00003800,
0x08020003,
0x08020080,
0x00000220,
0x00000000,
0x00000039,
0x00003800,
0x0802c003,
0x08020080,
0x004406d8,
0x00000000,
0x00a21030,
0x00000800,
0x08020003,
0x08020080,
0x00000000,
0x0000000d,
0x0000003a,
0x00001000,
0x08020003,
0x08020080,
0x00000000,
0x0000400d,
0x0000003a,
0x00002000,
0x08020003,
0x08020080,
0x00000000,
0x0000800d,
0x0000003a,
0x00003800,
0x0802c003,
0x08020080,
0x00440221,
0x00000000,
0x00221030,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0002c018,
0x00000000,
0x00004000,
0x08020001,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x08000400,
0x00000000,
0x0068c010,
0x20000000,
0x00004000,
0x10020080,
0x08034001,
0x00000000,
0x0068c010,
0x04000000,
0x00004000,
0x08040480,
0x0b020001,
0x00000000,
0x0028c010,
0x1c000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c017,
0x00000000,
0x00004000,
0x08020080,
0x0802c001,
0x00000000,
0x00618010,
0x5a000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x1800c01c,
0x00000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0004c01a,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0048c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c019,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0028c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c018,
0x00000000,
0x00003800,
0x00220003,
0x08000480,
0x006da220,
0x00000000,
0x00222020,
0x00004000,
0x08020080,
0x08020003,
0x00000000,
0x00618030,
0x30000000,
0x00001800,
0x08020001,
0x080200bc,
0x00ed8720,
0x00000000,
0x00790010,
0x00001800,
0x08020001,
0x08020080,
0x00000720,
0x00000000,
0x00000019,
0x00001800,
0x08020001,
0x0802c080,
0x00fb0720,
0x00000000,
0x00f6d010,
0x00004000,
0x08020000,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x00300100,
0x00000000,
0x0060e010,
0x1a000000,
0x00004000,
0x08020100,
0x08000500,
0x00000000,
0x00600010,
0x1a000000,
0x00003800,
0x8802c102,
0x08020001,
0x006d86d8,
0x00000000,
0x00223030,
0x00003800,
0x08020003,
0x08020080,
0x00000220,
0x00000000,
0x00000039,
0x00003800,
0x0802c003,
0x08020080,
0x004406d8,
0x00000000,
0x00a21030,
0x00000800,
0x08020003,
0x08020080,
0x00000000,
0x0000000d,
0x0000003a,
0x00001000,
0x08020003,
0x08020080,
0x00000000,
0x0000400d,
0x0000003a,
0x00002000,
0x08020003,
0x08020080,
0x00000000,
0x0000800d,
0x0000003a,
0x00003800,
0x0802c003,
0x08020080,
0x00440221,
0x00000000,
0x00221030,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0002c018,
0x00000000,
0x00004000,
0x08020001,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x08000400,
0x00000000,
0x0068c010,
0x20000000,
0x00004000,
0x10020080,
0x08034001,
0x00000000,
0x0068c010,
0x04000000,
0x00004000,
0x08040480,
0x0b020001,
0x00000000,
0x0028c010,
0x1c000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c017,
0x00000000,
0x00004000,
0x08020080,
0x0802c001,
0x00000000,
0x00618010,
0x5a000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x1800c01c,
0x00000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0004c01a,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0048c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c019,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0028c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c018,
0x00000000,
0x00003800,
0x00220003,
0x08000480,
0x006da220,
0x00000000,
0x00222020,
0x00004000,
0x08020080,
0x08020003,
0x00000000,
0x00618030,
0x30000000,
0x00078005,
0x08020002,
0x08020080,
0x00440220,
0x00c18003,
0x00000005,

View File

@ -0,0 +1,143 @@
-- uv = uv * 1.5;
src0.rgb = temp[1] , -- uv
src0.a = float(60) : -- 1.5
temp[1].rg = MAD src0.rg_ src0.aa_ src0.00_ ;
-- uv = fract(uv);
src0.rgb = temp[1] : -- uv
temp[1].rg = FRC src0.rg_ ;
-- uv = uv - 0.5;
src0.rgb = temp[1] ,
src1.a = float(48) : -- 0.5
temp[1].rg = MAD src0.rg_ src0.11_ -src1.aa_ ;
-- l = length(uv0);
src0.rgb = temp[0] : -- uv0
DP3 src0.rg0 src0.rg0 ,
temp[0].a = DP ;
src0.a = temp[0] :
temp[0].a = RSQ |src0.a| ;
src0.a = temp[0] :
temp[0].a = RCP src0.a ;
-- d = i * 0.4 + l;
src0.a = const[0] , -- 0.4
src1.a = temp[0] , -- l
src2.a = temp[3] : -- i
temp[1].a = MAD src2.a src0.a src1.a ;
-- d = time * 0.4 + d;
src0.a = const[0] , -- 0.4
src1.a = temp[1] , -- d
src0.rgb = const[0] : -- time (r)
temp[1].a = MAD src0.r src0.a src1.a ;
--------------------------------------------------------------------------------
-- start of 'palette' function
--------------------------------------------------------------------------------
-- v = d + (vec3(0.25, 0.40625, 0.5625) + 0.5)
src0.a = temp[1] , -- d
src0.rgb = const[2] , -- vec3(0.25, 0.40625, 0.5625)
src1.rgb = float(48) , -- 0.5
srcp.rgb = add : -- (vec3(0.25, 0.40625, 0.5625) + 0.5)
temp[3].rgb = MAD src0.111 src0.aaa srcp.rgb ;
-- v = fract(v)
src0.rgb = temp[3] : -- v
temp[3].rgb = FRC src0.rgb ;
-- v = v - 0.5
src0.rgb = temp[3] , -- v
src1.rgb = float(48) : -- 0.5
temp[3].rgb = MAD src0.111 src0.rgb -src1.rgb ;
-- v = cos(v * PI_2)
src0.rgb = temp[3] : -- v
COS src0.r ,
temp[3].r = SOP ;
src0.rgb = temp[3] : -- v
COS src0.g ,
temp[3].g = SOP ;
src0.rgb = temp[3] : -- v
COS src0.b ,
temp[3].b = SOP ;
-- col = vec3(0.5, 0.5, 0.5) * v + vec3(0.5, 0.5, 0.5)
src0.rgb = temp[3] , -- v
src1.rgb = float(48) : -- 0.5
temp[3].rgb = MAD src1.rgb src0.rgb src1.rgb;
--------------------------------------------------------------------------------
-- end of 'palette' function
--------------------------------------------------------------------------------
-- d = ex2(-l);
src0.a = temp[0] : -- l
temp[1].a = EX2 -src0.a ;
-- l = length(uv);
src0.rgb = temp[1] : -- uv
DP3 src0.rg0 src0.rg0 ,
temp[0].a = DP ;
src0.a = temp[0] :
temp[0].a = RSQ |src0.a| ;
src0.a = temp[0] :
temp[0].a = RCP src0.a ;
-- d = l * d;
src0.a = temp[0] , -- l
src1.a = temp[1] : -- d
temp[1].a = MAD src0.a src1.a src0.0 ;
-- d = d * 8.0 + time;
src0.a = temp[1] , -- d
src1.a = float(80) , -- 8.0
src2.rgb = const[0] : -- time (r)
temp[1].a = MAD src0.a src1.a src2.r ;
-- d = 0.125 * sin(d); <OMOD>
-- d = d * 0.159154936671257019043 + 0.5; // 48
src0.a = temp[1] , -- d
src1.rgb = const[1] , -- I_PI_2 (g)
src2.a = float(48) : -- 0.5
temp[1].a = MAD src0.a src1.g src2.a ;
-- d = fract(d);
src0.a = temp[1] : -- d
temp[1].a = FRC src0.a ;
-- d = d - 0.5;
src0.a = temp[1] , -- d
src1.a = float(48) : -- 0.5
temp[1].a = MAD src0.1 src0.a -src1.a ;
-- d = 0.125 * sin(d * PI_2);
src0.a = temp[1] :
temp[1].a = 0.125 * SIN src0.a ;
-- d = 1.0 / abs(d);
src0.a = temp[1] : -- d
temp[1].a = RCP |src0.a|;
-- d = 0.01 * d;
src0.a = temp[1] , -- d
src1.rgb = const[0] : -- 0.01 (b)
temp[1].a = MAD src0.a src1.b src0.0 ;
-- d = pow(d, 1.2);
src0.a = temp[1] : -- d
temp[1].a = LN2 src0.a ;
src0.a = temp[1] ,
src1.rgb = const[0] : -- 1.2 (g)
temp[1].a = MAD src0.a src1.g src0.0 ;
src0.a = temp[1] :
temp[1].a = EX2 src0.a ;
-- final_color = col * d + final_color
src0.rgb = temp[3] , -- col
src1.a = temp[1] , -- d
src2.rgb = temp[2] : -- final_color
temp[2].rgb = MAD src0.rgb src1.aaa src2.rgb ;
-- i = i + 1
src0.a = temp[3] :
temp[3].a = MAD src0.1 src0.a src0.1 ;

View File

@ -0,0 +1,224 @@
0x00001800,
0x08020001,
0x080200bc,
0x00ed8720,
0x00000000,
0x00790010,
0x00001800,
0x08020001,
0x08020080,
0x00000720,
0x00000000,
0x00000019,
0x00001800,
0x08020001,
0x0802c080,
0x00fb0720,
0x00000000,
0x00f6d010,
0x00004000,
0x08020000,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x00300100,
0x00000000,
0x0060e010,
0x1a000000,
0x00004000,
0x08020100,
0x08000500,
0x00000000,
0x00600010,
0x1a000000,
0x00003800,
0x8802c102,
0x08020001,
0x006d86d8,
0x00000000,
0x00223030,
0x00003800,
0x08020003,
0x08020080,
0x00000220,
0x00000000,
0x00000039,
0x00003800,
0x0802c003,
0x08020080,
0x004406d8,
0x00000000,
0x00a21030,
0x00000800,
0x08020003,
0x08020080,
0x00000000,
0x0000000d,
0x0000003a,
0x00001000,
0x08020003,
0x08020080,
0x00000000,
0x0000400d,
0x0000003a,
0x00002000,
0x08020003,
0x08020080,
0x00000000,
0x0000800d,
0x0000003a,
0x00003800,
0x0802c003,
0x08020080,
0x00440221,
0x00000000,
0x00221030,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0002c018,
0x00000000,
0x00004000,
0x08020001,
0x08020080,
0x00840420,
0x00000001,
0x00000001,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0004c00b,
0x00000000,
0x00004000,
0x08020080,
0x08020000,
0x00000000,
0x0000c00a,
0x00000000,
0x00004000,
0x08020080,
0x08000400,
0x00000000,
0x0068c010,
0x20000000,
0x00004000,
0x10020080,
0x08034001,
0x00000000,
0x0068c010,
0x04000000,
0x00004000,
0x08040480,
0x0b020001,
0x00000000,
0x0028c010,
0x1c000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c017,
0x00000000,
0x00004000,
0x08020080,
0x0802c001,
0x00000000,
0x00618010,
0x5a000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x1800c01c,
0x00000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0004c01a,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0048c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c019,
0x00000000,
0x00004000,
0x08040080,
0x08020001,
0x00000000,
0x0028c010,
0x20000000,
0x00004000,
0x08020080,
0x08020001,
0x00000000,
0x0000c018,
0x00000000,
0x00003800,
0x00220003,
0x08000480,
0x006da220,
0x00000000,
0x00222020,
0x00004000,
0x08020080,
0x08020003,
0x00000000,
0x00618030,
0x30000000,

View File

@ -646,7 +646,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -663,7 +663,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_128x128_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_128x128_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -707,7 +707,7 @@ int main()
intermediate_handle[1] = create_colorbuffer(fd, texture_size, NULL);
{
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {
((uint32_t*)texturebuffer_ptr)[i] = ((uint32_t*)texture_buf)[i];

View File

@ -765,7 +765,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -935,7 +935,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -991,7 +991,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -981,7 +981,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -1040,7 +1040,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -981,7 +981,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -990,7 +990,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -996,7 +996,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

View File

@ -699,10 +699,10 @@ int main()
const int texture_size = 1024 * 1024 * 4;
texturebuffer_handle[0] = load_texture(fd,
"../texture/butterfly_1024x1024_argb8888.data",
"../texture/butterfly_1024x1024_rgba8888.data",
texture_size);
texturebuffer_handle[1] = load_texture(fd,
"../texture/bird_1024x1024_argb8888.data",
"../texture/bird_1024x1024_rgba8888.data",
texture_size);
{ // clear colorbuffer

View File

@ -972,7 +972,7 @@ int main()
assert(texturebuffer_ptr != MAP_FAILED);
// copy texture
void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data");
void * texture_buf = read_file("../texture/butterfly_1024x1024_rgba8888.data");
assert(texture_buf != NULL);
for (int i = 0; i < texture_size / 4; i++) {

109
model/cube.h Normal file
View File

@ -0,0 +1,109 @@
#pragma once
const vec3 cube_position[] = {
{1.000000f, 1.000000f, -1.000000f},
{1.000000f, -1.000000f, -1.000000f},
{1.000000f, 1.000000f, 1.000000f},
{1.000000f, -1.000000f, 1.000000f},
{-1.000000f, 1.000000f, -1.000000f},
{-1.000000f, -1.000000f, -1.000000f},
{-1.000000f, 1.000000f, 1.000000f},
{-1.000000f, -1.000000f, 1.000000f},
};
const vec2 cube_texture[] = {
{1.000000f, 0.000000f},
{0.000000f, 1.000000f},
{0.000000f, 0.000000f},
{1.000000f, 1.000000f},
};
const vec3 cube_normal[] = {
{-0.0000f, 1.0000f, -0.0000f},
{-0.0000f, -0.0000f, 1.0000f},
{-1.0000f, -0.0000f, -0.0000f},
{-0.0000f, -1.0000f, -0.0000f},
{1.0000f, -0.0000f, -0.0000f},
{-0.0000f, -0.0000f, -1.0000f},
};
const triangle_t cube_Cube_triangle[] = {
{
{4, 0, 0},
{2, 1, 0},
{0, 2, 0},
},
{
{2, 0, 1},
{7, 1, 1},
{3, 2, 1},
},
{
{6, 0, 2},
{5, 1, 2},
{7, 2, 2},
},
{
{1, 0, 3},
{7, 1, 3},
{5, 2, 3},
},
{
{0, 0, 4},
{3, 1, 4},
{1, 2, 4},
},
{
{4, 0, 5},
{1, 1, 5},
{5, 2, 5},
},
{
{4, 0, 0},
{6, 3, 0},
{2, 1, 0},
},
{
{2, 0, 1},
{6, 3, 1},
{7, 1, 1},
},
{
{6, 0, 2},
{4, 3, 2},
{5, 1, 2},
},
{
{1, 0, 3},
{3, 3, 3},
{7, 1, 3},
},
{
{0, 0, 4},
{2, 3, 4},
{3, 1, 4},
},
{
{4, 0, 5},
{0, 3, 5},
{1, 1, 5},
},
};
// Object record for the cube mesh: points at the start of the
// cube_Cube_triangle table and covers all of its entries.
const object cube_Cube = {
.triangle = &cube_Cube_triangle[0],
.triangle_count = 12, // must match the number of entries in cube_Cube_triangle
};
const object * cube_object[] = {
&cube_Cube,
};
// Top-level model record for the cube: ties the position, texture
// coordinate and normal tables together with the list of objects.
const model cube_model = {
.position = cube_position,
.texture = cube_texture,
.normal = cube_normal,
.object = cube_object,
.object_count = 1 // cube_object holds a single entry (&cube_Cube)
};

48
model/cube_indexed.h Normal file
View File

@ -0,0 +1,48 @@
#pragma once
const int cube_Cube_triangles[] = {
0, 1, 2,
3, 4, 5,
6, 7, 8,
9, 10, 11,
12, 13, 14,
15, 16, 17,
0, 18, 1,
3, 19, 4,
6, 20, 7,
9, 21, 10,
12, 22, 13,
15, 23, 16,
};
const int cube_Cube_triangles_length = (sizeof (cube_Cube_triangles)) / (sizeof (cube_Cube_triangles[0]));
const float cube_vertices[] = {
-1.000000f, 1.000000f, -1.000000f, 0.875000f, 0.500000f, -0.0000f, 1.0000f, -0.0000f,
1.000000f, 1.000000f, 1.000000f, 0.625000f, 0.750000f, -0.0000f, 1.0000f, -0.0000f,
1.000000f, 1.000000f, -1.000000f, 0.625000f, 0.500000f, -0.0000f, 1.0000f, -0.0000f,
1.000000f, 1.000000f, 1.000000f, 0.625000f, 0.750000f, -0.0000f, -0.0000f, 1.0000f,
-1.000000f, -1.000000f, 1.000000f, 0.375000f, 1.000000f, -0.0000f, -0.0000f, 1.0000f,
1.000000f, -1.000000f, 1.000000f, 0.375000f, 0.750000f, -0.0000f, -0.0000f, 1.0000f,
-1.000000f, 1.000000f, 1.000000f, 0.625000f, 0.000000f, -1.0000f, -0.0000f, -0.0000f,
-1.000000f, -1.000000f, -1.000000f, 0.375000f, 0.250000f, -1.0000f, -0.0000f, -0.0000f,
-1.000000f, -1.000000f, 1.000000f, 0.375000f, 0.000000f, -1.0000f, -0.0000f, -0.0000f,
1.000000f, -1.000000f, -1.000000f, 0.375000f, 0.500000f, -0.0000f, -1.0000f, -0.0000f,
-1.000000f, -1.000000f, 1.000000f, 0.125000f, 0.750000f, -0.0000f, -1.0000f, -0.0000f,
-1.000000f, -1.000000f, -1.000000f, 0.125000f, 0.500000f, -0.0000f, -1.0000f, -0.0000f,
1.000000f, 1.000000f, -1.000000f, 0.625000f, 0.500000f, 1.0000f, -0.0000f, -0.0000f,
1.000000f, -1.000000f, 1.000000f, 0.375000f, 0.750000f, 1.0000f, -0.0000f, -0.0000f,
1.000000f, -1.000000f, -1.000000f, 0.375000f, 0.500000f, 1.0000f, -0.0000f, -0.0000f,
-1.000000f, 1.000000f, -1.000000f, 0.625000f, 0.250000f, -0.0000f, -0.0000f, -1.0000f,
1.000000f, -1.000000f, -1.000000f, 0.375000f, 0.500000f, -0.0000f, -0.0000f, -1.0000f,
-1.000000f, -1.000000f, -1.000000f, 0.375000f, 0.250000f, -0.0000f, -0.0000f, -1.0000f,
-1.000000f, 1.000000f, 1.000000f, 0.875000f, 0.750000f, -0.0000f, 1.0000f, -0.0000f,
-1.000000f, 1.000000f, 1.000000f, 0.625000f, 1.000000f, -0.0000f, -0.0000f, 1.0000f,
-1.000000f, 1.000000f, -1.000000f, 0.625000f, 0.250000f, -1.0000f, -0.0000f, -0.0000f,
1.000000f, -1.000000f, 1.000000f, 0.375000f, 0.750000f, -0.0000f, -1.0000f, -0.0000f,
1.000000f, 1.000000f, 1.000000f, 0.625000f, 0.750000f, 1.0000f, -0.0000f, -0.0000f,
1.000000f, 1.000000f, -1.000000f, 0.625000f, 0.500000f, -0.0000f, -0.0000f, -1.0000f,
};
const int cube_vertices_length = (sizeof (cube_vertices)) / (sizeof (cube_vertices[0]));

329
model/cubesphere_indexed.h Normal file
View File

@ -0,0 +1,329 @@
#pragma once
const int cubesphere_Cube_triangles[] = {
0, 1, 2,
3, 1, 4,
1, 5, 2,
1, 6, 7,
8, 0, 9,
10, 4, 0,
11, 4, 12,
13, 3, 11,
14, 15, 3,
15, 16, 6,
6, 17, 18,
7, 18, 19,
7, 20, 5,
5, 21, 22,
2, 22, 23,
9, 2, 23,
24, 25, 26,
27, 25, 28,
25, 29, 26,
25, 30, 31,
32, 24, 33,
34, 28, 24,
35, 28, 36,
21, 27, 35,
37, 38, 27,
38, 39, 30,
30, 40, 41,
31, 41, 42,
31, 43, 29,
29, 44, 45,
26, 45, 46,
33, 26, 46,
47, 48, 49,
50, 48, 51,
48, 52, 49,
48, 53, 54,
55, 47, 56,
57, 51, 47,
58, 51, 59,
60, 50, 58,
61, 62, 50,
62, 63, 53,
53, 64, 65,
54, 65, 66,
54, 67, 52,
52, 68, 69,
49, 69, 70,
56, 49, 70,
71, 72, 73,
74, 72, 75,
72, 76, 73,
72, 77, 78,
79, 71, 80,
81, 75, 71,
82, 75, 83,
84, 74, 82,
85, 86, 74,
86, 87, 77,
77, 32, 88,
78, 88, 89,
78, 90, 76,
76, 91, 92,
73, 92, 93,
80, 73, 93,
94, 95, 96,
97, 95, 98,
95, 99, 96,
95, 100, 101,
84, 94, 85,
102, 98, 94,
103, 98, 104,
8, 97, 103,
9, 105, 97,
105, 22, 100,
100, 21, 35,
101, 35, 36,
101, 34, 99,
99, 32, 87,
96, 87, 106,
85, 96, 106,
107, 108, 109,
110, 108, 111,
108, 112, 109,
108, 113, 114,
68, 107, 115,
67, 111, 107,
65, 111, 66,
64, 110, 65,
116, 117, 110,
117, 118, 113,
113, 8, 103,
114, 103, 104,
114, 102, 112,
112, 84, 119,
109, 119, 120,
115, 109, 120,
0, 4, 1,
3, 15, 1,
1, 7, 5,
1, 15, 6,
8, 10, 0,
10, 12, 4,
11, 3, 4,
13, 14, 3,
14, 121, 15,
15, 121, 16,
6, 16, 17,
7, 6, 18,
7, 19, 20,
5, 20, 21,
2, 5, 22,
9, 0, 2,
24, 28, 25,
27, 38, 25,
25, 31, 29,
25, 38, 30,
32, 34, 24,
34, 36, 28,
35, 27, 28,
21, 37, 27,
37, 122, 38,
38, 122, 39,
30, 39, 40,
31, 30, 41,
31, 42, 43,
29, 43, 44,
26, 29, 45,
33, 24, 26,
47, 51, 48,
50, 62, 48,
48, 54, 52,
48, 62, 53,
55, 57, 47,
57, 59, 51,
58, 50, 51,
60, 61, 50,
61, 123, 62,
62, 123, 63,
53, 63, 64,
54, 53, 65,
54, 66, 67,
52, 67, 68,
49, 52, 69,
56, 47, 49,
71, 75, 72,
74, 86, 72,
72, 78, 76,
72, 86, 77,
79, 81, 71,
81, 83, 75,
82, 74, 75,
84, 85, 74,
85, 106, 86,
86, 106, 87,
77, 87, 32,
78, 77, 88,
78, 89, 90,
76, 90, 91,
73, 76, 92,
80, 71, 73,
94, 98, 95,
97, 105, 95,
95, 101, 99,
95, 105, 100,
84, 102, 94,
102, 104, 98,
103, 97, 98,
8, 9, 97,
9, 23, 105,
105, 23, 22,
100, 22, 21,
101, 100, 35,
101, 36, 34,
99, 34, 32,
96, 99, 87,
85, 94, 96,
107, 111, 108,
110, 117, 108,
108, 114, 112,
108, 117, 113,
68, 67, 107,
67, 66, 111,
65, 110, 111,
64, 116, 110,
116, 124, 117,
117, 124, 118,
113, 118, 8,
114, 113, 103,
114, 104, 102,
112, 102, 84,
109, 112, 119,
115, 107, 109,
};
const int cubesphere_Cube_triangles_length = (sizeof (cubesphere_Cube_triangles)) / (sizeof (cubesphere_Cube_triangles[0]));
const float cubesphere_vertices[] = {
0.316157f, 0.728990f, -0.316157f, 0.687500f, 0.562500f, 0.3362f, 0.8797f, -0.3362f,
-0.000000f, 0.839506f, -0.000000f, 0.750000f, 0.625000f, -0.0000f, 1.0000f, -0.0000f,
0.333140f, 0.781829f, 0.000000f, 0.687500f, 0.625000f, 0.3553f, 0.9348f, -0.0000f,
-0.316157f, 0.728990f, -0.316157f, 0.812500f, 0.562500f, -0.3362f, 0.8797f, -0.3362f,
0.000000f, 0.781829f, -0.333140f, 0.750000f, 0.562500f, -0.0000f, 0.9348f, -0.3553f,
0.316157f, 0.728990f, 0.316157f, 0.687500f, 0.687500f, 0.3362f, 0.8797f, 0.3362f,
-0.316157f, 0.728990f, 0.316157f, 0.812500f, 0.687500f, -0.3362f, 0.8797f, 0.3362f,
-0.000000f, 0.781829f, 0.333140f, 0.750000f, 0.687500f, -0.0000f, 0.9348f, 0.3553f,
0.500000f, 0.500000f, -0.500000f, 0.625000f, 0.500000f, 0.5774f, 0.5774f, -0.5774f,
0.572933f, 0.572933f, -0.296650f, 0.625000f, 0.562500f, 0.6737f, 0.6737f, -0.3038f,
0.296650f, 0.572933f, -0.572933f, 0.687500f, 0.500000f, 0.3038f, 0.6737f, -0.6737f,
-0.296650f, 0.572933f, -0.572933f, 0.812500f, 0.500000f, -0.3038f, 0.6737f, -0.6737f,
-0.000000f, 0.609568f, -0.609568f, 0.750000f, 0.500000f, -0.0000f, 0.7071f, -0.7071f,
-0.500000f, 0.500000f, -0.500000f, 0.875000f, 0.500000f, -0.5774f, 0.5774f, -0.5774f,
-0.572933f, 0.572933f, -0.296650f, 0.875000f, 0.562500f, -0.6737f, 0.6737f, -0.3038f,
-0.333140f, 0.781829f, -0.000000f, 0.812500f, 0.625000f, -0.3553f, 0.9348f, -0.0000f,
-0.572933f, 0.572933f, 0.296650f, 0.875000f, 0.687500f, -0.6737f, 0.6737f, 0.3038f,
-0.500000f, 0.500000f, 0.500000f, 0.875000f, 0.750000f, -0.5774f, 0.5774f, 0.5774f,
-0.296650f, 0.572933f, 0.572933f, 0.812500f, 0.750000f, -0.3038f, 0.6737f, 0.6737f,
0.000000f, 0.609568f, 0.609568f, 0.750000f, 0.750000f, -0.0000f, 0.7071f, 0.7071f,
0.296650f, 0.572933f, 0.572933f, 0.687500f, 0.750000f, 0.3038f, 0.6737f, 0.6737f,
0.500000f, 0.500000f, 0.500000f, 0.625000f, 0.750000f, 0.5774f, 0.5774f, 0.5774f,
0.572933f, 0.572933f, 0.296650f, 0.625000f, 0.687500f, 0.6737f, 0.6737f, 0.3038f,
0.609568f, 0.609568f, -0.000000f, 0.625000f, 0.625000f, 0.7071f, 0.7071f, -0.0000f,
0.316157f, -0.316157f, 0.728990f, 0.437500f, 0.812500f, 0.3362f, -0.3362f, 0.8797f,
0.000000f, 0.000000f, 0.839506f, 0.500000f, 0.875000f, -0.0000f, -0.0000f, 1.0000f,
-0.000000f, -0.333140f, 0.781829f, 0.437500f, 0.875000f, -0.0000f, -0.3553f, 0.9348f,
0.316157f, 0.316157f, 0.728990f, 0.562500f, 0.812500f, 0.3362f, 0.3362f, 0.8797f,
0.333140f, -0.000000f, 0.781829f, 0.500000f, 0.812500f, 0.3553f, -0.0000f, 0.9348f,
-0.316157f, -0.316157f, 0.728990f, 0.437500f, 0.937500f, -0.3362f, -0.3362f, 0.8797f,
-0.316157f, 0.316157f, 0.728990f, 0.562500f, 0.937500f, -0.3362f, 0.3362f, 0.8797f,
-0.333140f, 0.000000f, 0.781829f, 0.500000f, 0.937500f, -0.3553f, -0.0000f, 0.9348f,
0.500000f, -0.500000f, 0.500000f, 0.375000f, 0.750000f, 0.5774f, -0.5774f, 0.5774f,
0.296650f, -0.572933f, 0.572933f, 0.375000f, 0.812500f, 0.3038f, -0.6737f, 0.6737f,
0.572933f, -0.296650f, 0.572933f, 0.437500f, 0.750000f, 0.6737f, -0.3038f, 0.6737f,
0.572933f, 0.296650f, 0.572933f, 0.562500f, 0.750000f, 0.6737f, 0.3038f, 0.6737f,
0.609568f, 0.000000f, 0.609568f, 0.500000f, 0.750000f, 0.7071f, -0.0000f, 0.7071f,
0.296650f, 0.572933f, 0.572933f, 0.625000f, 0.812500f, 0.3038f, 0.6737f, 0.6737f,
0.000000f, 0.333140f, 0.781829f, 0.562500f, 0.875000f, -0.0000f, 0.3553f, 0.9348f,
-0.296650f, 0.572933f, 0.572933f, 0.625000f, 0.937500f, -0.3038f, 0.6737f, 0.6737f,
-0.500000f, 0.500000f, 0.500000f, 0.625000f, 1.000000f, -0.5774f, 0.5774f, 0.5774f,
-0.572933f, 0.296650f, 0.572933f, 0.562500f, 1.000000f, -0.6737f, 0.3038f, 0.6737f,
-0.609568f, -0.000000f, 0.609568f, 0.500000f, 1.000000f, -0.7071f, -0.0000f, 0.7071f,
-0.572933f, -0.296650f, 0.572933f, 0.437500f, 1.000000f, -0.6737f, -0.3038f, 0.6737f,
-0.500000f, -0.500000f, 0.500000f, 0.375000f, 1.000000f, -0.5774f, -0.5774f, 0.5774f,
-0.296650f, -0.572933f, 0.572933f, 0.375000f, 0.937500f, -0.3038f, -0.6737f, 0.6737f,
-0.000000f, -0.609568f, 0.609568f, 0.375000f, 0.875000f, -0.0000f, -0.7071f, 0.7071f,
-0.728990f, -0.316157f, 0.316157f, 0.437500f, 0.062500f, -0.8797f, -0.3362f, 0.3362f,
-0.839506f, 0.000000f, -0.000000f, 0.500000f, 0.125000f, -1.0000f, -0.0000f, -0.0000f,
-0.781829f, -0.333140f, -0.000000f, 0.437500f, 0.125000f, -0.9348f, -0.3553f, -0.0000f,
-0.728990f, 0.316157f, 0.316157f, 0.562500f, 0.062500f, -0.8797f, 0.3362f, 0.3362f,
-0.781829f, -0.000000f, 0.333140f, 0.500000f, 0.062500f, -0.9348f, -0.0000f, 0.3553f,
-0.728990f, -0.316157f, -0.316157f, 0.437500f, 0.187500f, -0.8797f, -0.3362f, -0.3362f,
-0.728990f, 0.316157f, -0.316157f, 0.562500f, 0.187500f, -0.8797f, 0.3362f, -0.3362f,
-0.781829f, 0.000000f, -0.333140f, 0.500000f, 0.187500f, -0.9348f, -0.0000f, -0.3553f,
-0.500000f, -0.500000f, 0.500000f, 0.375000f, 0.000000f, -0.5774f, -0.5774f, 0.5774f,
-0.572933f, -0.572933f, 0.296650f, 0.375000f, 0.062500f, -0.6737f, -0.6737f, 0.3038f,
-0.572933f, -0.296650f, 0.572933f, 0.437500f, 0.000000f, -0.6737f, -0.3038f, 0.6737f,
-0.572933f, 0.296650f, 0.572933f, 0.562500f, 0.000000f, -0.6737f, 0.3038f, 0.6737f,
-0.609568f, -0.000000f, 0.609568f, 0.500000f, 0.000000f, -0.7071f, -0.0000f, 0.7071f,
-0.500000f, 0.500000f, 0.500000f, 0.625000f, 0.000000f, -0.5774f, 0.5774f, 0.5774f,
-0.572933f, 0.572933f, 0.296650f, 0.625000f, 0.062500f, -0.6737f, 0.6737f, 0.3038f,
-0.781829f, 0.333140f, -0.000000f, 0.562500f, 0.125000f, -0.9348f, 0.3553f, -0.0000f,
-0.572933f, 0.572933f, -0.296650f, 0.625000f, 0.187500f, -0.6737f, 0.6737f, -0.3038f,
-0.500000f, 0.500000f, -0.500000f, 0.625000f, 0.250000f, -0.5774f, 0.5774f, -0.5774f,
-0.572933f, 0.296650f, -0.572933f, 0.562500f, 0.250000f, -0.6737f, 0.3038f, -0.6737f,
-0.609568f, -0.000000f, -0.609568f, 0.500000f, 0.250000f, -0.7071f, -0.0000f, -0.7071f,
-0.572933f, -0.296650f, -0.572933f, 0.437500f, 0.250000f, -0.6737f, -0.3038f, -0.6737f,
-0.500000f, -0.500000f, -0.500000f, 0.375000f, 0.250000f, -0.5774f, -0.5774f, -0.5774f,
-0.572933f, -0.572933f, -0.296650f, 0.375000f, 0.187500f, -0.6737f, -0.6737f, -0.3038f,
-0.609568f, -0.609568f, -0.000000f, 0.375000f, 0.125000f, -0.7071f, -0.7071f, -0.0000f,
-0.316157f, -0.728990f, -0.316157f, 0.187500f, 0.562500f, -0.3362f, -0.8797f, -0.3362f,
-0.000000f, -0.839506f, 0.000000f, 0.250000f, 0.625000f, -0.0000f, -1.0000f, -0.0000f,
-0.333140f, -0.781829f, 0.000000f, 0.187500f, 0.625000f, -0.3553f, -0.9348f, -0.0000f,
0.316157f, -0.728990f, -0.316157f, 0.312500f, 0.562500f, 0.3362f, -0.8797f, -0.3362f,
-0.000000f, -0.781829f, -0.333140f, 0.250000f, 0.562500f, -0.0000f, -0.9348f, -0.3553f,
-0.316157f, -0.728990f, 0.316157f, 0.187500f, 0.687500f, -0.3362f, -0.8797f, 0.3362f,
0.316157f, -0.728990f, 0.316157f, 0.312500f, 0.687500f, 0.3362f, -0.8797f, 0.3362f,
0.000000f, -0.781829f, 0.333140f, 0.250000f, 0.687500f, -0.0000f, -0.9348f, 0.3553f,
-0.500000f, -0.500000f, -0.500000f, 0.125000f, 0.500000f, -0.5774f, -0.5774f, -0.5774f,
-0.572933f, -0.572933f, -0.296650f, 0.125000f, 0.562500f, -0.6737f, -0.6737f, -0.3038f,
-0.296650f, -0.572933f, -0.572933f, 0.187500f, 0.500000f, -0.3038f, -0.6737f, -0.6737f,
0.296650f, -0.572933f, -0.572933f, 0.312500f, 0.500000f, 0.3038f, -0.6737f, -0.6737f,
0.000000f, -0.609568f, -0.609568f, 0.250000f, 0.500000f, -0.0000f, -0.7071f, -0.7071f,
0.500000f, -0.500000f, -0.500000f, 0.375000f, 0.500000f, 0.5774f, -0.5774f, -0.5774f,
0.572933f, -0.572933f, -0.296650f, 0.375000f, 0.562500f, 0.6737f, -0.6737f, -0.3038f,
0.333140f, -0.781829f, -0.000000f, 0.312500f, 0.625000f, 0.3553f, -0.9348f, -0.0000f,
0.572933f, -0.572933f, 0.296650f, 0.375000f, 0.687500f, 0.6737f, -0.6737f, 0.3038f,
0.296650f, -0.572933f, 0.572933f, 0.312500f, 0.750000f, 0.3038f, -0.6737f, 0.6737f,
-0.000000f, -0.609568f, 0.609568f, 0.250000f, 0.750000f, -0.0000f, -0.7071f, 0.7071f,
-0.296650f, -0.572933f, 0.572933f, 0.187500f, 0.750000f, -0.3038f, -0.6737f, 0.6737f,
-0.500000f, -0.500000f, 0.500000f, 0.125000f, 0.750000f, -0.5774f, -0.5774f, 0.5774f,
-0.572933f, -0.572933f, 0.296650f, 0.125000f, 0.687500f, -0.6737f, -0.6737f, 0.3038f,
-0.609568f, -0.609568f, -0.000000f, 0.125000f, 0.625000f, -0.7071f, -0.7071f, -0.0000f,
0.728990f, -0.316157f, -0.316157f, 0.437500f, 0.562500f, 0.8797f, -0.3362f, -0.3362f,
0.839506f, -0.000000f, -0.000000f, 0.500000f, 0.625000f, 1.0000f, -0.0000f, -0.0000f,
0.781829f, -0.333140f, 0.000000f, 0.437500f, 0.625000f, 0.9348f, -0.3553f, -0.0000f,
0.728990f, 0.316157f, -0.316157f, 0.562500f, 0.562500f, 0.8797f, 0.3362f, -0.3362f,
0.781829f, -0.000000f, -0.333140f, 0.500000f, 0.562500f, 0.9348f, -0.0000f, -0.3553f,
0.728990f, -0.316157f, 0.316157f, 0.437500f, 0.687500f, 0.8797f, -0.3362f, 0.3362f,
0.728990f, 0.316157f, 0.316157f, 0.562500f, 0.687500f, 0.8797f, 0.3362f, 0.3362f,
0.781829f, 0.000000f, 0.333140f, 0.500000f, 0.687500f, 0.9348f, -0.0000f, 0.3553f,
0.572933f, -0.296650f, -0.572933f, 0.437500f, 0.500000f, 0.6737f, -0.3038f, -0.6737f,
0.572933f, 0.296650f, -0.572933f, 0.562500f, 0.500000f, 0.6737f, 0.3038f, -0.6737f,
0.609568f, 0.000000f, -0.609568f, 0.500000f, 0.500000f, 0.7071f, -0.0000f, -0.7071f,
0.781829f, 0.333140f, -0.000000f, 0.562500f, 0.625000f, 0.9348f, 0.3553f, -0.0000f,
0.609568f, -0.609568f, -0.000000f, 0.375000f, 0.625000f, 0.7071f, -0.7071f, -0.0000f,
-0.316157f, -0.316157f, -0.728990f, 0.437500f, 0.312500f, -0.3362f, -0.3362f, -0.8797f,
0.000000f, 0.000000f, -0.839506f, 0.500000f, 0.375000f, -0.0000f, -0.0000f, -1.0000f,
0.000000f, -0.333140f, -0.781829f, 0.437500f, 0.375000f, -0.0000f, -0.3553f, -0.9348f,
-0.316157f, 0.316157f, -0.728990f, 0.562500f, 0.312500f, -0.3362f, 0.3362f, -0.8797f,
-0.333140f, -0.000000f, -0.781829f, 0.500000f, 0.312500f, -0.3553f, -0.0000f, -0.9348f,
0.316157f, -0.316157f, -0.728990f, 0.437500f, 0.437500f, 0.3362f, -0.3362f, -0.8797f,
0.316157f, 0.316157f, -0.728990f, 0.562500f, 0.437500f, 0.3362f, 0.3362f, -0.8797f,
0.333140f, 0.000000f, -0.781829f, 0.500000f, 0.437500f, 0.3553f, -0.0000f, -0.9348f,
-0.296650f, -0.572933f, -0.572933f, 0.375000f, 0.312500f, -0.3038f, -0.6737f, -0.6737f,
-0.296650f, 0.572933f, -0.572933f, 0.625000f, 0.312500f, -0.3038f, 0.6737f, -0.6737f,
-0.000000f, 0.333140f, -0.781829f, 0.562500f, 0.375000f, -0.0000f, 0.3553f, -0.9348f,
0.296650f, 0.572933f, -0.572933f, 0.625000f, 0.437500f, 0.3038f, 0.6737f, -0.6737f,
0.296650f, -0.572933f, -0.572933f, 0.375000f, 0.437500f, 0.3038f, -0.6737f, -0.6737f,
0.000000f, -0.609568f, -0.609568f, 0.375000f, 0.375000f, -0.0000f, -0.7071f, -0.7071f,
-0.609568f, 0.609568f, -0.000000f, 0.875000f, 0.625000f, -0.7071f, 0.7071f, -0.0000f,
0.000000f, 0.609568f, 0.609568f, 0.625000f, 0.875000f, -0.0000f, 0.7071f, 0.7071f,
-0.609568f, 0.609568f, -0.000000f, 0.625000f, 0.125000f, -0.7071f, 0.7071f, -0.0000f,
-0.000000f, 0.609568f, -0.609568f, 0.625000f, 0.375000f, -0.0000f, 0.7071f, -0.7071f,
};
const int cubesphere_vertices_length = (sizeof (cubesphere_vertices)) / (sizeof (cubesphere_vertices[0]));

50
model/plane.h Normal file
View File

@ -0,0 +1,50 @@
#pragma once
const vec3 plane_position[] = {
{1.000000f, 1.000000f, -0.000000f},
{1.000000f, -1.000000f, -0.000000f},
{-1.000000f, 1.000000f, 0.000000f},
{-1.000000f, -1.000000f, 0.000000f},
};
const vec2 plane_texture[] = {
{1.000000f, 0.000000f},
{0.000000f, 1.000000f},
{0.000000f, 0.000000f},
{1.000000f, 1.000000f},
};
const vec3 plane_normal[] = {
{-0.0000f, -0.0000f, -1.0000f},
};
const triangle_t plane_Plane_triangle[] = {
{
{1, 0, 0},
{2, 1, 0},
{0, 2, 0},
},
{
{1, 0, 0},
{3, 3, 0},
{2, 1, 0},
},
};
// Object record for the plane mesh: points at the start of the
// plane_Plane_triangle table and covers all of its entries.
const object plane_Plane = {
.triangle = &plane_Plane_triangle[0],
.triangle_count = 2, // must match the number of entries in plane_Plane_triangle
};
const object * plane_object[] = {
&plane_Plane,
};
// Top-level model record for the plane: ties the position, texture
// coordinate and normal tables together with the list of objects.
const model plane_model = {
.position = plane_position,
.texture = plane_texture,
.normal = plane_normal,
.object = plane_object,
.object_count = 1 // plane_object holds a single entry (&plane_Plane)
};

View File

@ -21,6 +21,41 @@ static inline void wreg(void * rmmio, uint32_t offset, uint32_t value)
asm volatile ("" ::: "memory");
}
// Index/data register pair used by the indirect ("slow") accessors:
// the target offset is written to MM_INDEX, the payload moves through MM_DATA.
#define MM_INDEX 0x0
#define MM_DATA 0x4

// Indirect register write.
//
// rmmio:  base pointer handed unchanged to wreg()
// offset: register offset to select, written to MM_INDEX first
// value:  32-bit value to store, written to MM_DATA second
//
// The index write must precede the data write; both go through wreg().
static inline void wreg_slow(void * rmmio, uint32_t offset, uint32_t value)
{
  wreg(rmmio, MM_INDEX, offset);
  wreg(rmmio, MM_DATA, value);
}
// Indirect register read.
//
// rmmio:  base pointer handed unchanged to wreg()/rreg()
// offset: register offset to select, written to MM_INDEX
//
// Returns the 32-bit value read back from MM_DATA after the index write.
static inline uint32_t rreg_slow(void * rmmio, uint32_t offset)
{
  wreg(rmmio, MM_INDEX, offset);
  return rreg(rmmio, MM_DATA);
}
// One entry of a register table: a printable register name paired with
// the numeric address used to select that register.
struct name_address {
const char * name; // register name, printed with %s by main
const int address; // register address/index, printed with %x by main
};
// Display register table; the initializer entries are supplied by the
// included .inc file.
const struct name_address display_addresses[] = {
#include "../regs/display_registers.inc"
};
// Number of entries in display_addresses.
const int display_addresses_length = (sizeof (display_addresses)) / (sizeof (display_addresses[0]));
// Memory controller register table; the initializer entries are supplied
// by the included .inc file.
const struct name_address memory_controller_addresses[] = {
#include "../regs/memory_controller_registers.inc"
};
// Number of entries in memory_controller_addresses.
const int memory_controller_addresses_length = (sizeof (memory_controller_addresses)) / (sizeof (memory_controller_addresses[0]));
// PCIe register table; the initializer entries are supplied by the
// included .inc file.
const struct name_address pcie_addresses[] = {
#include "../regs/pcie_registers.inc"
};
// Number of entries in pcie_addresses.
const int pcie_addresses_length = (sizeof (pcie_addresses)) / (sizeof (pcie_addresses[0]));
int main()
{
////////////////////////////////////////////////////////////////////////
@ -36,16 +71,47 @@ int main()
void * rmmio = resource2_base;
uint32_t value1 = rreg(rmmio, 0x6110);
printf("[r500] D1GRPH_PRIMARY_SURFACE_ADDRESS %08x\n", value1);
uint32_t value2 = rreg(rmmio, 0x6110 + 0x800);
printf("[r500] D2GRPH_PRIMARY_SURFACE_ADDRESS %08x\n", value2);
if (1) {
for (int i = 0; i < display_addresses_length; i++) {
uint32_t value = rreg(rmmio, display_addresses[i].address);
printf("%s %x %08x\n", display_addresses[i].name, display_addresses[i].address, value);
}
}
uint32_t value3 = rreg(rmmio, 0x6118);
printf("[r500] D1GRPH_SECONDARY_SURFACE_ADDRESS %08x\n", value3);
uint32_t value4 = rreg(rmmio, 0x6118 + 0x800);
printf("[r500] D2GRPH_SECONDARY_SURFACE_ADDRESS %08x\n", value4);
if (0) {
#define MC_IND_INDEX 0x70
#define MC_IND_INDEX__MC_IND_ADDR(x) (((x) & 0xffff) << 0)
#define MC_IND_INDEX__MC_IND_ADDR__CLEAR (~0xffff)
#define MC_IND_DATA 0x74
// skip MC_IND_INDEX/MC_IND_DATA
const int masks[] = {
(1 << 16),
(1 << 17),
(1 << 20),
(1 << 21),
(1 << 22),
};
wreg(rmmio, 0x6110, 0x813000);
wreg(rmmio, 0x6118, 0x813000);
for (int i = 2; i < memory_controller_addresses_length; i++) {
const char * name = memory_controller_addresses[i].name;
int address = memory_controller_addresses[i].address;
int mask = (1 << 16) | (1 << 17) | (1 << 20) | (1 << 21) | (1 << 22);
wreg(rmmio, MC_IND_INDEX, MC_IND_INDEX__MC_IND_ADDR(address) | mask);
uint32_t value = rreg(rmmio, MC_IND_DATA);
wreg(rmmio, MC_IND_INDEX, MC_IND_INDEX__MC_IND_ADDR__CLEAR);
printf("%s %x %08x\n", name, address, value);
}
}
if (0) {
#define PCIE_INDEX 0x30
#define PCIE_DATA 0x38
for (int i = 0; i < pcie_addresses_length; i++) {
const char * name = pcie_addresses[i].name;
int address = pcie_addresses[i].address;
wreg_slow(rmmio, PCIE_INDEX, address);
uint32_t value = rreg_slow(rmmio, PCIE_DATA);
printf("%s %x %08x\n", name, address, value);
}
}
}

View File

@ -1,8 +1,10 @@
import sys
def print_error(filename, buf, e):
def print_error(e):
assert len(e.args) == 2, e
message, token = e.args
with open(token.filename, 'rb') as f:
buf = f.read()
lines = buf.splitlines()
line = lines[token.line - 1]
@ -11,7 +13,7 @@ def print_error(filename, buf, e):
col_pointer = '^' * len(token.lexeme)
RED = "\033[0;31m"
DEFAULT = "\033[0;0m"
print(f'File: "{filename}", line {token.line}, column {token.col}\n', file=sys.stderr)
print(f'File: "{token.filename}", line {token.line}, column {token.col}\n', file=sys.stderr)
sys.stderr.write(' ')
wrote_default = False
for i, c in enumerate(line.decode('utf-8')):

View File

@ -1,4 +1,5 @@
import sys
import struct
from assembler.lexer import Lexer, LexerError
from assembler.parser import ParserError
@ -9,32 +10,47 @@ from assembler.fs.validator import validate_instruction
from assembler.fs.emitter import emit_instruction
from assembler.error import print_error
def frontend_inner(buf):
lexer = Lexer(buf, find_keyword, emit_newlines=False, minus_is_token=True)
def frontend_inner(filename, buf):
lexer = Lexer(filename, buf, find_keyword, emit_newlines=False, minus_is_token=True)
tokens = list(lexer.lex_tokens())
parser = Parser(tokens)
for ins_ast in parser.instructions():
ins = validate_instruction(ins_ast)
code = [0] * 6
emit_instruction(code, ins)
print("\n".join(f"0x{code[i]:08x}," for i in range(6)))
print()
yield code
def frontend(filename, buf):
try:
frontend_inner(buf)
yield from frontend_inner(filename, buf)
except LexerError as e:
print_error(filename, buf, e)
print_error(e)
raise
except ParserError as e:
print_error(filename, buf, e)
print_error(e)
raise
except ValidatorError as e:
print_error(filename, buf, e)
print_error(e)
raise
if __name__ == "__main__":
assert len(sys.argv) in {2, 3}
input_filename = sys.argv[1]
binary = len(sys.argv) == 3
if binary:
output_filename = sys.argv[2]
with open(input_filename, 'rb') as f:
buf = f.read()
frontend(input_filename, buf)
code_gen = list(frontend(input_filename, buf))
if not binary:
for cw in code_gen:
print("\n".join(f"0x{cw[i]:08x}," for i in range(6)))
print()
else:
with open(output_filename, 'wb') as f:
for cw in code_gen:
data = struct.pack("<IIIIII", *cw)
f.write(data)

View File

@ -17,6 +17,9 @@ def emit_alpha_op(code, alpha_op):
if alpha_op.dest.omask is not None:
US_CMN_INST.ALPHA_OMASK(code, alpha_op.dest.omask.value)
# omod
US_ALU_ALPHA_INST.OMOD(code, alpha_op.omod.value)
# opcode
US_ALU_ALPHA_INST.ALPHA_OP(code, alpha_op.opcode.value)
@ -59,6 +62,9 @@ def emit_rgb_op(code, rgb_op):
if rgb_op.dest.omask is not None:
US_CMN_INST.RGB_OMASK(code, rgb_op.dest.omask.value)
# omod
US_ALU_RGB_INST.OMOD(code, rgb_op.omod.value)
# opcode
US_ALU_RGBA_INST.RGB_OP(code, rgb_op.opcode.value)

View File

@ -110,6 +110,26 @@ class Swizzle(IntEnum):
one = 6
unused = 7
class Omod(IntEnum):
    # Numeric codes for the output modifier of an ALU operation.  The
    # mul_*/div_* names mirror the decimal literals listed in omod_lexemes
    # below ("2.0" -> mul_2, "0.25" -> div_4, ...).
    mul_1 = 0
    mul_2 = 1
    mul_4 = 2
    mul_8 = 3
    div_2 = 4
    div_4 = 5
    div_8 = 6
    disable = 7  # has no spelling in omod_lexemes

# Accepted source spellings of the output modifier.  Each key is the pair of
# lexemes found on either side of the dot, e.g. "0.125" -> (b"0", b"125").
omod_lexemes = OrderedDict(
    (tuple(spelling.split(b".")), omod)
    for spelling, omod in [
        (b"1.0", Omod.mul_1),
        (b"2.0", Omod.mul_2),
        (b"4.0", Omod.mul_4),
        (b"8.0", Omod.mul_8),
        (b"0.5", Omod.div_2),
        (b"0.25", Omod.div_4),
        (b"0.125", Omod.div_8),
    ]
)
@dataclass
class SwizzleSel:
src: SwizzleSelSrc
@ -119,12 +139,14 @@ class SwizzleSel:
@dataclass
class AlphaOperation:
    # Validated alpha ALU operation, as assembled by
    # validate_alpha_instruction_operation.
    dest: AlphaDest         # result of validate_instruction_operation_dest
    omod: Omod              # Omod.mul_1 when the source has no omod prefix
    opcode: AlphaOp         # looked up from the opcode keyword
    sels: list[SwizzleSel]  # validated source selections
@dataclass
class RGBOperation:
    # Validated RGB ALU operation, as assembled by
    # validate_rgb_instruction_operation.
    dest: RGBDest           # result of validate_instruction_operation_dest
    omod: Omod              # Omod.mul_1 when the source has no omod prefix
    opcode: RGBOp           # looked up from the opcode keyword
    sels: list[SwizzleSel]  # validated source selections
@ -413,14 +435,27 @@ def validate_instruction_operation_sels(swizzle_sels, is_alpha):
sels.append(SwizzleSel(src, swizzle, mod))
return sels
def validate_omod_operation(operation):
    """Resolve the optional omod prefix of a parsed ALU operation.

    `operation.omod` is either None (no prefix was written) or a pair of
    tokens `(integer, decimal)` holding the digits on each side of the dot.

    Returns the matching `Omod` member, or `Omod.mul_1` when there is no
    prefix.  Raises `ValidatorError`, anchored at the integer token, when the
    literal is not one of the spellings in `omod_lexemes`.
    """
    # Identity test: None is a singleton, and `!=` would dispatch to __ne__.
    if operation.omod is None:
        return Omod.mul_1

    integer, decimal = operation.omod
    key = (integer.lexeme, decimal.lexeme)
    if key not in omod_lexemes:
        # Re-join each (integer, decimal) pair as "i.d" for the message.
        valid_omods = b", ".join(
            b".".join(lexemes) for lexemes in omod_lexemes
        ).decode('utf-8')
        raise ValidatorError(f"invalid omod, expected one of [{valid_omods}]", integer)
    return omod_lexemes[key]
def validate_alpha_instruction_operation(operation):
    """Build an `AlphaOperation` from a parsed ALU operation.

    The parts are validated in field order: destination, omod, opcode,
    source selections.
    """
    return AlphaOperation(
        dest=validate_instruction_operation_dest(
            operation.dest_addr_swizzles,
            mask_lookup=alpha_masks,
            type_cls=AlphaDest,
        ),
        omod=validate_omod_operation(operation),
        opcode=alpha_op_kws[operation.opcode_keyword.keyword],
        sels=validate_instruction_operation_sels(operation.swizzle_sels, is_alpha=True),
    )
@ -429,10 +464,12 @@ def validate_rgb_instruction_operation(operation):
dest = validate_instruction_operation_dest(operation.dest_addr_swizzles,
mask_lookup=rgb_masks,
type_cls=RGBDest)
omod = validate_omod_operation(operation)
opcode = rgb_op_kws[operation.opcode_keyword.keyword]
sels = validate_instruction_operation_sels(operation.swizzle_sels, is_alpha=False)
return RGBOperation(
dest,
omod,
opcode,
sels
)

View File

@ -35,6 +35,7 @@ class ALUSwizzleSel:
@dataclass
class ALUOperation:
    # Parsed (not yet validated) ALU operation produced by Parser.alu_operation.
    dest_addr_swizzles: list[DestAddrSwizzle]  # destinations written before the omod/opcode
    omod: tuple[Token, Token]                  # (integer, decimal) tokens; the parser passes None when absent
    opcode_keyword: Token                      # the opcode keyword token
    swizzle_sels: list[ALUSwizzleSel]          # source operands following the opcode
@ -115,6 +116,15 @@ class Parser(BaseParser):
return token.keyword in opcode_keywords
return False
def alu_is_omod(self):
    """Report whether the upcoming tokens have the shape of an omod prefix.

    An omod prefix is `identifier . identifier *`; the tokens are only
    peeked at, never consumed.
    """
    omod_shape = (TT.identifier, TT.dot, TT.identifier, TT.star)
    # all() stops at the first mismatch, so later offsets are not peeked.
    return all(
        self.match(token_type, offset=offset)
        for offset, token_type in enumerate(omod_shape)
    )
def alu_is_neg(self):
result = self.match(TT.minus)
if result:
@ -154,9 +164,17 @@ class Parser(BaseParser):
def alu_operation(self):
dest_addr_swizzles = []
while not self.alu_is_opcode():
while not (self.alu_is_opcode() or self.alu_is_omod()):
dest_addr_swizzles.append(self.dest_addr_swizzle())
omod = None
if self.alu_is_omod():
omod_integer = self.consume(TT.identifier, "expected omod decimal identifier")
self.consume(TT.dot, "expected omod decimal dot")
omod_decimal = self.consume(TT.identifier, "expected omod decimal identifier")
self.consume(TT.star, "expected omod star")
omod = (omod_integer, omod_decimal)
opcode_keyword = self.consume(TT.keyword, "expected opcode keyword")
swizzle_sels = []
@ -165,6 +183,7 @@ class Parser(BaseParser):
return ALUOperation(
dest_addr_swizzles,
omod,
opcode_keyword,
swizzle_sels
)

View File

@ -55,7 +55,7 @@ def validate_swizzle(token):
return tuple(swizzles[c] for c in token.lexeme)
def validate_mask_swizzle(token) -> tuple[AlphaMask, RGBMask]:
argb_masks = OrderedDict([
rgba_masks = OrderedDict([
(b"none" , (AlphaMask.NONE, RGBMask.NONE)),
(b"r" , (AlphaMask.NONE, RGBMask.R)),
(b"g" , (AlphaMask.NONE, RGBMask.G)),
@ -72,9 +72,9 @@ def validate_mask_swizzle(token) -> tuple[AlphaMask, RGBMask]:
(b"gba" , (AlphaMask.A, RGBMask.GB)),
(b"rgba" , (AlphaMask.A, RGBMask.RGB)),
])
if token.lexeme not in argb_masks:
if token.lexeme not in rgba_masks:
raise ValidatorError("invalid destination mask", token)
return argb_masks[token.lexeme]
return rgba_masks[token.lexeme]
def validate_masks(ins_ast: parser.TEXInstruction):
addresses = set()

View File

@ -2,6 +2,7 @@ from dataclasses import dataclass
from enum import Enum, auto
from itertools import chain
from typing import Union, Any
from os import path
DEBUG = True
@ -21,9 +22,11 @@ class TT(Enum):
bar = auto()
comma = auto()
minus = auto()
star = auto()
@dataclass
class Token:
filename: str
start_ix: int
line: int
col: int
@ -42,7 +45,10 @@ class LexerError(Exception):
pass
class Lexer:
def __init__(self, buf: memoryview, find_keyword,
def __init__(self,
filename: str,
buf: memoryview,
find_keyword,
emit_newlines=False,
minus_is_token=False):
self.start_ix = 0
@ -53,6 +59,8 @@ class Lexer:
self.find_keyword = find_keyword
self.emit_newlines = emit_newlines
self.minus_is_token = minus_is_token
self.filename = filename
self.nested_lexer = None
def at_end_p(self):
return self.current_ix >= len(self.buf)
@ -73,7 +81,7 @@ class Lexer:
return self.buf[self.current_ix]
def pos(self):
return self.start_ix, self.line, self.col - (self.current_ix - self.start_ix)
return self.filename, self.start_ix, self.line, self.col - (self.current_ix - self.start_ix)
def identifier(self):
while not self.at_end_p() and self.peek() in identifier_characters:
@ -84,8 +92,26 @@ class Lexer:
else:
return Token(*self.pos(), TT.identifier, self.lexeme(), None)
def include(self, filename):
    """Start lexing an included file.

    `filename` is the bytes lexeme taken from the `#include` directive; it is
    decoded as UTF-8 and resolved relative to the directory of the file this
    lexer is reading.  The included file is read in full and handed to a new
    `Lexer` with the same options, stored in `self.nested_lexer`.
    """
    included_path = path.join(path.dirname(self.filename), filename.decode('utf-8'))
    with open(included_path, 'rb') as included_file:
        contents = included_file.read()
    self.nested_lexer = Lexer(
        included_path,
        contents,
        find_keyword=self.find_keyword,
        emit_newlines=self.emit_newlines,
        minus_is_token=self.minus_is_token,
    )
def lex_token(self):
while True:
if self.nested_lexer is not None:
token = self.nested_lexer.lex_token()
if token.type is TT.eof:
self.nested_lexer = None
else:
return token
self.start_ix = self.current_ix
if self.at_end_p():
@ -112,6 +138,8 @@ class Lexer:
return Token(*self.pos(), TT.semicolon, self.lexeme())
elif c == ord(','):
return Token(*self.pos(), TT.comma, self.lexeme())
elif c == ord('*'):
return Token(*self.pos(), TT.star, self.lexeme())
elif c == ord('-') and self.peek() == ord('-'):
self.advance()
while not self.at_end_p() and self.peek() != ord('\n'):
@ -119,8 +147,24 @@ class Lexer:
elif self.minus_is_token and c == ord('-'):
return Token(*self.pos(), TT.minus, self.lexeme())
elif c == ord('#'):
while not self.at_end_p() and self.peek() != ord('\n'):
for c in b"include":
o = self.advance()
if o != c:
token = Token(*self.pos(), None, self.lexeme())
raise LexerError(f"unexpected character at line:{self.line} col:{self.col}, expected `#include`", token)
while self.peek() == ord(' '):
self.advance()
self.start_ix = self.current_ix
quote = self.advance()
if quote != ord('"'):
token = Token(*self.pos(), None, self.lexeme())
raise LexerError(f"unexpected character at line:{self.line} col:{self.col}, expected `\"`", token)
self.start_ix = self.current_ix
while self.peek() != ord('"'):
self.advance()
filename = self.lexeme()
assert self.advance() == ord('"')
self.include(filename)
elif c == ord(' ') or c == ord('\r') or c == ord('\t'):
pass
elif c == ord('\n'):

View File

@ -22,8 +22,8 @@ class BaseParser:
self.current_ix += 1
return token
def match(self, token_type):
token = self.peek()
def match(self, token_type, offset=0):
token = self.peek(offset)
return token.type == token_type
def match_keyword(self, keyword):

View File

@ -1,4 +1,5 @@
import sys
import struct
from assembler.lexer import Lexer, LexerError
from assembler.validator import ValidatorError
@ -21,8 +22,8 @@ out[0].xz = VE_MAD input[0].-y-_-0-_ temp[0].x_0_ temp[0].y_0_
out[0].yw = VE_MAD input[0]._x_0 temp[0]._x_0 temp[0]._z_1
"""
def frontend_inner(buf):
lexer = Lexer(buf, find_keyword)
def frontend_inner(filename, buf):
lexer = Lexer(filename, buf, find_keyword)
tokens = list(lexer.lex_tokens())
parser = Parser(tokens)
for ins in parser.instructions():
@ -36,21 +37,34 @@ def frontend_inner(buf):
def frontend(filename, buf):
try:
yield from frontend_inner(buf)
yield from frontend_inner(filename, buf)
except ParserError as e:
print_error(input_filename, buf, e)
print_error(e)
raise
except LexerError as e:
print_error(input_filename, buf, e)
print_error(e)
raise
except ValidatorError as e:
print_error(filename, buf, e)
print_error(e)
raise
if __name__ == "__main__":
assert len(sys.argv) in {2, 3}
input_filename = sys.argv[1]
#output_filename = sys.argv[2]
binary = len(sys.argv) == 3
if binary:
output_filename = sys.argv[2]
with open(input_filename, 'rb') as f:
buf = f.read()
for cw in frontend(input_filename, buf):
code_gen = list(frontend(input_filename, buf))
if not binary:
for cw in code_gen:
print(f"0x{cw[0]:08x}, 0x{cw[1]:08x}, 0x{cw[2]:08x}, 0x{cw[3]:08x},")
else:
with open(output_filename, 'wb') as f:
for cw in code_gen:
data = struct.pack("<IIII", *cw)
f.write(data)

2336
regs/display_registers.inc Normal file

File diff suppressed because it is too large Load Diff

1752
regs/display_registers.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,628 @@
{
.name = "MC_IND_INDEX",
.address = 0x70,
},
{
.name = "MC_IND_DATA",
.address = 0x74,
},
{
.name = "MC_STATUS",
.address = 0x0,
},
{
.name = "MC_ARB_MIN",
.address = 0x10,
},
{
.name = "MC_PT0_CNTL",
.address = 0x100,
},
{
.name = "MC_PT0_CONTEXT0_CNTL",
.address = 0x102,
},
{
.name = "MC_PT0_CONTEXT1_CNTL",
.address = 0x103,
},
{
.name = "MC_PT0_CONTEXT2_CNTL",
.address = 0x104,
},
{
.name = "MC_PT0_CONTEXT3_CNTL",
.address = 0x105,
},
{
.name = "MC_PT0_CONTEXT4_CNTL",
.address = 0x106,
},
{
.name = "MC_PT0_CONTEXT5_CNTL",
.address = 0x107,
},
{
.name = "MC_PT0_CONTEXT6_CNTL",
.address = 0x108,
},
{
.name = "MC_PT0_CONTEXT7_CNTL",
.address = 0x109,
},
{
.name = "MC_PT0_SYSTEM_APERTURE_LOW_ADDR",
.address = 0x112,
},
{
.name = "MC_PT0_SYSTEM_APERTURE_HIGH_ADDR",
.address = 0x114,
},
{
.name = "MC_PT0_SURFACE_PROBE",
.address = 0x116,
},
{
.name = "MC_PT0_SURFACE_PROBE_FAULT_STATUS",
.address = 0x118,
},
{
.name = "MC_PT0_PROTECTION_FAULT_STATUS",
.address = 0x11a,
},
{
.name = "MC_PT0_CONTEXT0_DEFAULT_READ_ADDR",
.address = 0x11c,
},
{
.name = "MC_PT0_CONTEXT1_DEFAULT_READ_ADDR",
.address = 0x11d,
},
{
.name = "MC_PT0_CONTEXT2_DEFAULT_READ_ADDR",
.address = 0x11e,
},
{
.name = "MC_PT0_CONTEXT3_DEFAULT_READ_ADDR",
.address = 0x11f,
},
{
.name = "MC_ARB_TIMERS",
.address = 0x12,
},
{
.name = "MC_PT0_CONTEXT4_DEFAULT_READ_ADDR",
.address = 0x120,
},
{
.name = "MC_PT0_CONTEXT5_DEFAULT_READ_ADDR",
.address = 0x121,
},
{
.name = "MC_PT0_CONTEXT6_DEFAULT_READ_ADDR",
.address = 0x122,
},
{
.name = "MC_PT0_CONTEXT7_DEFAULT_READ_ADDR",
.address = 0x123,
},
{
.name = "MC_PT0_CONTEXT0_FLAT_BASE_ADDR",
.address = 0x12c,
},
{
.name = "MC_PT0_CONTEXT1_FLAT_BASE_ADDR",
.address = 0x12d,
},
{
.name = "MC_PT0_CONTEXT2_FLAT_BASE_ADDR",
.address = 0x12e,
},
{
.name = "MC_PT0_CONTEXT3_FLAT_BASE_ADDR",
.address = 0x12f,
},
{
.name = "MC_ARB_DRAM_PENALTIES",
.address = 0x13,
},
{
.name = "MC_PT0_CONTEXT4_FLAT_BASE_ADDR",
.address = 0x130,
},
{
.name = "MC_PT0_CONTEXT5_FLAT_BASE_ADDR",
.address = 0x131,
},
{
.name = "MC_PT0_CONTEXT6_FLAT_BASE_ADDR",
.address = 0x132,
},
{
.name = "MC_PT0_CONTEXT7_FLAT_BASE_ADDR",
.address = 0x133,
},
{
.name = "MC_PT0_CONTEXT0_FLAT_START_ADDR",
.address = 0x13c,
},
{
.name = "MC_PT0_CONTEXT1_FLAT_START_ADDR",
.address = 0x13d,
},
{
.name = "MC_PT0_CONTEXT2_FLAT_START_ADDR",
.address = 0x13e,
},
{
.name = "MC_PT0_CONTEXT3_FLAT_START_ADDR",
.address = 0x13f,
},
{
.name = "MC_ARB_DRAM_PENALTIES2",
.address = 0x14,
},
{
.name = "MC_PT0_CONTEXT4_FLAT_START_ADDR",
.address = 0x140,
},
{
.name = "MC_PT0_CONTEXT5_FLAT_START_ADDR",
.address = 0x141,
},
{
.name = "MC_PT0_CONTEXT6_FLAT_START_ADDR",
.address = 0x142,
},
{
.name = "MC_PT0_CONTEXT7_FLAT_START_ADDR",
.address = 0x143,
},
{
.name = "MC_PT0_CONTEXT0_FLAT_END_ADDR",
.address = 0x14c,
},
{
.name = "MC_PT0_CONTEXT1_FLAT_END_ADDR",
.address = 0x14d,
},
{
.name = "MC_PT0_CONTEXT2_FLAT_END_ADDR",
.address = 0x14e,
},
{
.name = "MC_PT0_CONTEXT3_FLAT_END_ADDR",
.address = 0x14f,
},
{
.name = "MC_ARB_DRAM_PENALTIES3",
.address = 0x15,
},
{
.name = "MC_PT0_CONTEXT4_FLAT_END_ADDR",
.address = 0x150,
},
{
.name = "MC_PT0_CONTEXT5_FLAT_END_ADDR",
.address = 0x151,
},
{
.name = "MC_PT0_CONTEXT6_FLAT_END_ADDR",
.address = 0x152,
},
{
.name = "MC_PT0_CONTEXT7_FLAT_END_ADDR",
.address = 0x153,
},
{
.name = "MC_PT0_CONTEXT0_MULTI_LEVEL_BASE_ADDR",
.address = 0x15c,
},
{
.name = "MC_PT0_CONTEXT1_MULTI_LEVEL_BASE_ADDR",
.address = 0x15d,
},
{
.name = "MC_PT0_CONTEXT2_MULTI_LEVEL_BASE_ADDR",
.address = 0x15e,
},
{
.name = "MC_PT0_CONTEXT3_MULTI_LEVEL_BASE_ADDR",
.address = 0x15f,
},
{
.name = "MC_ARB_RATIO_CLK_SEQ",
.address = 0x16,
},
{
.name = "MC_PT0_CONTEXT4_MULTI_LEVEL_BASE_ADDR",
.address = 0x160,
},
{
.name = "MC_PT0_CONTEXT5_MULTI_LEVEL_BASE_ADDR",
.address = 0x161,
},
{
.name = "MC_PT0_CONTEXT6_MULTI_LEVEL_BASE_ADDR",
.address = 0x162,
},
{
.name = "MC_PT0_CONTEXT7_MULTI_LEVEL_BASE_ADDR",
.address = 0x163,
},
{
.name = "MC_PT0_CLIENT0_CNTL",
.address = 0x16c,
},
{
.name = "MC_PT0_CLIENT1_CNTL",
.address = 0x16d,
},
{
.name = "MC_PT0_CLIENT2_CNTL",
.address = 0x16e,
},
{
.name = "MC_PT0_CLIENT3_CNTL",
.address = 0x16f,
},
{
.name = "MC_ARB_RDWR_SWITCH",
.address = 0x17,
},
{
.name = "MC_PT0_CLIENT4_CNTL",
.address = 0x170,
},
{
.name = "MC_PT0_CLIENT5_CNTL",
.address = 0x171,
},
{
.name = "MC_PT0_CLIENT6_CNTL",
.address = 0x172,
},
{
.name = "MC_PT0_CLIENT7_CNTL",
.address = 0x173,
},
{
.name = "MC_PT0_CLIENT8_CNTL",
.address = 0x174,
},
{
.name = "MC_PT0_CLIENT9_CNTL",
.address = 0x175,
},
{
.name = "MC_PT0_CLIENT10_CNTL",
.address = 0x176,
},
{
.name = "MC_PT0_CLIENT11_CNTL",
.address = 0x177,
},
{
.name = "MC_PT0_CLIENT12_CNTL",
.address = 0x178,
},
{
.name = "MC_PT0_CLIENT13_CNTL",
.address = 0x179,
},
{
.name = "MC_PT0_CLIENT14_CNTL",
.address = 0x17a,
},
{
.name = "MC_PT0_CLIENT15_CNTL",
.address = 0x17b,
},
{
.name = "MC_PT0_CLIENT16_CNTL",
.address = 0x17c,
},
{
.name = "MC_SW_CNTL",
.address = 0x18,
},
{
.name = "MC_TIMING_CNTL_2",
.address = 0x3,
},
{
.name = "MC_WRITE_AGE1",
.address = 0x37,
},
{
.name = "MC_WRITE_AGE2",
.address = 0x38,
},
{
.name = "MC_FB_LOCATION",
.address = 0x4,
},
{
.name = "MC_AGP_LOCATION",
.address = 0x5,
},
{
.name = "AGP_BASE",
.address = 0x6,
},
{
.name = "MC_SEQ_DRAM",
.address = 0x60,
},
{
.name = "MC_SEQ_RAS_TIMING",
.address = 0x61,
},
{
.name = "MC_SEQ_CAS_TIMING",
.address = 0x62,
},
{
.name = "MC_SEQ_MISC_TIMING",
.address = 0x63,
},
{
.name = "MC_SEQ_RD_CTL_I0",
.address = 0x64,
},
{
.name = "MC_SEQ_RD_CTL_I1",
.address = 0x65,
},
{
.name = "MC_SEQ_WR_CTL_I0",
.address = 0x66,
},
{
.name = "MC_SEQ_WR_CTL_I1",
.address = 0x67,
},
{
.name = "MC_SEQ_IO_CTL_I0",
.address = 0x68,
},
{
.name = "MC_SEQ_IO_CTL_I1",
.address = 0x69,
},
{
.name = "MC_SEQ_NPL_CTL_I0",
.address = 0x6a,
},
{
.name = "MC_SEQ_NPL_CTL_I1",
.address = 0x6b,
},
{
.name = "MC_SEQ_CK_PAD_CNTL_I0",
.address = 0x6c,
},
{
.name = "MC_SEQ_CK_PAD_CNTL_I1",
.address = 0x6d,
},
{
.name = "MC_SEQ_CMD_PAD_CNTL_I0",
.address = 0x6e,
},
{
.name = "MC_SEQ_CMD_PAD_CNTL_I1",
.address = 0x6f,
},
{
.name = "AGP_BASE_2",
.address = 0x7,
},
{
.name = "MC_SEQ_DQ_PAD_CNTL_I0",
.address = 0x70,
},
{
.name = "MC_SEQ_DQ_PAD_CNTL_I1",
.address = 0x71,
},
{
.name = "MC_SEQ_QS_PAD_CNTL_I0",
.address = 0x72,
},
{
.name = "MC_SEQ_QS_PAD_CNTL_I1",
.address = 0x73,
},
{
.name = "MC_SEQ_A_PAD_CNTL_I0",
.address = 0x74,
},
{
.name = "MC_SEQ_A_PAD_CNTL_I1",
.address = 0x75,
},
{
.name = "MC_SEQ_CMD",
.address = 0x76,
},
{
.name = "MC_SEQ_STATUS",
.address = 0x77,
},
{
.name = "MC_CNTL0",
.address = 0x8,
},
{
.name = "MC_IO_PAD_CNTL_I0",
.address = 0x80,
},
{
.name = "MC_IO_PAD_CNTL_I1",
.address = 0x81,
},
{
.name = "MC_IO_PAD_CNTL",
.address = 0x82,
},
{
.name = "MC_IO_RD_DQ_CNTL_I0",
.address = 0x84,
},
{
.name = "MC_IO_RD_DQ_CNTL_I1",
.address = 0x85,
},
{
.name = "MC_IO_RD_QS_CNTL_I0",
.address = 0x86,
},
{
.name = "MC_IO_RD_QS_CNTL_I1",
.address = 0x87,
},
{
.name = "MC_IO_WR_CNTL_I0",
.address = 0x88,
},
{
.name = "MC_IO_WR_CNTL_I1",
.address = 0x89,
},
{
.name = "MC_IO_CK_PAD_CNTL_I0",
.address = 0x8a,
},
{
.name = "MC_IO_CK_PAD_CNTL_I1",
.address = 0x8b,
},
{
.name = "MC_IO_CMD_PAD_CNTL_I0",
.address = 0x8c,
},
{
.name = "MC_IO_CMD_PAD_CNTL_I1",
.address = 0x8d,
},
{
.name = "MC_IO_DQ_PAD_CNTL_I0",
.address = 0x8e,
},
{
.name = "MC_IO_DQ_PAD_CNTL_I1",
.address = 0x8f,
},
{
.name = "MC_CNTL1",
.address = 0x9,
},
{
.name = "MC_IO_QS_PAD_CNTL_I0",
.address = 0x90,
},
{
.name = "MC_IO_QS_PAD_CNTL_I1",
.address = 0x91,
},
{
.name = "MC_IO_A_PAD_CNTL_I0",
.address = 0x92,
},
{
.name = "MC_IO_A_PAD_CNTL_I1",
.address = 0x93,
},
{
.name = "MC_IO_WR_DQ_CNTL_I0",
.address = 0x94,
},
{
.name = "MC_IO_WR_DQ_CNTL_I1",
.address = 0x95,
},
{
.name = "MC_IO_WR_QS_CNTL_I0",
.address = 0x96,
},
{
.name = "MC_IO_WR_QS_CNTL_I1",
.address = 0x97,
},
{
.name = "MC_VENDOR_ID_I0",
.address = 0x98,
},
{
.name = "MC_VENDOR_ID_I1",
.address = 0x99,
},
{
.name = "MC_NPL_STATUS_I0",
.address = 0x9a,
},
{
.name = "MC_NPL_STATUS_I1",
.address = 0x9b,
},
{
.name = "MC_IO_RD_QS2_CNTL_I0",
.address = 0x9c,
},
{
.name = "MC_IO_RD_QS2_CNTL_I1",
.address = 0x9d,
},
{
.name = "MC_RFSH_CNTL",
.address = 0xa,
},
{
.name = "MC_IMP_CNTL",
.address = 0xa0,
},
{
.name = "MC_IMP_DEBUG",
.address = 0xa1,
},
{
.name = "MC_IMP_STATUS",
.address = 0xa2,
},
{
.name = "MC_RBS_MAP",
.address = 0xb0,
},
{
.name = "MC_RBS_CZT_HWM",
.address = 0xb1,
},
{
.name = "MC_RBS_SUN_HWM",
.address = 0xb2,
},
{
.name = "MC_RBS_MISC",
.address = 0xb3,
},
{
.name = "MC_PMG_CMD",
.address = 0xe0,
},
{
.name = "MC_PMG_CFG",
.address = 0xe1,
},
{
.name = "MC_MISC_0",
.address = 0xf0,
},
{
.name = "MC_MISC_1",
.address = 0xf1,
},
{
.name = "MC_DEBUG",
.address = 0xfe,
},

View File

@ -0,0 +1,471 @@
MC_IND_INDEX
MCDEC:0x70
2-2
MC_IND_DATA
MCDEC:0x74
2-2
MC_STATUS
MCIND:0x0
2-2
MC_ARB_MIN
MCIND:0x10
2-8
MC_PT0_CNTL
MCIND:0x100
2-32
MC_PT0_CONTEXT0_CNTL
MCIND:0x102
2-33
MC_PT0_CONTEXT1_CNTL
MCIND:0x103
2-33
MC_PT0_CONTEXT2_CNTL
MCIND:0x104
2-33
MC_PT0_CONTEXT3_CNTL
MCIND:0x105
2-34
MC_PT0_CONTEXT4_CNTL
MCIND:0x106
2-34
MC_PT0_CONTEXT5_CNTL
MCIND:0x107
2-34
MC_PT0_CONTEXT6_CNTL
MCIND:0x108
2-34
MC_PT0_CONTEXT7_CNTL
MCIND:0x109
2-35
MC_PT0_SYSTEM_APERTURE_LOW_ADDR
MCIND:0x112
2-35
MC_PT0_SYSTEM_APERTURE_HIGH_ADDR
MCIND:0x114
2-35
MC_PT0_SURFACE_PROBE
MCIND:0x116
2-35
MC_PT0_SURFACE_PROBE_FAULT_STATUS
MCIND:0x118
2-36
MC_PT0_PROTECTION_FAULT_STATUS
MCIND:0x11A
2-36
MC_PT0_CONTEXT0_DEFAULT_READ_ADDR
MCIND:0x11C
2-36
MC_PT0_CONTEXT1_DEFAULT_READ_ADDR
MCIND:0x11D
2-36
MC_PT0_CONTEXT2_DEFAULT_READ_ADDR
MCIND:0x11E
2-37
MC_PT0_CONTEXT3_DEFAULT_READ_ADDR
MCIND:0x11F
2-37
MC_ARB_TIMERS
MCIND:0x12
2-8
MC_PT0_CONTEXT4_DEFAULT_READ_ADDR
MCIND:0x120
2-37
MC_PT0_CONTEXT5_DEFAULT_READ_ADDR
MCIND:0x121
2-37
MC_PT0_CONTEXT6_DEFAULT_READ_ADDR
MCIND:0x122
2-37
MC_PT0_CONTEXT7_DEFAULT_READ_ADDR
MCIND:0x123
2-37
MC_PT0_CONTEXT0_FLAT_BASE_ADDR
MCIND:0x12C
2-37
MC_PT0_CONTEXT1_FLAT_BASE_ADDR
MCIND:0x12D
2-38
MC_PT0_CONTEXT2_FLAT_BASE_ADDR
MCIND:0x12E
2-38
MC_PT0_CONTEXT3_FLAT_BASE_ADDR
MCIND:0x12F
2-38
MC_ARB_DRAM_PENALTIES
MCIND:0x13
2-8
MC_PT0_CONTEXT4_FLAT_BASE_ADDR
MCIND:0x130
2-38
MC_PT0_CONTEXT5_FLAT_BASE_ADDR
MCIND:0x131
2-38
MC_PT0_CONTEXT6_FLAT_BASE_ADDR
MCIND:0x132
2-38
MC_PT0_CONTEXT7_FLAT_BASE_ADDR
MCIND:0x133
2-39
MC_PT0_CONTEXT0_FLAT_START_ADDR
MCIND:0x13C
2-39
MC_PT0_CONTEXT1_FLAT_START_ADDR
MCIND:0x13D
2-39
MC_PT0_CONTEXT2_FLAT_START_ADDR
MCIND:0x13E
2-39
MC_PT0_CONTEXT3_FLAT_START_ADDR
MCIND:0x13F
2-39
MC_ARB_DRAM_PENALTIES2
MCIND:0x14
2-8
MC_PT0_CONTEXT4_FLAT_START_ADDR
MCIND:0x140
2-39
MC_PT0_CONTEXT5_FLAT_START_ADDR
MCIND:0x141
2-40
MC_PT0_CONTEXT6_FLAT_START_ADDR
MCIND:0x142
2-40
MC_PT0_CONTEXT7_FLAT_START_ADDR
MCIND:0x143
2-40
MC_PT0_CONTEXT0_FLAT_END_ADDR
MCIND:0x14C
2-40
MC_PT0_CONTEXT1_FLAT_END_ADDR
MCIND:0x14D
2-40
MC_PT0_CONTEXT2_FLAT_END_ADDR
MCIND:0x14E
2-40
MC_PT0_CONTEXT3_FLAT_END_ADDR
MCIND:0x14F
2-41
MC_ARB_DRAM_PENALTIES3
MCIND:0x15
2-9
MC_PT0_CONTEXT4_FLAT_END_ADDR
MCIND:0x150
2-41
MC_PT0_CONTEXT5_FLAT_END_ADDR
MCIND:0x151
2-41
MC_PT0_CONTEXT6_FLAT_END_ADDR
MCIND:0x152
2-41
MC_PT0_CONTEXT7_FLAT_END_ADDR
MCIND:0x153
2-41
MC_PT0_CONTEXT0_MULTI_LEVEL_BASE_ADDR
MCIND:0x15C
2-41
MC_PT0_CONTEXT1_MULTI_LEVEL_BASE_ADDR
MCIND:0x15D
2-42
MC_PT0_CONTEXT2_MULTI_LEVEL_BASE_ADDR
MCIND:0x15E
2-42
MC_PT0_CONTEXT3_MULTI_LEVEL_BASE_ADDR
MCIND:0x15F
2-42
MC_ARB_RATIO_CLK_SEQ
MCIND:0x16
2-9
MC_PT0_CONTEXT4_MULTI_LEVEL_BASE_ADDR
MCIND:0x160
2-42
MC_PT0_CONTEXT5_MULTI_LEVEL_BASE_ADDR
MCIND:0x161
2-42
MC_PT0_CONTEXT6_MULTI_LEVEL_BASE_ADDR
MCIND:0x162
2-42
MC_PT0_CONTEXT7_MULTI_LEVEL_BASE_ADDR
MCIND:0x163
2-42
MC_PT0_CLIENT0_CNTL
MCIND:0x16C
2-43
MC_PT0_CLIENT1_CNTL
MCIND:0x16D
2-43
MC_PT0_CLIENT2_CNTL
MCIND:0x16E
2-44
MC_PT0_CLIENT3_CNTL
MCIND:0x16F
2-45
MC_ARB_RDWR_SWITCH
MCIND:0x17
2-9
MC_PT0_CLIENT4_CNTL
MCIND:0x170
2-46
MC_PT0_CLIENT5_CNTL
MCIND:0x171
2-47
MC_PT0_CLIENT6_CNTL
MCIND:0x172
2-48
MC_PT0_CLIENT7_CNTL
MCIND:0x173
2-49
MC_PT0_CLIENT8_CNTL
MCIND:0x174
2-49
MC_PT0_CLIENT9_CNTL
MCIND:0x175
2-50
MC_PT0_CLIENT10_CNTL
MCIND:0x176
2-51
MC_PT0_CLIENT11_CNTL
MCIND:0x177
2-52
MC_PT0_CLIENT12_CNTL
MCIND:0x178
2-53
MC_PT0_CLIENT13_CNTL
MCIND:0x179
2-54
MC_PT0_CLIENT14_CNTL
MCIND:0x17A
2-55
MC_PT0_CLIENT15_CNTL
MCIND:0x17B
2-55
MC_PT0_CLIENT16_CNTL
MCIND:0x17C
2-56
MC_SW_CNTL
MCIND:0x18
2-9
MC_TIMING_CNTL_2
MCIND:0x3
2-3
MC_WRITE_AGE1
MCIND:0x37
2-9
MC_WRITE_AGE2
MCIND:0x38
2-10
MC_FB_LOCATION
MCIND:0x4
2-3
MC_AGP_LOCATION
MCIND:0x5
2-3
AGP_BASE
MCIND:0x6
2-3
MC_SEQ_DRAM
MCIND:0x60
2-10
MC_SEQ_RAS_TIMING
MCIND:0x61
2-11
MC_SEQ_CAS_TIMING
MCIND:0x62
2-11
MC_SEQ_MISC_TIMING
MCIND:0x63
2-12
MC_SEQ_RD_CTL_I0
MCIND:0x64
2-12
MC_SEQ_RD_CTL_I1
MCIND:0x65
2-13
MC_SEQ_WR_CTL_I0
MCIND:0x66
2-14
MC_SEQ_WR_CTL_I1
MCIND:0x67
2-15
MC_SEQ_IO_CTL_I0
MCIND:0x68
2-15
MC_SEQ_IO_CTL_I1
MCIND:0x69
2-15
MC_SEQ_NPL_CTL_I0
MCIND:0x6A
2-16
MC_SEQ_NPL_CTL_I1
MCIND:0x6B
2-16
MC_SEQ_CK_PAD_CNTL_I0
MCIND:0x6C
2-16
MC_SEQ_CK_PAD_CNTL_I1
MCIND:0x6D
2-16
MC_SEQ_CMD_PAD_CNTL_I0
MCIND:0x6E
2-17
MC_SEQ_CMD_PAD_CNTL_I1
MCIND:0x6F
2-17
AGP_BASE_2
MCIND:0x7
2-3
MC_SEQ_DQ_PAD_CNTL_I0
MCIND:0x70
2-17
MC_SEQ_DQ_PAD_CNTL_I1
MCIND:0x71
2-17
MC_SEQ_QS_PAD_CNTL_I0
MCIND:0x72
2-18
MC_SEQ_QS_PAD_CNTL_I1
MCIND:0x73
2-18
MC_SEQ_A_PAD_CNTL_I0
MCIND:0x74
2-18
MC_SEQ_A_PAD_CNTL_I1
MCIND:0x75
2-18
MC_SEQ_CMD
MCIND:0x76
2-19
MC_SEQ_STATUS
MCIND:0x77
2-19
MC_CNTL0
MCIND:0x8
2-4
MC_IO_PAD_CNTL_I0
MCIND:0x80
2-19
MC_IO_PAD_CNTL_I1
MCIND:0x81
2-20
MC_IO_PAD_CNTL
MCIND:0x82
2-21
MC_IO_RD_DQ_CNTL_I0
MCIND:0x84
2-22
MC_IO_RD_DQ_CNTL_I1
MCIND:0x85
2-22
MC_IO_RD_QS_CNTL_I0
MCIND:0x86
2-22
MC_IO_RD_QS_CNTL_I1
MCIND:0x87
2-22
MC_IO_WR_CNTL_I0
MCIND:0x88
2-22
MC_IO_WR_CNTL_I1
MCIND:0x89
2-23
MC_IO_CK_PAD_CNTL_I0
MCIND:0x8A
2-23
MC_IO_CK_PAD_CNTL_I1
MCIND:0x8B
2-23
MC_IO_CMD_PAD_CNTL_I0
MCIND:0x8C
2-23
MC_IO_CMD_PAD_CNTL_I1
MCIND:0x8D
2-24
MC_IO_DQ_PAD_CNTL_I0
MCIND:0x8E
2-24
MC_IO_DQ_PAD_CNTL_I1
MCIND:0x8F
2-24
MC_CNTL1
MCIND:0x9
2-6
MC_IO_QS_PAD_CNTL_I0
MCIND:0x90
2-25
MC_IO_QS_PAD_CNTL_I1
MCIND:0x91
2-25
MC_IO_A_PAD_CNTL_I0
MCIND:0x92
2-25
MC_IO_A_PAD_CNTL_I1
MCIND:0x93
2-26
MC_IO_WR_DQ_CNTL_I0
MCIND:0x94
2-26
MC_IO_WR_DQ_CNTL_I1
MCIND:0x95
2-26
MC_IO_WR_QS_CNTL_I0
MCIND:0x96
2-26
MC_IO_WR_QS_CNTL_I1
MCIND:0x97
2-27
MC_VENDOR_ID_I0
MCIND:0x98
2-27
MC_VENDOR_ID_I1
MCIND:0x99
2-27
MC_NPL_STATUS_I0
MCIND:0x9A
2-27
MC_NPL_STATUS_I1
MCIND:0x9B
2-27
MC_IO_RD_QS2_CNTL_I0
MCIND:0x9C
2-28
MC_IO_RD_QS2_CNTL_I1
MCIND:0x9D
2-28
MC_RFSH_CNTL
MCIND:0xA
2-8
MC_IMP_CNTL
MCIND:0xA0
2-28
MC_IMP_DEBUG
MCIND:0xA1
2-28
MC_IMP_STATUS
MCIND:0xA2
2-28
MC_RBS_MAP
MCIND:0xB0
2-29
MC_RBS_CZT_HWM
MCIND:0xB1
2-30
MC_RBS_SUN_HWM
MCIND:0xB2
2-30
MC_RBS_MISC
MCIND:0xB3
2-30
MC_PMG_CMD
MCIND:0xE0
2-31
MC_PMG_CFG
MCIND:0xE1
2-31
MC_MISC_0
MCIND:0xF0
2-31
MC_MISC_1
MCIND:0xF1
2-31
MC_DEBUG
MCIND:0xFE
2-32

View File

@ -8,6 +8,7 @@ with open(sys.argv[1]) as f:
values = [
int(s.strip(), 16)
for s in f.read().strip().split(",")
if s
]
undocumented_registers = {
@ -15,6 +16,17 @@ undocumented_registers = {
0x2184: "VAP_VSM_VTX_ASSM",
}
def decode_print(register_name, value, paren=False, display_register_name=None):
    """Print a register name followed by its decoded value.

    register_name: name passed to decode_bits to select the decoding.
    value: raw register value to decode.
    paren: when true, the printed name is wrapped in parentheses.
    display_register_name: name to print instead of register_name, if given.
    """
    label = register_name if display_register_name is None else display_register_name
    fields = decode_bits(register_name, value)
    # The first decoded line loses its two-character prefix and gets "= " instead.
    body_lines = [f"= {fields[0][2:]}"]
    body_lines.extend(fields[1:])
    body = indent('\n'.join(body_lines), ' ')
    title = f"({label})" if paren else label
    print(f" {title}\n{body}")
class Parser:
def __init__(self, values):
self.ix = 0
@ -52,10 +64,7 @@ class Parser:
try:
if one_reg or value == 0:
assert False
decoded_value = decode_bits(register_name, value)
head = decoded_value[0][2:]
tail = indent('\n'.join(decoded_value[1:]), ' ')
print(f" {register_name} = {head}\n{tail}")
decode_print(register_name, value)
except AssertionError:
print(f" {register_name} = 0x{value:08x}")
else:
@ -79,11 +88,83 @@ class Parser:
it_opcode = (header >> 8) & 0xff
count = (header >> 16) & 0x3fff
print(f"type 3: op:{it_opcode:02x} count:{count:04x}")
opcode_names = dict((v, k) for k, v in [
("3D_DRAW_VBUF", 0x28),
("3D_DRAW_IMMD", 0x29),
("3D_DRAW_INDX", 0x2A),
("LOAD_PALETTE", 0x2C),
("3D_LOAD_VBPNTR", 0x2F),
("INDX_BUFFER", 0x33),
("3D_DRAW_VBUF_2", 0x34),
("3D_DRAW_IMMD_2", 0x35),
("3D_DRAW_INDX_2", 0x36),
("3D_CLEAR_HIZ", 0x37),
("3D_DRAW_128", 0x39),
])
opcode_name = f"{it_opcode:02x}" if it_opcode not in opcode_names else opcode_names[it_opcode]
registers = {
"3D_DRAW_VBUF": ["VAP_VTX_FMT", "VAP_VF_CNTL"],
"3D_DRAW_IMMD": ["VAP_VTX_FMT", "VAP_VF_CNTL"],
"3D_DRAW_INDX": ["VAP_VTX_FMT", "VAP_VF_CNTL"],
"3D_LOAD_VBPNTR": ["VAP_VTX_NUM_ARRAYS",
"VAP_VTX_AOS_ATTR01",
"VAP_VTX_AOS_ADDR0",
"VAP_VTX_AOS_ADDR1",
"VAP_VTX_AOS_ATTR23",
"VAP_VTX_AOS_ADDR2",
"VAP_VTX_AOS_ADDR3",
"VAP_VTX_AOS_ATTR45",
"VAP_VTX_AOS_ADDR4",
"VAP_VTX_AOS_ADDR5",
"VAP_VTX_AOS_ATTR67",
"VAP_VTX_AOS_ADDR6",
"VAP_VTX_AOS_ADDR7",
"VAP_VTX_AOS_ATTR89",
"VAP_VTX_AOS_ADDR8",
"VAP_VTX_AOS_ADDR9",
"VAP_VTX_AOS_ATTR1011",
"VAP_VTX_AOS_ADDR10",
"VAP_VTX_AOS_ADDR11",
"VAP_VTX_AOS_ATTR1213",
"VAP_VTX_AOS_ADDR12",
"VAP_VTX_AOS_ADDR13",
"VAP_VTX_AOS_ATTR1415",
"VAP_VTX_AOS_ADDR14",
"VAP_VTX_AOS_ADDR15"],
"INDX_BUFFER": [(("ONE_REG_WR", (31, 31)), ("SKIP_COUNT", (18, 16)), ("DESTINATION", (12, 0))),
(("BUFFER_BASE", (31, 0)),),
(("BUFFER_SIZE", (31, 0)),)],
"3D_DRAW_VBUF_2": ["VAP_VF_CNTL"],
"3D_DRAW_IMMD_2": ["VAP_VF_CNTL"],
"3D_DRAW_INDX_2": ["VAP_VF_CNTL"],
}
print(f"type 3: op:{opcode_name} count:{count:04x}")
ix = 0
while count >= 0:
value = self.consume()
if opcode_name in registers and ix < len(registers[opcode_name]):
register_name = registers[opcode_name][ix]
if type(register_name) is str:
if "_AOS_ATTR" in register_name:
decode_print(register_name[:-2], value, paren=True, display_register_name=register_name)
elif "_AOS_ADDR" in register_name:
decode_print(register_name[:-1], value, paren=True, display_register_name=register_name)
else:
decode_print(register_name, value, paren=True)
else:
print(f" ({opcode_name}__{ix})")
for i, desc in enumerate(register_name):
eq_bar = '=' if i == 0 else '|'
d_name, (high, low) = desc
mask = (1 << ((high - low) + 1)) - 1
v = (value >> low) & mask
print(f' {eq_bar} {opcode_name}__{ix}__{d_name}({v})')
else:
print(f" {value:08x}")
count -= 1
ix += 1
def packet(self):
value = self.peek()

648
regs/pcie_registers.inc Normal file
View File

@ -0,0 +1,648 @@
{
.name = "PCIE_TX_CNTL",
.address = 0x1,
},
{
.name = "PCIE_TX_GART_CNTL",
.address = 0x10,
},
{
.name = "PCIE_TX_GART_DISCARD_RD_ADDR_LO",
.address = 0x11,
},
{
.name = "PCIE_TX_GART_DISCARD_RD_ADDR_HI",
.address = 0x12,
},
{
.name = "PCIE_TX_GART_BASE",
.address = 0x13,
},
{
.name = "PCIE_TX_GART_START_LO",
.address = 0x14,
},
{
.name = "PCIE_TX_GART_START_HI",
.address = 0x15,
},
{
.name = "PCIE_TX_GART_END_LO",
.address = 0x16,
},
{
.name = "PCIE_TX_GART_END_HI",
.address = 0x17,
},
{
.name = "PCIE_TX_GART_ERROR",
.address = 0x18,
},
{
.name = "PCIE_TX_SEQ",
.address = 0x2,
},
{
.name = "PCIE_TX_GART_LRU_MRU_PTR",
.address = 0x20,
},
{
.name = "PCIE_TX_GART_STATUS",
.address = 0x21,
},
{
.name = "PCIE_TX_GART_TLB_VALID",
.address = 0x22,
},
{
.name = "PCIE_TX_GART_TLB0_DATA",
.address = 0x23,
},
{
.name = "PCIE_TX_GART_TLB1_DATA",
.address = 0x24,
},
{
.name = "PCIE_TX_GART_TLB2_DATA",
.address = 0x25,
},
{
.name = "PCIE_TX_GART_TLB3_DATA",
.address = 0x26,
},
{
.name = "PCIE_TX_GART_TLB4_DATA",
.address = 0x27,
},
{
.name = "PCIE_TX_GART_TLB5_DATA",
.address = 0x28,
},
{
.name = "PCIE_TX_GART_TLB6_DATA",
.address = 0x29,
},
{
.name = "PCIE_TX_GART_TLB7_DATA",
.address = 0x2a,
},
{
.name = "PCIE_TX_GART_TLB8_DATA",
.address = 0x2b,
},
{
.name = "PCIE_TX_GART_TLB9_DATA",
.address = 0x2c,
},
{
.name = "PCIE_TX_GART_TLB10_DATA",
.address = 0x2d,
},
{
.name = "PCIE_TX_GART_TLB11_DATA",
.address = 0x2e,
},
{
.name = "PCIE_TX_GART_TLB12_DATA",
.address = 0x2f,
},
{
.name = "PCIE_TX_REPLAY",
.address = 0x3,
},
{
.name = "PCIE_TX_GART_TLB13_DATA",
.address = 0x30,
},
{
.name = "PCIE_TX_GART_TLB14_DATA",
.address = 0x31,
},
{
.name = "PCIE_TX_GART_TLB15_DATA",
.address = 0x32,
},
{
.name = "PCIE_TX_GART_TLB16_DATA",
.address = 0x33,
},
{
.name = "PCIE_TX_GART_TLB17_DATA",
.address = 0x34,
},
{
.name = "PCIE_TX_GART_TLB18_DATA",
.address = 0x35,
},
{
.name = "PCIE_TX_GART_TLB19_DATA",
.address = 0x36,
},
{
.name = "PCIE_TX_GART_TLB20_DATA",
.address = 0x37,
},
{
.name = "PCIE_TX_GART_TLB21_DATA",
.address = 0x38,
},
{
.name = "PCIE_TX_GART_TLB22_DATA",
.address = 0x39,
},
{
.name = "PCIE_TX_GART_TLB23_DATA",
.address = 0x3a,
},
{
.name = "PCIE_TX_GART_TLB24_DATA",
.address = 0x3b,
},
{
.name = "PCIE_TX_GART_TLB25_DATA",
.address = 0x3c,
},
{
.name = "PCIE_TX_GART_TLB26_DATA",
.address = 0x3d,
},
{
.name = "PCIE_TX_GART_TLB27_DATA",
.address = 0x3e,
},
{
.name = "PCIE_TX_GART_TLB28_DATA",
.address = 0x3f,
},
{
.name = "PCIE_TX_CREDITS_CONSUMED",
.address = 0x4,
},
{
.name = "PCIE_TX_GART_TLB29_DATA",
.address = 0x40,
},
{
.name = "PCIE_CLK_CNTL",
.address = 0x400,
},
{
.name = "PCIE_PRBS10",
.address = 0x401,
},
{
.name = "PCIE_PRBS23_BITCNT0",
.address = 0x402,
},
{
.name = "PCIE_PRBS23_BITCNT1",
.address = 0x403,
},
{
.name = "PCIE_PRBS23_BITCNT2",
.address = 0x404,
},
{
.name = "PCIE_PRBS23_BITCNT3",
.address = 0x405,
},
{
.name = "PCIE_PRBS23_BITCNT4",
.address = 0x406,
},
{
.name = "PCIE_PRBS23_BITCNT5",
.address = 0x407,
},
{
.name = "PCIE_PRBS23_BITCNT6",
.address = 0x408,
},
{
.name = "PCIE_PRBS23_BITCNT7",
.address = 0x409,
},
{
.name = "PCIE_PRBS23_BITCNT8",
.address = 0x40a,
},
{
.name = "PCIE_PRBS23_BITCNT9",
.address = 0x40b,
},
{
.name = "PCIE_PRBS23_BITCNT10",
.address = 0x40c,
},
{
.name = "PCIE_PRBS23_BITCNT11",
.address = 0x40d,
},
{
.name = "PCIE_PRBS23_BITCNT12",
.address = 0x40e,
},
{
.name = "PCIE_PRBS23_BITCNT13",
.address = 0x40f,
},
{
.name = "PCIE_TX_GART_TLB30_DATA",
.address = 0x41,
},
{
.name = "PCIE_PRBS23_BITCNT14",
.address = 0x410,
},
{
.name = "PCIE_PRBS23_BITCNT15",
.address = 0x411,
},
{
.name = "PCIE_PRBS23_ERRCNT0",
.address = 0x412,
},
{
.name = "PCIE_PRBS23_ERRCNT1",
.address = 0x413,
},
{
.name = "PCIE_PRBS23_ERRCNT2",
.address = 0x414,
},
{
.name = "PCIE_PRBS23_ERRCNT3",
.address = 0x415,
},
{
.name = "PCIE_PRBS23_ERRCNT4",
.address = 0x416,
},
{
.name = "PCIE_PRBS23_ERRCNT5",
.address = 0x417,
},
{
.name = "PCIE_PRBS23_ERRCNT6",
.address = 0x418,
},
{
.name = "PCIE_PRBS23_ERRCNT7",
.address = 0x419,
},
{
.name = "PCIE_PRBS23_ERRCNT8",
.address = 0x41a,
},
{
.name = "PCIE_PRBS23_ERRCNT9",
.address = 0x41b,
},
{
.name = "PCIE_PRBS23_ERRCNT10",
.address = 0x41c,
},
{
.name = "PCIE_PRBS23_ERRCNT11",
.address = 0x41d,
},
{
.name = "PCIE_PRBS23_ERRCNT12",
.address = 0x41e,
},
{
.name = "PCIE_PRBS23_ERRCNT13",
.address = 0x41f,
},
{
.name = "PCIE_TX_GART_TLB31_DATA",
.address = 0x42,
},
{
.name = "PCIE_PRBS23_ERRCNT14",
.address = 0x420,
},
{
.name = "PCIE_PRBS23_ERRCNT15",
.address = 0x421,
},
{
.name = "PCIE_PRBS23_CTRL0",
.address = 0x422,
},
{
.name = "PCIE_PRBS23_CTRL1",
.address = 0x423,
},
{
.name = "PCIE_PRBS_EN",
.address = 0x424,
},
{
.name = "PCIE_XSTRAP1",
.address = 0x425,
},
{
.name = "PCIE_XSTRAP2",
.address = 0x426,
},
{
.name = "PCIE_XSTRAP5",
.address = 0x429,
},
{
.name = "PCIE_TX_CREDITS_CONSUMED_D",
.address = 0x5,
},
{
.name = "PCIE_TX_CREDITS_CONSUMED_CPLD",
.address = 0x6,
},
{
.name = "PCIE_FLOW_CNTL",
.address = 0x60,
},
{
.name = "PCIE_TXRX_DEBUG_SEQNUM",
.address = 0x61,
},
{
.name = "PCIE_TXRX_TEST_MODE",
.address = 0x62,
},
{
.name = "PCIE_TX_CREDITS_LIMIT",
.address = 0x7,
},
{
.name = "PCIE_RX_CNTL",
.address = 0x70,
},
{
.name = "PCIE_RX_NUM_NACK",
.address = 0x71,
},
{
.name = "PCIE_RX_NUM_NACK_GENERATED",
.address = 0x72,
},
{
.name = "PCIE_RX_ACK_NACK_LATENCY",
.address = 0x73,
},
{
.name = "PCIE_RX_ACK_NACK_LATENCY_THRESHOLD",
.address = 0x74,
},
{
.name = "PCIE_RX_TLP_HDR0",
.address = 0x75,
},
{
.name = "PCIE_RX_TLP_HDR1",
.address = 0x76,
},
{
.name = "PCIE_RX_TLP_HDR2",
.address = 0x77,
},
{
.name = "PCIE_RX_TLP_HDR3",
.address = 0x78,
},
{
.name = "PCIE_RX_TLP_HDR4",
.address = 0x79,
},
{
.name = "PCIE_RX_TLP_CRC",
.address = 0x7a,
},
{
.name = "PCIE_RX_DLP0",
.address = 0x7b,
},
{
.name = "PCIE_RX_DLP1",
.address = 0x7c,
},
{
.name = "PCIE_RX_DLP_CRC",
.address = 0x7d,
},
{
.name = "PCIE_RX_CREDITS_ALLOCATED",
.address = 0x7e,
},
{
.name = "PCIE_RX_CREDITS_ALLOCATED_D",
.address = 0x7f,
},
{
.name = "PCIE_TX_CREDITS_LIMIT_D",
.address = 0x8,
},
{
.name = "PCIE_RX_CREDITS_ALLOCATED_CPLD",
.address = 0x80,
},
{
.name = "PCIE_RX_CREDITS_RECEIVED",
.address = 0x81,
},
{
.name = "PCIE_RX_CREDITS_RECEIVED_D",
.address = 0x82,
},
{
.name = "PCIE_RX_CREDITS_RECEIVED_CPLD",
.address = 0x83,
},
{
.name = "PCIE_RX_MAL_TLP_COUNT",
.address = 0x84,
},
{
.name = "PCIE_RX_ERR_LOG",
.address = 0x85,
},
{
.name = "PCIE_RX_EXPECTED_SEQNUM",
.address = 0x86,
},
{
.name = "PCIE_TX_CREDITS_LIMIT_CPLD",
.address = 0x9,
},
{
.name = "PCIE_CI_CNTL",
.address = 0x90,
},
{
.name = "PCIE_CI_FLUSH_CNTL",
.address = 0x91,
},
{
.name = "PCIE_CI_PANIC",
.address = 0x92,
},
{
.name = "PCIE_CI_HANG",
.address = 0x93,
},
{
.name = "PCIE_LC_CNTL",
.address = 0xa0,
},
{
.name = "PCIE_LC_N_FTS_CNTL",
.address = 0xa1,
},
{
.name = "PCIE_LC_LINK_WIDTH_CNTL",
.address = 0xa2,
},
{
.name = "PCIE_LC_STATE0",
.address = 0xa5,
},
{
.name = "PCIE_LC_STATE1",
.address = 0xa6,
},
{
.name = "PCIE_LC_STATE2",
.address = 0xa7,
},
{
.name = "PCIE_LC_STATE3",
.address = 0xa8,
},
{
.name = "PCIE_LC_STATE4",
.address = 0xa9,
},
{
.name = "PCIE_LC_STATE5",
.address = 0xaa,
},
{
.name = "PCIE_LC_FORCE_SYNC_LOSS_CNTL",
.address = 0xab,
},
{
.name = "PCIE_P_CNTL",
.address = 0xb0,
},
{
.name = "PCIE_P_CNTL2",
.address = 0xb1,
},
{
.name = "PCIE_P_BUF_STATUS",
.address = 0xb2,
},
{
.name = "PCIE_P_DECODER_STATUS",
.address = 0xb3,
},
{
.name = "PCIE_P_MISC_DEBUG_STATUS",
.address = 0xb4,
},
{
.name = "PCIE_P_IMP_CNTL_STRENGTH",
.address = 0xc0,
},
{
.name = "PCIE_P_IMP_CNTL_UPDATE",
.address = 0xc1,
},
{
.name = "PCIE_P_STR_CNTL_UPDATE",
.address = 0xc2,
},
{
.name = "PCIE_P_PAD_MISC_CNTL",
.address = 0xc3,
},
{
.name = "PCIE_P_SYMSYNC_CTL",
.address = 0xc4,
},
{
.name = "PCIE_P_DECODE_ERR_CNTL",
.address = 0xc5,
},
{
.name = "PCIE_ERR_CNTL",
.address = 0xe0,
},
{
.name = "PCIE_CLK_RST_CNTL",
.address = 0xe1,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_0",
.address = 0xf0,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_1",
.address = 0xf1,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_2",
.address = 0xf2,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_3",
.address = 0xf3,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_4",
.address = 0xf4,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_5",
.address = 0xf5,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_6",
.address = 0xf6,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_7",
.address = 0xf7,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_8",
.address = 0xf8,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_9",
.address = 0xf9,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_10",
.address = 0xfa,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_11",
.address = 0xfb,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_12",
.address = 0xfc,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_13",
.address = 0xfd,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_14",
.address = 0xfe,
},
{
.name = "PCIE_P_DECODE_ERR_CNT_15",
.address = 0xff,
},

486
regs/pcie_registers.txt Normal file
View File

@ -0,0 +1,486 @@
PCIE_TX_CNTL
PCIEIND:0x1
2-80
PCIE_TX_GART_CNTL
PCIEIND:0x10
2-82
PCIE_TX_GART_DISCARD_RD_ADDR_LO
PCIEIND:0x11
2-82
PCIE_TX_GART_DISCARD_RD_ADDR_HI
PCIEIND:0x12
2-82
PCIE_TX_GART_BASE
PCIEIND:0x13
2-83
PCIE_TX_GART_START_LO
PCIEIND:0x14
2-83
PCIE_TX_GART_START_HI
PCIEIND:0x15
2-83
PCIE_TX_GART_END_LO
PCIEIND:0x16
2-83
PCIE_TX_GART_END_HI
PCIEIND:0x17
2-83
PCIE_TX_GART_ERROR
PCIEIND:0x18
2-83
PCIE_TX_SEQ
PCIEIND:0x2
2-80
PCIE_TX_GART_LRU_MRU_PTR
PCIEIND:0x20
2-84
PCIE_TX_GART_STATUS
PCIEIND:0x21
2-84
PCIE_TX_GART_TLB_VALID
PCIEIND:0x22
2-84
PCIE_TX_GART_TLB0_DATA
PCIEIND:0x23
2-84
PCIE_TX_GART_TLB1_DATA
PCIEIND:0x24
2-84
PCIE_TX_GART_TLB2_DATA
PCIEIND:0x25
2-84
PCIE_TX_GART_TLB3_DATA
PCIEIND:0x26
2-85
PCIE_TX_GART_TLB4_DATA
PCIEIND:0x27
2-85
PCIE_TX_GART_TLB5_DATA
PCIEIND:0x28
2-85
PCIE_TX_GART_TLB6_DATA
PCIEIND:0x29
2-85
PCIE_TX_GART_TLB7_DATA
PCIEIND:0x2A
2-85
PCIE_TX_GART_TLB8_DATA
PCIEIND:0x2B
2-85
PCIE_TX_GART_TLB9_DATA
PCIEIND:0x2C
2-85
PCIE_TX_GART_TLB10_DATA
PCIEIND:0x2D
2-86
PCIE_TX_GART_TLB11_DATA
PCIEIND:0x2E
2-86
PCIE_TX_GART_TLB12_DATA
PCIEIND:0x2F
2-86
PCIE_TX_REPLAY
PCIEIND:0x3
2-80
PCIE_TX_GART_TLB13_DATA
PCIEIND:0x30
2-86
PCIE_TX_GART_TLB14_DATA
PCIEIND:0x31
2-86
PCIE_TX_GART_TLB15_DATA
PCIEIND:0x32
2-86
PCIE_TX_GART_TLB16_DATA
PCIEIND:0x33
2-87
PCIE_TX_GART_TLB17_DATA
PCIEIND:0x34
2-87
PCIE_TX_GART_TLB18_DATA
PCIEIND:0x35
2-87
PCIE_TX_GART_TLB19_DATA
PCIEIND:0x36
2-87
PCIE_TX_GART_TLB20_DATA
PCIEIND:0x37
2-87
PCIE_TX_GART_TLB21_DATA
PCIEIND:0x38
2-87
PCIE_TX_GART_TLB22_DATA
PCIEIND:0x39
2-88
PCIE_TX_GART_TLB23_DATA
PCIEIND:0x3A
2-88
PCIE_TX_GART_TLB24_DATA
PCIEIND:0x3B
2-88
PCIE_TX_GART_TLB25_DATA
PCIEIND:0x3C
2-88
PCIE_TX_GART_TLB26_DATA
PCIEIND:0x3D
2-88
PCIE_TX_GART_TLB27_DATA
PCIEIND:0x3E
2-88
PCIE_TX_GART_TLB28_DATA
PCIEIND:0x3F
2-89
PCIE_TX_CREDITS_CONSUMED
PCIEIND:0x4
2-81
PCIE_TX_GART_TLB29_DATA
PCIEIND:0x40
2-89
PCIE_CLK_CNTL
PCIEIND:0x400
2-107
PCIE_PRBS10
PCIEIND:0x401
2-107
PCIE_PRBS23_BITCNT0
PCIEIND:0x402
2-107
PCIE_PRBS23_BITCNT1
PCIEIND:0x403
2-107
PCIE_PRBS23_BITCNT2
PCIEIND:0x404
2-107
PCIE_PRBS23_BITCNT3
PCIEIND:0x405
2-108
PCIE_PRBS23_BITCNT4
PCIEIND:0x406
2-108
PCIE_PRBS23_BITCNT5
PCIEIND:0x407
2-108
PCIE_PRBS23_BITCNT6
PCIEIND:0x408
2-108
PCIE_PRBS23_BITCNT7
PCIEIND:0x409
2-108
PCIE_PRBS23_BITCNT8
PCIEIND:0x40A
2-108
PCIE_PRBS23_BITCNT9
PCIEIND:0x40B
2-108
PCIE_PRBS23_BITCNT10
PCIEIND:0x40C
2-109
PCIE_PRBS23_BITCNT11
PCIEIND:0x40D
2-109
PCIE_PRBS23_BITCNT12
PCIEIND:0x40E
2-109
PCIE_PRBS23_BITCNT13
PCIEIND:0x40F
2-109
PCIE_TX_GART_TLB30_DATA
PCIEIND:0x41
2-89
PCIE_PRBS23_BITCNT14
PCIEIND:0x410
2-109
PCIE_PRBS23_BITCNT15
PCIEIND:0x411
2-109
PCIE_PRBS23_ERRCNT0
PCIEIND:0x412
2-110
PCIE_PRBS23_ERRCNT1
PCIEIND:0x413
2-110
PCIE_PRBS23_ERRCNT2
PCIEIND:0x414
2-110
PCIE_PRBS23_ERRCNT3
PCIEIND:0x415
2-110
PCIE_PRBS23_ERRCNT4
PCIEIND:0x416
2-110
PCIE_PRBS23_ERRCNT5
PCIEIND:0x417
2-110
PCIE_PRBS23_ERRCNT6
PCIEIND:0x418
2-110
PCIE_PRBS23_ERRCNT7
PCIEIND:0x419
2-111
PCIE_PRBS23_ERRCNT8
PCIEIND:0x41A
2-111
PCIE_PRBS23_ERRCNT9
PCIEIND:0x41B
2-111
PCIE_PRBS23_ERRCNT10
PCIEIND:0x41C
2-111
PCIE_PRBS23_ERRCNT11
PCIEIND:0x41D
2-111
PCIE_PRBS23_ERRCNT12
PCIEIND:0x41E
2-111
PCIE_PRBS23_ERRCNT13
PCIEIND:0x41F
2-111
PCIE_TX_GART_TLB31_DATA
PCIEIND:0x42
2-89
PCIE_PRBS23_ERRCNT14
PCIEIND:0x420
2-112
PCIE_PRBS23_ERRCNT15
PCIEIND:0x421
2-112
PCIE_PRBS23_CTRL0
PCIEIND:0x422
2-112
PCIE_PRBS23_CTRL1
PCIEIND:0x423
2-113
PCIE_PRBS_EN
PCIEIND:0x424
2-113
PCIE_XSTRAP1
PCIEIND:0x425
2-113
PCIE_XSTRAP2
PCIEIND:0x426
2-114
PCIE_XSTRAP5
PCIEIND:0x429
2-114
PCIE_TX_CREDITS_CONSUMED_D
PCIEIND:0x5
2-81
PCIE_TX_CREDITS_CONSUMED_CPLD
PCIEIND:0x6
2-81
PCIE_FLOW_CNTL
PCIEIND:0x60
2-89
PCIE_TXRX_DEBUG_SEQNUM
PCIEIND:0x61
2-90
PCIE_TXRX_TEST_MODE
PCIEIND:0x62
2-90
PCIE_TX_CREDITS_LIMIT
PCIEIND:0x7
2-81
PCIE_RX_CNTL
PCIEIND:0x70
2-90
PCIE_RX_NUM_NACK
PCIEIND:0x71
2-91
PCIE_RX_NUM_NACK_GENERATED
PCIEIND:0x72
2-91
PCIE_RX_ACK_NACK_LATENCY
PCIEIND:0x73
2-91
PCIE_RX_ACK_NACK_LATENCY_THRESHOLD
PCIEIND:0x74
2-91
PCIE_RX_TLP_HDR0
PCIEIND:0x75
2-91
PCIE_RX_TLP_HDR1
PCIEIND:0x76
2-91
PCIE_RX_TLP_HDR2
PCIEIND:0x77
2-91
PCIE_RX_TLP_HDR3
PCIEIND:0x78
2-92
PCIE_RX_TLP_HDR4
PCIEIND:0x79
2-92
PCIE_RX_TLP_CRC
PCIEIND:0x7A
2-92
PCIE_RX_DLP0
PCIEIND:0x7B
2-92
PCIE_RX_DLP1
PCIEIND:0x7C
2-92
PCIE_RX_DLP_CRC
PCIEIND:0x7D
2-92
PCIE_RX_CREDITS_ALLOCATED
PCIEIND:0x7E
2-92
PCIE_RX_CREDITS_ALLOCATED_D
PCIEIND:0x7F
2-93
PCIE_TX_CREDITS_LIMIT_D
PCIEIND:0x8
2-81
PCIE_RX_CREDITS_ALLOCATED_CPLD
PCIEIND:0x80
2-93
PCIE_RX_CREDITS_RECEIVED
PCIEIND:0x81
2-93
PCIE_RX_CREDITS_RECEIVED_D
PCIEIND:0x82
2-93
PCIE_RX_CREDITS_RECEIVED_CPLD
PCIEIND:0x83
2-93
PCIE_RX_MAL_TLP_COUNT
PCIEIND:0x84
2-93
PCIE_RX_ERR_LOG
PCIEIND:0x85
2-94
PCIE_RX_EXPECTED_SEQNUM
PCIEIND:0x86
2-94
PCIE_TX_CREDITS_LIMIT_CPLD
PCIEIND:0x9
2-82
PCIE_CI_CNTL
PCIEIND:0x90
2-94
PCIE_CI_FLUSH_CNTL
PCIEIND:0x91
2-94
PCIE_CI_PANIC
PCIEIND:0x92
2-94
PCIE_CI_HANG
PCIEIND:0x93
2-95
PCIE_LC_CNTL
PCIEIND:0xA0
2-95
PCIE_LC_N_FTS_CNTL
PCIEIND:0xA1
2-95
PCIE_LC_LINK_WIDTH_CNTL
PCIEIND:0xA2
2-97
PCIE_LC_STATE0
PCIEIND:0xA5
2-95
PCIE_LC_STATE1
PCIEIND:0xA6
2-96
PCIE_LC_STATE2
PCIEIND:0xA7
2-96
PCIE_LC_STATE3
PCIEIND:0xA8
2-96
PCIE_LC_STATE4
PCIEIND:0xA9
2-96
PCIE_LC_STATE5
PCIEIND:0xAA
2-96
PCIE_LC_FORCE_SYNC_LOSS_CNTL
PCIEIND:0xAB
2-97
PCIE_P_CNTL
PCIEIND:0xB0
2-97
PCIE_P_CNTL2
PCIEIND:0xB1
2-98
PCIE_P_BUF_STATUS
PCIEIND:0xB2
2-98
PCIE_P_DECODER_STATUS
PCIEIND:0xB3
2-99
PCIE_P_MISC_DEBUG_STATUS
PCIEIND:0xB4
2-100
PCIE_P_IMP_CNTL_STRENGTH
PCIEIND:0xC0
2-101
PCIE_P_IMP_CNTL_UPDATE
PCIEIND:0xC1
2-102
PCIE_P_STR_CNTL_UPDATE
PCIEIND:0xC2
2-102
PCIE_P_PAD_MISC_CNTL
PCIEIND:0xC3
2-102
PCIE_P_SYMSYNC_CTL
PCIEIND:0xC4
2-102
PCIE_P_DECODE_ERR_CNTL
PCIEIND:0xC5
2-103
PCIE_ERR_CNTL
PCIEIND:0xE0
2-105
PCIE_CLK_RST_CNTL
PCIEIND:0xE1
2-105
PCIE_P_DECODE_ERR_CNT_0
PCIEIND:0xF0
2-103
PCIE_P_DECODE_ERR_CNT_1
PCIEIND:0xF1
2-103
PCIE_P_DECODE_ERR_CNT_2
PCIEIND:0xF2
2-103
PCIE_P_DECODE_ERR_CNT_3
PCIEIND:0xF3
2-103
PCIE_P_DECODE_ERR_CNT_4
PCIEIND:0xF4
2-103
PCIE_P_DECODE_ERR_CNT_5
PCIEIND:0xF5
2-103
PCIE_P_DECODE_ERR_CNT_6
PCIEIND:0xF6
2-104
PCIE_P_DECODE_ERR_CNT_7
PCIEIND:0xF7
2-104
PCIE_P_DECODE_ERR_CNT_8
PCIEIND:0xF8
2-104
PCIE_P_DECODE_ERR_CNT_9
PCIEIND:0xF9
2-104
PCIE_P_DECODE_ERR_CNT_10
PCIEIND:0xFA
2-104
PCIE_P_DECODE_ERR_CNT_11
PCIEIND:0xFB
2-104
PCIE_P_DECODE_ERR_CNT_12
PCIEIND:0xFC
2-105
PCIE_P_DECODE_ERR_CNT_13
PCIEIND:0xFD
2-105
PCIE_P_DECODE_ERR_CNT_14
PCIEIND:0xFE
2-105
PCIE_P_DECODE_ERR_CNT_15
PCIEIND:0xFF
2-105

View File

@ -6,6 +6,7 @@ import pvs_dual_math
import itertools
from functools import partial
import sys
import struct
def out(level, *args):
sys.stdout.write(" " * level + " ".join(args))
@ -248,8 +249,14 @@ def parse_hex(s):
if __name__ == "__main__":
filename = sys.argv[1]
if filename.endswith(".bin"):
with open(filename, 'rb') as f:
buf = f.read()
code = [struct.unpack("<I", buf[i*4:i*4+4])[0] for i in range(len(buf) // 4)]
else:
with open(filename) as f:
buf = f.read()
code = [parse_hex(c.strip()) for c in buf.split(',') if c.strip()]
for i in range(len(code) // 4):
parse_instruction(code[i*4:i*4+4])

37
regs/rrg_registers.py Normal file
View File

@ -0,0 +1,37 @@
import sys
# Command line: argv[1] is the register listing to convert, argv[2:] are the
# address prefixes to accept.
with open(sys.argv[1]) as f:
    buf = f.read()

# Each address token is expected to look like "<prefix>:0x<hex>"; at least one
# accepted prefix must be supplied.
prefixes = sys.argv[2:]
assert len(prefixes) >= 1

# split() with no argument splits on any run of whitespace, so the listing is
# flattened into a stream of tokens.  Records are consumed three tokens at a
# time (name, address, page), hence the multiple-of-three check.
lines = [line.strip() for line in buf.strip().split()]
assert len(lines) % 3 == 0
def parse(lines):
    """Yield ``(name, address, page)`` for every three-token record in ``lines``.

    ``lines`` is a flat token list laid out as name, address, page, name, ...
    The address token must start with one of the module-level ``prefixes``
    followed by ``:`` and then a ``0x`` hexadecimal number; it is yielded as
    an ``int``.  ``name`` and ``page`` are yielded unchanged.

    Malformed records trip an ``assert`` when the generator reaches them.
    """
    records = zip(lines[0::3], lines[1::3], lines[2::3])
    for name, raw_address, page in records:
        # A page reference must contain a dash; show the offending token if not.
        assert '-' in page, page

        stripped = raw_address
        for prefix in prefixes:
            tag = f"{prefix}:"
            if stripped.startswith(tag):
                stripped = stripped[len(tag):]
        # At least one accepted prefix must have matched and been removed.
        assert stripped != raw_address

        assert stripped.startswith("0x")
        yield name, int(stripped[2:], 16), page
# Print one brace-delimited `.name` / `.address` entry per register to stdout.
# `page` is parsed (and validated) but not emitted.
for name, address, page in parse(lines):
    print("{")
    print(f" .name = \"{name}\",")
    print(f" .address = {hex(address)},")
    print("},")
    # Alternative output format, currently disabled:
    #print(f"#define {name} {hex(address)}")

View File

@ -1,3 +1,4 @@
import struct
import sys
import parse_bits
from collections import OrderedDict
@ -149,6 +150,11 @@ def parse_hex(s):
if __name__ == "__main__":
filename = sys.argv[1]
if filename.endswith(".bin"):
with open(filename, 'rb') as f:
buf = f.read()
code = [struct.unpack("<I", buf[i*4:i*4+4])[0] for i in range(len(buf) // 4)]
else:
with open(filename) as f:
buf = f.read()
code = [parse_hex(c.strip()) for c in buf.split(',') if c.strip()]

View File

@ -4,6 +4,7 @@ import parse_bits
from collections import OrderedDict
from functools import partial
from pprint import pprint
import struct
VERBOSE = environ.get("VERBOSE", "false").lower() == "true"
@ -479,9 +480,15 @@ def parse_hex(s):
if __name__ == "__main__":
filename = sys.argv[1]
if filename.endswith(".bin"):
with open(filename, 'rb') as f:
buf = f.read()
code = [struct.unpack("<I", buf[i*4:i*4+4])[0] for i in range(len(buf) // 4)]
else:
with open(filename) as f:
buf = f.read()
code = [parse_hex(c.strip()) for c in buf.split(',') if c.strip()]
for i in range(len(code) // 6):
start = (i + 0) * 6
end = (i + 1) * 6

View File

@ -0,0 +1,12 @@
0x00007807,
0x02400000,
0xe400f400,
0x00000000,
0x00000000,
0x00000000,
0x00078005,
0x08020000,
0x08020000,
0x1c440220,
0x1c60c003,
0x00000005,

View File

@ -0,0 +1,8 @@
0x00f00203,
0x00d10001,
0x01248001,
0x01248001,
0x00f02203,
0x00d10021,
0x01248021,
0x01248021,

View File

@ -0,0 +1,12 @@
0x00007807,
0x02400000,
0xe400e400,
0x00000000,
0x00000000,
0x00000000,
0x00078005,
0x08020000,
0x08020000,
0x1c440220,
0x1c60c003,
0x00000005,

View File

@ -0,0 +1,16 @@
0x00100201,
0x00d10002,
0x01510001,
0x01248001,
0x00200201,
0x00d10022,
0x01510001,
0x01248001,
0x00c00201,
0x00d10062,
0x01510001,
0x01248001,
0x00702203,
0x01d10001,
0x01248001,
0x01248001,

61
src/Makefile Normal file
View File

@ -0,0 +1,61 @@
# Unoptimized debug build.  -Werror turns warnings into errors and
# -Wfatal-errors stops at the first one; the two unused-variable warning
# classes are demoted back to plain warnings.
OPT = -O0
CFLAGS += -g
CFLAGS += -Wall -Werror -Wfatal-errors
CFLAGS += -Wno-error=unused-variable
CFLAGS += -Wno-error=unused-but-set-variable
CFLAGS += -Wno-narrowing
CFLAGS += $(shell pkg-config --cflags libdrm)
LDFLAGS += $(shell pkg-config --libs libdrm) -lm

# Every *.vs.asm / *.fs.asm in this directory maps to a .bin of the same name.
VERTEX_SHADERS = $(patsubst %.asm,%.bin,$(wildcard *.vs.asm))
FRAGMENT_SHADERS = $(patsubst %.asm,%.bin,$(wildcard *.fs.asm))
SHADER_BIN = $(VERTEX_SHADERS) $(FRAGMENT_SHADERS)

# Objects linked into every program.
R500_COMMON = \
r500/display_controller.o \
r500/indirect_buffer.o \
r500/shader.o \
drm/buffer.o \
drm/drm.o \
file.o

# Link program `foo` from foo.o plus the common objects.  `shaders` is an
# order-only prerequisite: it is built first but is not part of $^ and does
# not force a relink.
%: $(R500_COMMON) %.o | shaders
	$(CXX) $(LDFLAGS) $^ -o $@

# NOTE(review): ARCH is not assigned anywhere in this file; presumably it is
# meant to be supplied from the command line or environment.
%.o: %.c
	$(CC) $(ARCH) $(CFLAGS) $(OPT) -c $< -o $@
%.o: %.cpp
	$(CXX) $(ARCH) $(CFLAGS) $(OPT) -c $< -o $@

# Shader assembly: the .bin rules pass the output path to the assembler, the
# .inc rules capture the assembler's stdout instead.
%.vs.bin: %.vs.asm
	PYTHONPATH=../regs/ python -m assembler.vs $< $@
%.fs.bin: %.fs.asm
	PYTHONPATH=../regs/ python -m assembler.fs $< $@
%.vs.inc: %.vs.asm
	PYTHONPATH=../regs/ python -m assembler.vs $< > $@
%.fs.inc: %.fs.asm
	PYTHONPATH=../regs/ python -m assembler.fs $< > $@

# Aggregate target for all shader binaries; the recipe is a silent no-op.
shaders: $(SHADER_BIN)
	@true

# Removes object files and extension-less executables below this directory.
clean:
	find . -type f -name "*.o" -delete -print
	find . -type f -executable ! -name '*.*' -delete -print

# Disable the built-in suffix rules; .SECONDARY with no prerequisites keeps
# make from auto-deleting intermediate files.
.SUFFIXES:
.INTERMEDIATE:
.SECONDARY:
# NOTE(review): no `all` or `phony` target is defined in this file, and
# `shaders` is not listed here.
.PHONY: all clean phony

# Pattern rules with no recipe cancel make's built-in RCS/SCCS checkout rules.
%: RCS/%,v
%: RCS/%
%: %,v
%: s.%
%: SCCS/s.%

4
src/clear.fs.asm Normal file
View File

@ -0,0 +1,4 @@
-- Clear fragment shader: a single instruction that writes out[0].a and
-- out[0].rgb from constant swizzles only (nothing is bound before the ':').
-- NOTE(review): the `1` / `000` swizzles presumably select constant one / zero,
-- i.e. alpha = 1 and rgb = 0 -- confirm against the assembler.
OUT TEX_SEM_WAIT
:
out[0].a = MAX src0.1 src0.1 ,
out[0].rgb = MAX src0.000 src0.000 ;

BIN
src/clear.fs.bin Normal file

Binary file not shown.

1
src/clear.vs.asm Normal file
View File

@ -0,0 +1 @@
-- Clear vertex shader: out[0] = input[0] + input[0].0000
-- NOTE(review): the `.0000` swizzle presumably selects constant zero, making
-- this a plain copy of input[0] to out[0] -- confirm against the assembler.
out[0].xyzw = VE_ADD input[0].xyzw input[0].0000 ;

BIN
src/clear.vs.bin Normal file

Binary file not shown.

103
src/drm/buffer.c Normal file
View File

@ -0,0 +1,103 @@
#include <assert.h>
#include <stdio.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <xf86drm.h>
#include <libdrm/radeon_drm.h>
#include "buffer.h"
#include "../file.h"
int create_buffer(int fd, int buffer_size, void ** out_ptr)
{
int ret;
struct drm_radeon_gem_create args = {
.size = buffer_size,
.alignment = 4096,
.handle = 0,
.initial_domain = 4, // RADEON_GEM_DOMAIN_VRAM
.flags = 4
};
ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_CREATE, &args, (sizeof (struct drm_radeon_gem_create)));
if (ret != 0) {
perror("drmCommandWriteRead(DRM_RADEON_GEM_CREATE)");
}
assert(args.handle != 0);
struct drm_radeon_gem_mmap mmap_args = {
.handle = args.handle,
.offset = 0,
.size = buffer_size,
};
ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_MMAP, &mmap_args, (sizeof (struct drm_radeon_gem_mmap)));
if (ret != 0) {
perror("drmCommandWriteRead(DRM_RADEON_GEM_MMAP)");
}
if (out_ptr != NULL) {
void * ptr = mmap(0,
buffer_size,
PROT_READ | PROT_WRITE,
MAP_SHARED,
fd,
mmap_args.addr_ptr);
assert(ptr != MAP_FAILED);
*out_ptr = ptr;
}
return args.handle;
}
int create_flush_buffer(int fd)
{
int ret;
struct drm_radeon_gem_create args = {
.size = 4096,
.alignment = 4096,
.handle = 0,
.initial_domain = 2, // GTT
.flags = 0
};
ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_CREATE,
&args, (sizeof (args)));
if (ret != 0) {
perror("drmCommandWriteRead(DRM_RADEON_GEM_CREATE)");
}
assert(args.handle != 0);
return args.handle;
}
/*
 * Load each file named in `textures` into its own newly created GEM buffer.
 *
 * fd:              DRM device file descriptor, forwarded to create_buffer().
 * textures:        array of `textures_length` file paths.
 * textures_length: number of entries in `textures`.
 *
 * For every path the file is read with file_read(), a buffer of the same size
 * is created and mapped with create_buffer(), the file contents are copied in,
 * and the mapping is released again.
 *
 * Returns a malloc()ed array of `textures_length` GEM handles, in the same
 * order as `textures`; the caller owns the array.  Allocation or file-read
 * failures trip an assert, matching the rest of this file.
 */
int * load_textures(int fd,
                    const char ** textures,
                    int textures_length)
{
  int * texturebuffer_handle = malloc((sizeof *texturebuffer_handle) * textures_length);
  // malloc(0) may return NULL, so only a non-empty request must succeed.
  assert(texturebuffer_handle != NULL || textures_length == 0);

  for (int i = 0; i < textures_length; i++) {
    int size = 0;
    void * buf = file_read(textures[i], &size);
    assert(buf != NULL);

    printf("load texture[%d]: %d\n", i, size);

    void * ptr = NULL;
    int handle = create_buffer(fd, size, &ptr);

    // Copy whole 32-bit words first ...
    const uint32_t * src = buf;
    uint32_t * dst = ptr;
    const int dwords = size / 4;
    for (int dword = 0; dword < dwords; dword++) {
      dst[dword] = src[dword];
    }
    // ... then the 1-3 trailing bytes of a file whose size is not a multiple
    // of four, which would otherwise be left uninitialized in the buffer.
    for (int byte = dwords * 4; byte < size; byte++) {
      ((uint8_t *)ptr)[byte] = ((const uint8_t *)buf)[byte];
    }

    // Compiler barrier: the stores above must be emitted before the mapping
    // is torn down.  __asm__ is accepted in every GCC/Clang language mode.
    __asm__ volatile ("" ::: "memory");

    free(buf);
    munmap(ptr, size);

    texturebuffer_handle[i] = handle;
  }

  return texturebuffer_handle;
}

15
src/drm/buffer.h Normal file
View File

@ -0,0 +1,15 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
// Creates a GEM buffer of buffer_size bytes on DRM device fd and returns its
// handle.  When out_ptr is non-NULL it also receives a read/write CPU mapping
// of the buffer.
int create_buffer(int fd, int buffer_size, void ** out_ptr);
// Creates an unmapped 4096-byte GEM buffer in the GTT domain and returns its
// handle.
int create_flush_buffer(int fd);
// Reads each of the textures_length files named in `textures` into its own GEM
// buffer and returns a malloc()ed array of the resulting handles, in the same
// order as the paths.
int * load_textures(int fd,
                    const char ** textures,
                    int textures_length);
#ifdef __cplusplus
}
#endif

162
src/drm/drm.c Normal file
View File

@ -0,0 +1,162 @@
#include <stdio.h>
#include <stdint.h>
#include <xf86drm.h>
#include <libdrm/radeon_drm.h>
#include "drm.h"
#include "../r500/indirect_buffer.h" // for extern uint32_t ib[];
/*
 * Submit the first `ib_dwords` dwords of the global indirect buffer `ib`
 * with relocations for a color buffer, Z buffer, vertex buffer and any number
 * of texture buffers.
 *
 * The handles are placed at COLORBUFFER_RELOC_INDEX, ZBUFFER_RELOC_INDEX,
 * VERTEXBUFFER_RELOC_INDEX and, for textures, TEXTUREBUFFER_RELOC_INDEX + i.
 * Every relocation uses the same domains and flags, so the actual submission
 * is delegated to drm_radeon_cs2() instead of repeating the chunk setup here.
 *
 * Returns 0 on success, or -1 after perror() if the DRM_RADEON_CS ioctl fails.
 */
int drm_radeon_cs(int fd,
                  int colorbuffer_handle,
                  int zbuffer_handle,
                  int vertexbuffer_handle,
                  int * texturebuffer_handles,
                  int texturebuffer_handles_length,
                  int ib_dwords)
{
  const int handles_length = TEXTUREBUFFER_RELOC_INDEX + texturebuffer_handles_length;
  int handles[handles_length];

  handles[COLORBUFFER_RELOC_INDEX] = colorbuffer_handle;
  handles[ZBUFFER_RELOC_INDEX] = zbuffer_handle;
  handles[VERTEXBUFFER_RELOC_INDEX] = vertexbuffer_handle;
  for (int i = 0; i < texturebuffer_handles_length; i++) {
    handles[TEXTUREBUFFER_RELOC_INDEX + i] = texturebuffer_handles[i];
  }

  return drm_radeon_cs2(fd, handles, handles_length, ib_dwords);
}
/*
 * Submit the first `ib_dwords` dwords of the global indirect buffer `ib`
 * with one relocation per entry of `handles`, in the given order.
 *
 * fd:             DRM device file descriptor.
 * handles:        GEM handles; entry i becomes relocation i.
 * handles_length: number of entries in `handles`.
 * ib_dwords:      length of the command stream in 32-bit words.
 *
 * Returns 0 on success, or -1 after perror() if the DRM_RADEON_CS ioctl fails.
 */
int drm_radeon_cs2(int fd,
                   int * handles,
                   int handles_length,
                   int ib_dwords)
{
  enum { CHUNK_COUNT = 3 };

  // Every buffer is relocated with identical domains and flags.
  struct drm_radeon_cs_reloc relocs[handles_length];
  for (int n = 0; n < handles_length; n++) {
    relocs[n] = (struct drm_radeon_cs_reloc){
      .handle = handles[n],
      .read_domains = 4, // RADEON_GEM_DOMAIN_VRAM
      .write_domain = 4, // RADEON_GEM_DOMAIN_VRAM
      .flags = 8,
    };
  }

  const uint32_t flags[2] = {
    5, // RADEON_CS_KEEP_TILING_FLAGS | RADEON_CS_END_OF_FRAME
    0, // RADEON_CS_RING_GFX
  };

  struct drm_radeon_cs_chunk chunks[CHUNK_COUNT] = {
    [0] = {
      .chunk_id = RADEON_CHUNK_ID_IB,
      .length_dw = ib_dwords,
      .chunk_data = (uint64_t)(uintptr_t)ib,
    },
    [1] = {
      .chunk_id = RADEON_CHUNK_ID_RELOCS,
      .length_dw = (sizeof relocs) / (sizeof (uint32_t)),
      .chunk_data = (uint64_t)(uintptr_t)relocs,
    },
    [2] = {
      .chunk_id = RADEON_CHUNK_ID_FLAGS,
      .length_dw = (sizeof flags) / (sizeof (uint32_t)),
      .chunk_data = (uint64_t)(uintptr_t)flags,
    },
  };

  // The ioctl takes an array of 64-bit pointers to the chunk descriptors.
  uint64_t chunk_pointers[CHUNK_COUNT];
  for (int n = 0; n < CHUNK_COUNT; n++) {
    chunk_pointers[n] = (uint64_t)(uintptr_t)&chunks[n];
  }

  struct drm_radeon_cs cs = {
    .num_chunks = CHUNK_COUNT,
    .cs_id = 0,
    .chunks = (uint64_t)(uintptr_t)chunk_pointers,
    .gart_limit = 0,
    .vram_limit = 0,
  };

  if (drmCommandWriteRead(fd, DRM_RADEON_CS, &cs, (sizeof cs)) == 0) {
    return 0;
  }

  perror("drmCommandWriteRead(DRM_RADEON_CS)");
  return -1;
}

28
src/drm/drm.h Normal file
View File

@ -0,0 +1,28 @@
#pragma once
// Slots in the relocation array built by drm_radeon_cs(): color, Z and vertex
// buffers occupy fixed indices, and texture buffers follow from
// TEXTUREBUFFER_RELOC_INDEX onwards.
#define COLORBUFFER_RELOC_INDEX 0
#define ZBUFFER_RELOC_INDEX 1
//#define FLUSH_RELOC_INDEX 2
#define VERTEXBUFFER_RELOC_INDEX 2
#define TEXTUREBUFFER_RELOC_INDEX 3
#ifdef __cplusplus
extern "C" {
#endif
// Submits the first ib_dwords dwords of the global `ib` with relocations for
// the given buffers, laid out per the *_RELOC_INDEX constants above.
// Returns 0 on success, -1 (after perror) if the ioctl fails.
int drm_radeon_cs(int fd,
                  int colorbuffer_handle,
                  int zbuffer_handle,
                  int vertexbuffer_handle,
                  int * texturebuffer_handles,
                  int texturebuffer_handles_length,
                  int ib_dwords);
// Same submission, but with a caller-supplied handle list: handles[i] becomes
// relocation i.  Returns 0 on success, -1 (after perror) if the ioctl fails.
int drm_radeon_cs2(int fd,
                   int * handles,
                   int handles_length,
                   int ib_dwords);
#ifdef __cplusplus
}
#endif

50
src/file.c Normal file
View File

@ -0,0 +1,50 @@
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include "file.h"
/*
 * Read the whole file at `path` into a freshly malloc()ed buffer.
 *
 * path:     file to read.
 * size_out: when non-NULL, receives the number of bytes read on success; it
 *           is left untouched on failure.
 *
 * Returns the buffer (the caller frees it), or NULL after printing a
 * diagnostic to stderr.  On every failure path the descriptor is closed and
 * the buffer released, so nothing leaks.
 */
void * file_read(const char * path, int * size_out)
{
  void * buf = NULL;
  off_t total = 0;

  int fd = open(path, O_RDONLY);
  if (fd == -1) {
    fprintf(stderr, "open(%s): %s\n", path, strerror(errno));
    return NULL;
  }

  off_t size = lseek(fd, 0, SEEK_END);
  if (size == (off_t)-1) {
    fprintf(stderr, "lseek(%s, SEEK_END): %s\n", path, strerror(errno));
    goto fail;
  }
  // The size is reported through an int; refuse files that do not fit rather
  // than silently truncating the value.
  if ((off_t)(int)size != size) {
    fprintf(stderr, "file_read(%s): file too large\n", path);
    goto fail;
  }

  if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
    fprintf(stderr, "lseek(%s, SEEK_SET): %s\n", path, strerror(errno));
    goto fail;
  }

  // Never request 0 bytes: malloc(0) may return NULL, which a caller could
  // not tell apart from failure.
  buf = malloc(size > 0 ? (size_t)size : 1);
  if (buf == NULL) {
    fprintf(stderr, "malloc(%s): %s\n", path, strerror(errno));
    goto fail;
  }

  // read() may return fewer bytes than requested; keep going until the whole
  // file has been consumed or EOF is reached.
  while (total < size) {
    ssize_t count = read(fd, (char *)buf + total, (size_t)(size - total));
    if (count == -1) {
      if (errno == EINTR) {
        continue;
      }
      fprintf(stderr, "read(%s): %s\n", path, strerror(errno));
      goto fail;
    }
    if (count == 0) {
      break; // file shrank after it was measured
    }
    total += count;
  }

  if (close(fd) == -1) {
    fprintf(stderr, "close(%s): %s\n", path, strerror(errno));
    free(buf);
    return NULL;
  }

  if (size_out != NULL) {
    *size_out = (int)total;
  }

  return buf;

fail:
  free(buf);
  close(fd);
  return NULL;
}

11
src/file.h Normal file
View File

@ -0,0 +1,11 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
// Reads the entire file at `path` into a malloc()ed buffer and returns it;
// the caller owns the buffer.  When size_out is non-NULL it receives the file
// size in bytes.  Returns NULL (after printing a message to stderr) on failure.
void * file_read(const char * path, int * size_out);
#ifdef __cplusplus
}
#endif

6
src/light.fs.asm Normal file
View File

@ -0,0 +1,6 @@
-- temp[0] -- color
-- Binds temp[0] as src0 and writes its rgb to out[0].rgb (MAX of a value with
-- itself is that value).
-- NOTE(review): the `1` swizzle on the alpha line presumably selects constant
-- one -- confirm against the assembler.
OUT TEX_SEM_WAIT
src0.rgb = temp[0] :
out[0].a = MAX src0.1 src0.1 ,
out[0].rgb = MAX src0.rgb src0.rgb ;

BIN
src/light.fs.bin Normal file

Binary file not shown.

13
src/light.vs.asm Normal file
View File

@ -0,0 +1,13 @@
-- input[0] -- position
-- input[1] -- color (listed here, but not read by any instruction below)
-- const[0..3] -- the four vectors dotted with the position
--                (presumably the rows of a 4x4 transform -- confirm with the caller)
-- const[4] -- value written to out[1] as the color
-- transform position to clip space: one 4-component dot product per output component
temp[1].x = VE_DOT const[0].xyzw input[0].xyzw ;
temp[1].y = VE_DOT const[1].xyzw input[0].xyzw ;
temp[1].z = VE_DOT const[2].xyzw input[0].xyzw ;
temp[1].w = VE_DOT const[3].xyzw input[0].xyzw ;
-- position (clip space): temp[1] plus a `.0000` swizzle, presumably a plain move
out[0].xyzw = VE_ADD temp[1].xyzw const[0].0000 ;
-- color: taken from const[4], not from input[1]
out[1].xyzw = VE_ADD const[4].xyzw const[4].0000 ;

BIN
src/light.vs.bin Normal file

Binary file not shown.

1
src/math Symbolic link
View File

@ -0,0 +1 @@
../drm/math

610
src/matrix_cubesphere.cpp Normal file
View File

@ -0,0 +1,610 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
#include "../model/cubesphere.h"
// Shader indices.  Each value is the position of that shader's file in both
// path tables below; _3d_clear() also uses CLEAR_SHADER to index
// shaders.vertex / shaders.fragment.
#define CLEAR_SHADER 0
#define CUBESPHERE_SHADER 1
#define LIGHT_SHADER 2
// Assembled vertex shader binaries, in *_SHADER index order.
const char * vertex_shader_paths[] = {
  "clear.vs.bin",
  "matrix_cubesphere.vs.bin",
  "light.vs.bin",
};
const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
// Assembled fragment shader binaries, in *_SHADER index order.
const char * fragment_shader_paths[] = {
  "clear.fs.bin",
  "matrix_cubesphere.fs.bin",
  "light.fs.bin"
};
const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
// Vertex and fragment shader tables together with their element counts.
// _3d_clear() indexes both arrays with the *_SHADER constants.
// NOTE(review): presumably filled in path-table order by the shader loader --
// confirm where the struct is populated.
struct shaders {
  struct shader_offset * vertex;    // indexed by *_SHADER
  struct shader_offset * fragment;  // indexed by *_SHADER
  int vertex_length;                // number of entries in `vertex`
  int fragment_length;              // number of entries in `fragment`
};
// Appends the commands for the "clear" pass to the indirect buffer: state
// setup using the CLEAR_SHADER vertex/fragment shaders, followed by an
// immediate-mode draw of a single point at (800, 600).
//
// NOTE(review): the 1600 pitch, the (800, 600) center and the 800 x 600 based
// point size suggest one oversized point covering a 1600x1200 target --
// confirm against the display setup.  T0V / T3 / TU / TF are project macros
// defined outside this view; presumably they emit a register write, a type-3
// packet header, a raw dword and a float respectively.
void _3d_clear(struct shaders& shaders)
{
  ib_rs_instructions(0);
  //////////////////////////////////////////////////////////////////////////////
  // VAP OUT
  //////////////////////////////////////////////////////////////////////////////
  // Only the position is marked present in the vertex output format.
  T0V(VAP_OUT_VTX_FMT_0
      , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
  T0V(VAP_OUT_VTX_FMT_1
      , 0);
  //
  ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always
  ib_texture__0();
  ib_vap_stream_cntl__2();
  // shaders
  T0V(US_PIXSIZE
      , US_PIXSIZE__PIX_SIZE(1)
      );
  ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
  ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);
  //////////////////////////////////////////////////////////////////////////////
  // VAP INDEX
  //////////////////////////////////////////////////////////////////////////////
  T0V(VAP_INDEX_OFFSET, 0);
  T0V(VAP_VF_MAX_VTX_INDX
      , VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
      );
  T0V(VAP_VF_MIN_VTX_INDX
      , VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
      );
  //////////////////////////////////////////////////////////////////////////////
  // VAP
  //////////////////////////////////////////////////////////////////////////////
  T0V(VAP_CLIP_CNTL
      , VAP_CLIP_CNTL__CLIP_DISABLE(1)
      );
  T0V(VAP_VTE_CNTL
      , VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
      | VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
      );
  T0V(VAP_CNTL_STATUS
      , VAP_CNTL_STATUS__PVS_BYPASS(0)
      );
  //////////////////////////////////////////////////////////////////////////////
  // GA POINT SIZE
  //////////////////////////////////////////////////////////////////////////////
  // NOTE(review): the "* 12" factor looks like a sub-pixel fixed-point scale --
  // confirm the unit against the register reference.
  T0V(GA_POINT_SIZE
      , GA_POINT_SIZE__HEIGHT(600 * 12)
      | GA_POINT_SIZE__WIDTH(800 * 12)
      );
  //////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW
  //////////////////////////////////////////////////////////////////////////////
  const int dwords_per_vtx = 2;
  T0V(VAP_VTX_SIZE
      , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
      );
  const float center[] = {
    800.0f, 600.0f,
  };
  const int vertex_count = 1;
  // Packet payload: one VAP_VF_CNTL dword plus the vertex data; the count
  // argument is that total minus one.
  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
    );
  // Emit the two floats of `center` (vertex_count * dwords_per_vtx == 2).
  for (int i = 0; i < 2; i++) {
    TF(center[i]);
  }
}
// Build a matrix that linearly remaps the incoming z coordinate.
//
// With the row-dot-vector convention used for the shader constants, z in
// [low1, high1] is mapped onto [low2, high2] in the output z component and
// onto [low3, high3] in the output w component; x and y pass through.
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  const float in_range = high1 - low1;
  const float z_slope = (high2 - low2) / in_range;
  const float w_slope = (high3 - low3) / in_range;

  // move z so that low1 lands on zero
  mat4x4 shift(1, 0, 0, 0,
               0, 1, 0, 0,
               0, 0, 1, -low1,
               0, 0, 0, 1);

  // scale the shifted z into the two target ranges (z row and w row)
  mat4x4 remap(1, 0, 0, 0,
               0, 1, 0, 0,
               0, 0, z_slope, low2,
               0, 0, w_slope, low3);

  return remap * shift;
}
// Upload the cubesphere vertex shader constants and draw the mesh as an
// immediate-mode triangle stream (vertex data inline in the command stream).
//
// trans:       rows become shader constants 0..3
// world_trans: rows become shader constants 4..7
// light_pos:   becomes shader constant 8
void _3d_cube_inner(mat4x4 trans,
                    mat4x4 world_trans,
                    vec4 light_pos)
{
  T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);

  //////////////////////////////////////////////////////////////////////////////
  // VAP_PVS
  //////////////////////////////////////////////////////////////////////////////

  // two 4x4 matrices followed by one vec4
  float pvs_consts[(4 + 4 + 1) * 4];
  int fill = 0;
  for (int row = 0; row < 4; row++) {
    for (int col = 0; col < 4; col++) {
      pvs_consts[fill++] = trans[row][col];
    }
  }
  for (int row = 0; row < 4; row++) {
    for (int col = 0; col < 4; col++) {
      pvs_consts[fill++] = world_trans[row][col];
    }
  }
  pvs_consts[fill++] = light_pos.x;
  pvs_consts[fill++] = light_pos.y;
  pvs_consts[fill++] = light_pos.z;
  pvs_consts[fill++] = light_pos.w;
  ib_vap_pvs_const_cntl(pvs_consts, (sizeof (pvs_consts)));

  //////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW
  //////////////////////////////////////////////////////////////////////////////

  const model * const mdl = &cubesphere_model;
  const object * const mesh = mdl->object[0];
  const int triangles = mesh->triangle_count;
  const int corners = triangles * 3;
  // position xyz + texture uv + normal xyz
  const int floats_per_vertex = 8;

  T0V(VAP_VTX_SIZE
      , VAP_VTX_SIZE__DWORDS_PER_VTX(floats_per_vertex)
      );

  // payload is one VAP_VF_CNTL dword plus the vertex data; the packet count
  // field is payload length minus one
  T3(_3D_DRAW_IMMD_2, corners * floats_per_vertex);
  TU( VAP_VF_CNTL__PRIM_TYPE(4)
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(corners)
    );

  for (int tri = 0; tri < triangles; tri++) {
    for (int corner = 0; corner < 3; corner++) {
      vec3 pos = mdl->position[mesh->triangle[tri][corner].position];
      vec2 uv = mdl->texture[mesh->triangle[tri][corner].texture];
      vec3 nrm = mdl->normal[mesh->triangle[tri][corner].normal];

      TF(pos.x);
      TF(pos.y);
      TF(pos.z);

      TF(uv.x);
      TF(uv.y);

      TF(nrm.x);
      TF(nrm.y);
      TF(nrm.z);
    }
  }
}
// Upload the light marker's vertex shader constants and draw the cubesphere
// mesh as an immediate-mode triangle stream carrying positions only.
//
// trans: its four rows become shader constants 0..3. Constant 4 is the
//        marker colour (r, g, b, a).
void _3d_light_inner(mat4x4 trans)
{
  T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);

  //////////////////////////////////////////////////////////////////////////////
  // VAP_PVS
  //////////////////////////////////////////////////////////////////////////////

  const vec4 color = {1, 1, 0, 1};
  const float consts[] = {
    // 0
    trans[0][0], trans[0][1], trans[0][2], trans[0][3],
    trans[1][0], trans[1][1], trans[1][2], trans[1][3],
    trans[2][0], trans[2][1], trans[2][2], trans[2][3],
    trans[3][0], trans[3][1], trans[3][2], trans[3][3],
    // 4
    // the fourth component is alpha (color[3]); it used to repeat color[2]
    color[0], color[1], color[2], color[3],
  };
  ib_vap_pvs_const_cntl(consts, (sizeof (consts)));

  //////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW
  //////////////////////////////////////////////////////////////////////////////

  const model * model = &cubesphere_model;
  const object * obj = model->object[0];
  const int triangle_count = obj->triangle_count;
  const int vertex_count = triangle_count * 3;
  const int dwords_per_vtx = 3; // position xyz only

  T0V(VAP_VTX_SIZE
      , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
      );

  // payload: one VAP_VF_CNTL dword followed by the inline vertex data
  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(4)
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
    );

  for (int i = 0; i < triangle_count; i++) {
    for (int j = 0; j < 3; j++) {
      vec3 p = model->position[obj->triangle[i][j].position];
      TF(p.x);
      TF(p.y);
      TF(p.z);
    }
  }
}
// Emit the state and draw call for the small light marker, and report where
// the light is in world space.
//
// shaders:      program tables; the LIGHT_SHADER entries are used
// view_to_clip: matrix applied after the marker's world transform
// theta:        animation angle; the marker is rotated about y by 2 * theta
//
// Returns the model-space origin of the marker transformed by its world
// matrix (scale 0.1, translate (1, 0, 0), rotate about y).
vec3 _3d_light(struct shaders& shaders,
               const mat4x4& view_to_clip,
               float theta)
{
  ib_rs_instructions(1);

  //////////////////////////////////////////////////////////////////////////////
  // VAP OUT
  //////////////////////////////////////////////////////////////////////////////

  T0V(VAP_OUT_VTX_FMT_0
      , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
  T0V(VAP_OUT_VTX_FMT_1
      , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));

  //

  ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less

  ib_texture__0();

  ib_vap_stream_cntl__3();

  // shaders
  T0V(US_PIXSIZE
      , US_PIXSIZE__PIX_SIZE(1)
      );
  ib_ga_us(&shaders.fragment[LIGHT_SHADER]);
  ib_vap_pvs(&shaders.vertex[LIGHT_SHADER]);

  //////////////////////////////////////////////////////////////////////////////
  // VAP
  //////////////////////////////////////////////////////////////////////////////

  T0V(VAP_CLIP_CNTL
      , VAP_CLIP_CNTL__PS_UCP_MODE(3)
      );

  T0V(VAP_VTE_CNTL
      , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
      | VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
      | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
      | VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
      | VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0)
      | VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0)
      | VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
      | VAP_VTE_CNTL__VTX_Z_FMT(1)  // disable W division
      | VAP_VTE_CNTL__VTX_W0_FMT(1)
      | VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
      );

  T0V(VAP_CNTL_STATUS
      , VAP_CNTL_STATUS__PVS_BYPASS(0)
      );

  //////////////////////////////////////////////////////////////////////////////
  // VAP INDEX
  //////////////////////////////////////////////////////////////////////////////

  T0V(VAP_INDEX_OFFSET, 0);

  T0V(VAP_VF_MAX_VTX_INDX
      , VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
      );
  T0V(VAP_VF_MIN_VTX_INDX
      , VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
      );

  // light
  mat4x4 t1 = translate(vec3(1, 0, 0));
  mat4x4 s = scale(0.1f);
  mat4x4 rz = rotate_y(theta * 2.f);
  mat4x4 world_trans = rz * t1 * s;
  mat4x4 trans = view_to_clip * world_trans;

  _3d_light_inner(trans);

  // The previous code initialised light_pos from itself, which read an
  // uninitialised object. The marker is presumably centred on its model-space
  // origin, so transform that origin instead.
  // NOTE(review): relies on mat4x4 * vec3 treating the vec3 as a point
  // (implicit w = 1) -- confirm against math/transform.hpp.
  vec3 light_pos = world_trans * vec3(0, 0, 0);
  return light_pos;
}
// Emit the state for the textured, lit cubesphere and draw it through
// _3d_cube_inner using the CUBESPHERE_SHADER programs.
//
// shaders:      program tables; the CUBESPHERE_SHADER entries are used
// view_to_clip: matrix applied after the cube's world transform
// theta:        animation angle; drives the rotation about x only
// light_pos:    forwarded to _3d_cube_inner
void _3d_cube(struct shaders& shaders,
const mat4x4& view_to_clip,
float theta,
const vec3& light_pos)
{
ib_rs_instructions(4);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// position plus four 4-component texture coordinate sets
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_1_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_2_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_3_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
int width = 1024;
int height = 1024;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__323();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[CUBESPHERE_SHADER]);
ib_vap_pvs(&shaders.vertex[CUBESPHERE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
// cube
mat4x4 rx = rotate_x(1 * theta * 0.5f);
// theta is multiplied by 0 here, so the y rotation is a fixed 1.4
mat4x4 ry = rotate_y(0 * theta * 0.8f + 1.4f);
mat4x4 s = scale(0.9f);
mat4x4 world_trans = rx * ry * s;
mat4x4 trans = view_to_clip * world_trans;
_3d_cube_inner(trans, world_trans, light_pos);
}
// Build one frame's indirect buffer: common state, shader upload, then the
// clear, light and cube passes, padded to a multiple of 8 dwords.
//
// shaders: program tables uploaded and used by the passes
// theta:   animation angle forwarded to the light and cube passes
//
// Returns ib_ix after padding, i.e. the buffer length in dwords.
int indirect_buffer(shaders& shaders,
                    float theta)
{
  int width = 1600;
  int height = 1200;
  int pitch = width;

  ib_ix = 0;

  ib_generic_initialization();

  T0V(RB3D_BLENDCNTL, 0);
  T0V(RB3D_ABLENDCNTL, 0);

  ib_viewport(width, height);
  ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);

  T0V(GB_ENABLE, 0);

  T0V(US_OUT_FMT_0
      , US_OUT_FMT__OUT_FMT(0) // C4_8
      | US_OUT_FMT__C0_SEL__BLUE
      | US_OUT_FMT__C1_SEL__GREEN
      | US_OUT_FMT__C2_SEL__RED
      | US_OUT_FMT__C3_SEL__ALPHA
      | US_OUT_FMT__OUT_SIGN(0)
      );
  T0V(US_OUT_FMT_1
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  T0V(US_OUT_FMT_2
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  // The fourth output format register was never written because
  // US_OUT_FMT_2 was programmed twice.
  T0V(US_OUT_FMT_3
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );

  load_pvs_shaders(shaders.vertex, shaders.vertex_length);
  load_us_shaders(shaders.fragment, shaders.fragment_length);

  //////////////////////////////////////////////////////////////////////////////
  // DRAW
  //////////////////////////////////////////////////////////////////////////////

  mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
  mat4x4 p = perspective(0.01f, 5.0f,
                         0.001f, 0.999f,
                         0.5f, 2.0f);
  mat4x4 t = translate(vec3(0, 0, 3));
  mat4x4 view_to_clip = aspect * p * t;

  _3d_clear(shaders);
  vec3 light_pos = _3d_light(shaders, view_to_clip, theta);
  _3d_cube(shaders, view_to_clip, theta, light_pos);

  //////////////////////////////////////////////////////////////////////////////
  // padding
  //////////////////////////////////////////////////////////////////////////////

  // fill with 0x80000000 until the length is a multiple of 8 dwords
  while ((ib_ix % 8) != 0) {
    TU(0x80000000);
  }

  return ib_ix;
}
// Texture files handed to load_textures() in main, and the number of entries.
const char * textures[] = {
"../texture/butterfly_1024x1024_rgba8888.data",
};
const int textures_length = (sizeof (textures)) / (sizeof (textures[0]));
int main()
{
struct shaders shaders = {
.vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
.fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
.vertex_length = vertex_shader_paths_length,
.fragment_length = fragment_shader_paths_length,
};
void * rmmio = map_pci_resource2();
int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
assert(fd != -1);
const int colorbuffer_size = 1600 * 1200 * 4;
int colorbuffer_handle[2];
int zbuffer_handle;
int * texturebuffer_handle;
int flush_handle;
void * colorbuffer_ptr[2];
void * zbuffer_ptr;
// colorbuffer
colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
flush_handle = create_flush_buffer(fd);
texturebuffer_handle = load_textures(fd, textures, textures_length);
fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);
int colorbuffer_ix = 0;
float theta = 0;
while (true) {
int ib_dwords = indirect_buffer(shaders, theta);
int ret = drm_radeon_cs(fd,
colorbuffer_handle[colorbuffer_ix],
zbuffer_handle,
flush_handle,
texturebuffer_handle,
textures_length,
ib_dwords);
if (ret == -1)
break;
primary_surface_address(rmmio, colorbuffer_ix);
// next state
theta += 0.01f;
colorbuffer_ix = (colorbuffer_ix + 1) & 1;
}
close(fd);
}

View File

@ -0,0 +1,50 @@
-- Fragment shader for the lit, textured cubesphere.
-- Samples tex[0], normalizes the interpolated normal and the direction from
-- the fragment to the light, and scales the texel colour by the result.
--
-- temp[0] -- position (world space)
-- temp[1] -- normal
-- temp[2] -- light pos (world space)
-- temp[3] -- texture
-- PIXSIZE 4
TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE
temp[3].rgba = LD tex[0].rgba temp[3].rgaa ;
-- normal = normalize(normal)
-- normal = (1.0 / sqrt(dot(normal, normal))) * normal
src0.rgb = temp[1] :
DP3 src0.rgb src0.rgb ,
temp[1].a = DP ;
src0.a = temp[1] :
temp[1].a = RSQ |src0.a| ;
src0.a = temp[1], src0.rgb = temp[1] :
temp[1].rgb = MAD src0.rgb src0.aaa src0.000 ;
-- light_dir = normalize((f_light_pos - f_world_pos))
src1.rgb = temp[2] , -- f_light_pos
src0.rgb = temp[0] , -- f_world_pos
srcp.rgb = neg : -- (f_light_pos - f_world_pos)
DP3 srcp.rgb srcp.rgb ,
temp[2].a = DP ;
src0.a = temp[2] :
temp[2].a = RSQ |src0.a| ;
src0.a = temp[2], src0.rgb = temp[2] :
temp[2].rgb = MAD src0.rgb src0.aaa src0.000 ;
-- dot(normal, light_dir)
src0.rgb = temp[2] ,
src1.rgb = temp[1] :
DP3 src0.rgb src1.rgb ,
temp[4].a = DP ;
-- intensity = max(dot, 0)
src0.a = temp[4] :
temp[4].a = MAX src0.a src0.0 ;
-- intensity = intensity * 1 + constant
-- NOTE(review): float(32) is presumably an inline-constant encoding rather
-- than the literal value 32.0 -- confirm against the assembler
src0.a = temp[4] ,
src1.a = float(32) :
temp[4].a = MAD src0.a src0.1 src1.a ;
-- out.rgb = texel.rgb * intensity ; out.a = texel.a
OUT TEX_SEM_WAIT
src0.a = temp[3],
src0.rgb = temp[3] ,
src1.a = temp[4] :
out[0].a = MAX src0.a src0.a ,
out[0].rgb = MAD src0.rgb src1.aaa src2.000 ;

Binary file not shown.

View File

@ -0,0 +1,45 @@
--
-- Vertex shader for the lit, textured cubesphere. Each matrix * vector
-- product is written as one dot product per matrix row:
-- dot(m[0], v), dot(m[1], v), dot(m[2], v), dot(m[3], v)
--
-- input[0] -- position
-- input[1] -- texture
-- input[2] -- normal
-- consts[0] -- trans
-- consts[4] -- world_trans
-- consts[8] -- light position (world space)
-- out[0] -- position clip space
-- out[1] -- object position world space
-- out[2] -- normal
-- out[3] -- light position world space
-- out[4] -- texture
-- position clip space
temp[1].x = VE_DOT const[0].xyzw input[0].xyzw ;
temp[1].y = VE_DOT const[1].xyzw input[0].xyzw ;
temp[1].z = VE_DOT const[2].xyzw input[0].xyzw ;
temp[1].w = VE_DOT const[3].xyzw input[0].xyzw ;
-- position world space
temp[2].x = VE_DOT const[4].xyzw input[0].xyzw ;
temp[2].y = VE_DOT const[5].xyzw input[0].xyzw ;
temp[2].z = VE_DOT const[6].xyzw input[0].xyzw ;
temp[2].w = VE_DOT const[7].xyzw input[0].xyzw ;
-- normal world space (w is swizzled to 0 on both operands, so only the
-- upper-left 3x3 part of world_trans is applied)
temp[3].x = VE_DOT const[4].xyz0 input[2].xyz0 ;
temp[3].y = VE_DOT const[5].xyz0 input[2].xyz0 ;
temp[3].z = VE_DOT const[6].xyz0 input[2].xyz0 ;
-- the outputs below are copies: each VE_ADD adds a .0000 (all-zero) operand
-- position (clip space)
out[0].xyzw = VE_ADD temp[1].xyzw const[0].0000 ;
-- position (world space)
out[1].xyzw = VE_ADD temp[2].xyzw const[0].0000 ;
-- normal
out[2].xyzw = VE_ADD temp[3].xyz0 const[0].0000 ;
-- light pos (world space)
out[3].xyzw = VE_ADD const[8].xyzw const[8].0000 ;
-- texture
out[4].xyzw = VE_ADD input[1].xy00 const[0].0000 ;

Binary file not shown.

View File

@ -0,0 +1,672 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
#include "../model/cubesphere.h"
#include "../model/cubesphere_indexed.h"
// Indices into the shader path tables below (and into the loaded
// shaders.vertex / shaders.fragment tables); the order must match the tables.
#define CLEAR_SHADER 0
#define CUBESPHERE_SHADER 1
#define LIGHT_SHADER 2
// Position of the index buffer handle in the handles[] array built in main.
#define INDEXBUFFER_RELOC_INDEX 4
// Vertex shader binaries, in *_SHADER index order.
const char * vertex_shader_paths[] = {
"clear.vs.bin",
"matrix_cubesphere.vs.bin",
"light.vs.bin",
};
const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
// Fragment shader binaries, in *_SHADER index order.
const char * fragment_shader_paths[] = {
"clear.fs.bin",
"matrix_cubesphere.fs.bin",
"light.fs.bin"
};
const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
// The loaded shader programs, one table per pipeline stage. The tables are
// indexed with the *_SHADER constants (e.g. shaders.vertex[CLEAR_SHADER]).
struct shaders {
struct shader_offset * vertex; // vertex programs; handed to ib_vap_pvs / load_pvs_shaders
struct shader_offset * fragment; // fragment programs; handed to ib_ga_us / load_us_shaders
int vertex_length; // number of entries in `vertex`
int fragment_length; // number of entries in `fragment`
};
// Emit the clear pass into the indirect buffer: a single point primitive drawn
// with the CLEAR_SHADER programs while the depth compare is set to "always".
//
// The one vertex is (800, 600) and GA_POINT_SIZE is programmed with
// 800 * 12 by 600 * 12. Presumably this makes the point cover the whole
// 1600x1200 target -- TODO confirm the GA_POINT_SIZE units.
//
// shaders: program tables; only the CLEAR_SHADER entries are used here.
void _3d_clear(struct shaders& shaders)
{
ib_rs_instructions(0);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// position only; no texture coordinate outputs
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, 0);
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always
ib_texture__0();
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
);
ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// GA POINT SIZE
//////////////////////////////////////////////////////////////////////////////
T0V(GA_POINT_SIZE
, GA_POINT_SIZE__HEIGHT(600 * 12)
| GA_POINT_SIZE__WIDTH(800 * 12)
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
const float center[] = {
800.0f, 600.0f,
};
const int vertex_count = 1;
// packet payload: one VAP_VF_CNTL dword followed by the inline vertex data
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
for (int i = 0; i < 2; i++) {
TF(center[i]);
}
}
// Build a matrix that linearly remaps the incoming z coordinate.
//
// With the row-dot-vector convention used for the shader constants, z in
// [low1, high1] is mapped onto [low2, high2] in the output z component and
// onto [low3, high3] in the output w component; x and y pass through.
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  const float in_range = high1 - low1;
  const float z_slope = (high2 - low2) / in_range;
  const float w_slope = (high3 - low3) / in_range;

  // move z so that low1 lands on zero
  mat4x4 shift(1, 0, 0, 0,
               0, 1, 0, 0,
               0, 0, 1, -low1,
               0, 0, 0, 1);

  // scale the shifted z into the two target ranges (z row and w row)
  mat4x4 remap(1, 0, 0, 0,
               0, 1, 0, 0,
               0, 0, z_slope, low2,
               0, 0, w_slope, low3);

  return remap * shift;
}
// Upload the cubesphere vertex shader constants, point the vertex fetcher at
// the vertex buffer (three interleaved arrays), and issue an indexed draw
// whose indices are read from the index buffer.
//
// trans:       rows become shader constants 0..3
// world_trans: rows become shader constants 4..7
// light_pos:   becomes shader constant 8
void _3d_cube_inner(mat4x4 trans,
mat4x4 world_trans,
vec4 light_pos)
{
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
//////////////////////////////////////////////////////////////////////////////
// VAP_PVS
//////////////////////////////////////////////////////////////////////////////
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
// 4
world_trans[0][0], world_trans[0][1], world_trans[0][2], world_trans[0][3],
world_trans[1][0], world_trans[1][1], world_trans[1][2], world_trans[1][3],
world_trans[2][0], world_trans[2][1], world_trans[2][2], world_trans[2][3],
world_trans[3][0], world_trans[3][1], world_trans[3][2], world_trans[3][3],
// 8
light_pos.x, light_pos.y, light_pos.z, light_pos.w,
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
//////////////////////////////////////////////////////////////////////////////
// AOS
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
// 6 payload dwords: array count, attr01, addr0, addr1, attr23, addr2
T3(_3D_LOAD_VBPNTR, (6 - 1));
TU( // VAP_VTX_NUM_ARRAYS
VAP_VTX_NUM_ARRAYS__VTX_NUM_ARRAYS(3)
| VAP_VTX_NUM_ARRAYS__VC_FORCE_PREFETCH(1)
);
// arrays 0 and 1: 3 and 2 dwords per vertex, both with a stride of 8
TU( // VAP_VTX_AOS_ATTR01
VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(3)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(8)
| VAP_VTX_AOS_ATTR__VTX_AOS_COUNT1(2)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE1(8)
);
// array 0 starts at byte offset 0
TU( // VAP_VTX_AOS_ADDR0
(4 * 0)
);
// array 1 starts after array 0's 3 dwords
TU( // VAP_VTX_AOS_ADDR1
(4 * 3)
);
// array 2: 3 dwords per vertex, stride 8
TU( // VAP_VTX_AOS_ATTR23
VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(3)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(8)
);
// array 2 starts after the 3 + 2 dwords of arrays 0 and 1
TU( // VAP_VTX_AOS_ADDR2
(4 * 5)
);
// VAP_VTX_AOS_ADDR is an absolute address in VRAM. However, DRM_RADEON_CS
// modifies this to be an offset relative to the GEM buffer handles given via
// NOP:
T3(_NOP, 0);
TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR0
T3(_NOP, 0);
TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR1
T3(_NOP, 0);
TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR2
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
// despite the name, this is the number of entries in the index list
const int vertex_count = cubesphere_Cube_triangles_length;
T3(_3D_DRAW_INDX_2, (1 - 1));
TU( VAP_VF_CNTL__PRIM_TYPE(4)
| VAP_VF_CNTL__PRIM_WALK(1) // indexes in command stream , vertices from memory
| VAP_VF_CNTL__INDEX_SIZE(1) // 32 bits per index
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
T3(_INDX_BUFFER, (3 - 1));
TU( // INDX_BUFFER__0
INDX_BUFFER__0__ONE_REG_WR(1)
| INDX_BUFFER__0__SKIP_COUNT(0)
| INDX_BUFFER__0__DESTINATION(VAP_PORT_IDX0 >> 2)
);
TU( // INDX_BUFFER__1
INDX_BUFFER__1__BUFFER_BASE(0) // replaced by reloc
);
TU( // INDX_BUFFER__2
INDX_BUFFER__2__BUFFER_SIZE(vertex_count) // in dwords
);
T3(_NOP, 0);
TU(INDEXBUFFER_RELOC_INDEX * 4); // index into relocs array for INDX_BUFFER__1__BUFFER_BASE
}
// Upload the light marker's vertex shader constants and draw the cubesphere
// mesh as an immediate-mode triangle stream carrying positions only.
//
// trans: its four rows become shader constants 0..3. Constant 4 is the
//        marker colour (r, g, b, a).
void _3d_light_inner(mat4x4 trans)
{
  T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);

  //////////////////////////////////////////////////////////////////////////////
  // VAP_PVS
  //////////////////////////////////////////////////////////////////////////////

  const vec4 color = {1, 1, 0, 1};
  const float consts[] = {
    // 0
    trans[0][0], trans[0][1], trans[0][2], trans[0][3],
    trans[1][0], trans[1][1], trans[1][2], trans[1][3],
    trans[2][0], trans[2][1], trans[2][2], trans[2][3],
    trans[3][0], trans[3][1], trans[3][2], trans[3][3],
    // 4
    // the fourth component is alpha (color[3]); it used to repeat color[2]
    color[0], color[1], color[2], color[3],
  };
  ib_vap_pvs_const_cntl(consts, (sizeof (consts)));

  //////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW
  //////////////////////////////////////////////////////////////////////////////

  const model * model = &cubesphere_model;
  const object * obj = model->object[0];
  const int triangle_count = obj->triangle_count;
  const int vertex_count = triangle_count * 3;
  const int dwords_per_vtx = 3; // position xyz only

  T0V(VAP_VTX_SIZE
      , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
      );

  // payload: one VAP_VF_CNTL dword followed by the inline vertex data
  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(4)
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
    );

  for (int i = 0; i < triangle_count; i++) {
    for (int j = 0; j < 3; j++) {
      vec3 p = model->position[obj->triangle[i][j].position];
      TF(p.x);
      TF(p.y);
      TF(p.z);
    }
  }
}
// Emit the state and draw call for the small light marker, and report where
// the light is in world space.
//
// shaders:      program tables; the LIGHT_SHADER entries are used
// view_to_clip: matrix applied after the marker's world transform
// theta:        animation angle; the marker is rotated about y by 2 * theta
//
// Returns the model-space origin of the marker transformed by its world
// matrix (scale 0.1, translate (1, 0, 0), rotate about y).
vec3 _3d_light(struct shaders& shaders,
               const mat4x4& view_to_clip,
               float theta)
{
  ib_rs_instructions(1);

  //////////////////////////////////////////////////////////////////////////////
  // VAP OUT
  //////////////////////////////////////////////////////////////////////////////

  T0V(VAP_OUT_VTX_FMT_0
      , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
  T0V(VAP_OUT_VTX_FMT_1
      , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));

  //

  ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less

  ib_texture__0();

  ib_vap_stream_cntl__3();

  // shaders
  T0V(US_PIXSIZE
      , US_PIXSIZE__PIX_SIZE(1)
      );
  ib_ga_us(&shaders.fragment[LIGHT_SHADER]);
  ib_vap_pvs(&shaders.vertex[LIGHT_SHADER]);

  //////////////////////////////////////////////////////////////////////////////
  // VAP
  //////////////////////////////////////////////////////////////////////////////

  T0V(VAP_CLIP_CNTL
      , VAP_CLIP_CNTL__PS_UCP_MODE(3)
      );

  T0V(VAP_VTE_CNTL
      , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
      | VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
      | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
      | VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
      | VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0)
      | VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0)
      | VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
      | VAP_VTE_CNTL__VTX_Z_FMT(1)  // disable W division
      | VAP_VTE_CNTL__VTX_W0_FMT(1)
      | VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
      );

  T0V(VAP_CNTL_STATUS
      , VAP_CNTL_STATUS__PVS_BYPASS(0)
      );

  //////////////////////////////////////////////////////////////////////////////
  // VAP INDEX
  //////////////////////////////////////////////////////////////////////////////

  T0V(VAP_INDEX_OFFSET, 0);

  T0V(VAP_VF_MAX_VTX_INDX
      , VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
      );
  T0V(VAP_VF_MIN_VTX_INDX
      , VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
      );

  // light
  mat4x4 t1 = translate(vec3(1, 0, 0));
  mat4x4 s = scale(0.1f);
  mat4x4 rz = rotate_y(theta * 2.f);
  mat4x4 world_trans = rz * t1 * s;
  mat4x4 trans = view_to_clip * world_trans;

  _3d_light_inner(trans);

  // The previous code initialised light_pos from itself, which read an
  // uninitialised object. The marker is presumably centred on its model-space
  // origin, so transform that origin instead.
  // NOTE(review): relies on mat4x4 * vec3 treating the vec3 as a point
  // (implicit w = 1) -- confirm against math/transform.hpp.
  vec3 light_pos = world_trans * vec3(0, 0, 0);
  return light_pos;
}
// Emit the state for the textured, lit cubesphere and draw it through
// _3d_cube_inner using the CUBESPHERE_SHADER programs.
//
// shaders:      program tables; the CUBESPHERE_SHADER entries are used
// view_to_clip: matrix applied after the cube's world transform
// theta:        animation angle; drives the rotation about x only
// light_pos:    forwarded to _3d_cube_inner
void _3d_cube(struct shaders& shaders,
const mat4x4& view_to_clip,
float theta,
const vec3& light_pos)
{
ib_rs_instructions(4);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// position plus four 4-component texture coordinate sets
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_1_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_2_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_3_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
int width = 1024;
int height = 1024;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__323();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[CUBESPHERE_SHADER]);
ib_vap_pvs(&shaders.vertex[CUBESPHERE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
// hard-coded upper bound for vertex indices used by the indexed draw
// NOTE(review): presumably (cubesphere_vertices_length / 8) - 1 -- confirm
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(124)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
// cube
mat4x4 rx = rotate_x(1 * theta * 0.5f);
// theta is multiplied by 0 here, so the y rotation is a fixed 1.4
mat4x4 ry = rotate_y(0 * theta * 0.8f + 1.4f);
mat4x4 s = scale(0.9f);
mat4x4 world_trans = rx * ry * s;
mat4x4 trans = view_to_clip * world_trans;
_3d_cube_inner(trans, world_trans, light_pos);
}
// Build one frame's indirect buffer: common state, shader upload, then the
// clear, light and cube passes, padded to a multiple of 8 dwords.
//
// shaders: program tables uploaded and used by the passes
// theta:   animation angle forwarded to the light and cube passes
//
// Returns ib_ix after padding, i.e. the buffer length in dwords.
int indirect_buffer(shaders& shaders,
                    float theta)
{
  int width = 1600;
  int height = 1200;
  int pitch = width;

  ib_ix = 0;

  ib_generic_initialization();

  T0V(RB3D_BLENDCNTL, 0);
  T0V(RB3D_ABLENDCNTL, 0);

  ib_viewport(width, height);
  ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);

  T0V(GB_ENABLE, 0);

  T0V(US_OUT_FMT_0
      , US_OUT_FMT__OUT_FMT(0) // C4_8
      | US_OUT_FMT__C0_SEL__BLUE
      | US_OUT_FMT__C1_SEL__GREEN
      | US_OUT_FMT__C2_SEL__RED
      | US_OUT_FMT__C3_SEL__ALPHA
      | US_OUT_FMT__OUT_SIGN(0)
      );
  T0V(US_OUT_FMT_1
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  T0V(US_OUT_FMT_2
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  // The fourth output format register was never written because
  // US_OUT_FMT_2 was programmed twice.
  T0V(US_OUT_FMT_3
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );

  load_pvs_shaders(shaders.vertex, shaders.vertex_length);
  load_us_shaders(shaders.fragment, shaders.fragment_length);

  //////////////////////////////////////////////////////////////////////////////
  // DRAW
  //////////////////////////////////////////////////////////////////////////////

  mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
  mat4x4 p = perspective(0.01f, 5.0f,
                         0.001f, 0.999f,
                         0.5f, 2.0f);
  mat4x4 t = translate(vec3(0, 0, 3));
  mat4x4 view_to_clip = aspect * p * t;

  _3d_clear(shaders);
  vec3 light_pos = _3d_light(shaders, view_to_clip, theta);
  _3d_cube(shaders, view_to_clip, theta, light_pos);

  //////////////////////////////////////////////////////////////////////////////
  // padding
  //////////////////////////////////////////////////////////////////////////////

  // fill with 0x80000000 until the length is a multiple of 8 dwords
  while ((ib_ix % 8) != 0) {
    TU(0x80000000);
  }

  return ib_ix;
}
// Texture files handed to load_textures() in main, and the number of entries.
const char * textures[] = {
"../texture/butterfly_1024x1024_rgba8888.data",
};
const int textures_length = (sizeof (textures)) / (sizeof (textures[0]));
void transfer_vertex_index(void * vertexbuffer_ptr,
void * indexbuffer_ptr)
{
for (int i = 0; i < cubesphere_vertices_length; i++) {
((float *)vertexbuffer_ptr)[i] = cubesphere_vertices[i];
}
for (int i = 0; i < cubesphere_vertices_length; i++) {
((int *)indexbuffer_ptr)[i] = cubesphere_Cube_triangles[i];
}
asm volatile ("" ::: "memory");
}
int main()
{
struct shaders shaders = {
.vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
.fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
.vertex_length = vertex_shader_paths_length,
.fragment_length = fragment_shader_paths_length,
};
void * rmmio = map_pci_resource2();
int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
assert(fd != -1);
const int colorbuffer_size = 1600 * 1200 * 4;
int colorbuffer_handle[2];
int zbuffer_handle;
int vertexbuffer_handle;
int * texturebuffer_handle;
int indexbuffer_handle;
void * vertexbuffer_ptr;
void * indexbuffer_ptr;
void * colorbuffer_ptr[2];
void * zbuffer_ptr;
// cubesphere_Cube_triangles_length, cubesphere_vertices_length
const int vertexbuffer_size = cubesphere_vertices_length * 4;
const int indexbuffer_size = cubesphere_Cube_triangles_length * 4;
// colorbuffer
colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
vertexbuffer_handle = create_buffer(fd, vertexbuffer_size, &vertexbuffer_ptr);
texturebuffer_handle = load_textures(fd, textures, textures_length);
indexbuffer_handle = create_buffer(fd, indexbuffer_size, &indexbuffer_ptr);
fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);
transfer_vertex_index(vertexbuffer_ptr, indexbuffer_ptr);
int colorbuffer_ix = 0;
float theta = 0;
while (true) {
int ib_dwords = indirect_buffer(shaders, theta);
int handles[] = {
colorbuffer_handle[colorbuffer_ix], // 0
zbuffer_handle, // 1
vertexbuffer_handle, // 2
texturebuffer_handle[0], // 3
indexbuffer_handle, // 4
};
int handles_length = (sizeof (handles)) / (sizeof (handles[0]));
int ret = drm_radeon_cs2(fd,
handles,
handles_length,
ib_dwords);
if (ret == -1)
break;
primary_surface_address(rmmio, colorbuffer_ix);
// next state
theta += 0.01f;
colorbuffer_ix = (colorbuffer_ix + 1) & 1;
}
close(fd);
}

View File

@ -0,0 +1,821 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
#include "../model/cubesphere.h"
// Indices into vertex_shader_paths / fragment_shader_paths (and into the
// tables load_shaders() builds from them).
#define CLEAR_SHADER 0
#define CUBESPHERE_SHADER 1
#define LIGHT_SHADER 2
#define TEXTURE_TILE_SHADER 3

// Vertex shader binaries, one per *_SHADER index.
const char * vertex_shader_paths[] = {
  "clear.vs.bin",             // CLEAR_SHADER
  "matrix_cubesphere.vs.bin", // CUBESPHERE_SHADER
  "light.vs.bin",             // LIGHT_SHADER
  "texture_tile.vs.bin",      // TEXTURE_TILE_SHADER
};
const int vertex_shader_paths_length =
  sizeof (vertex_shader_paths) / sizeof (*vertex_shader_paths);

// Fragment shader binaries, in the same order as the vertex shaders.
const char * fragment_shader_paths[] = {
  "clear.fs.bin",             // CLEAR_SHADER
  "matrix_cubesphere.fs.bin", // CUBESPHERE_SHADER
  "light.fs.bin",             // LIGHT_SHADER
  "texture_tile.fs.bin",      // TEXTURE_TILE_SHADER
};
const int fragment_shader_paths_length =
  sizeof (fragment_shader_paths) / sizeof (*fragment_shader_paths);
// Bundle of the loaded shader tables that is handed to every draw routine.
// The tables are indexed with the *_SHADER constants (e.g. CLEAR_SHADER).
struct shaders {
struct shader_offset * vertex; // vertex shader table; main() fills it from load_shaders()
struct shader_offset * fragment; // fragment shader table; main() fills it from load_shaders()
int vertex_length; // entry count handed to load_pvs_shaders()
int fragment_length; // entry count handed to load_us_shaders()
};
// Emits the commands that clear the frame: a single point primitive centred at
// (800, 600) is drawn with the CLEAR_SHADER vertex/fragment pair, with the
// z-buffer compare argument set to 7 ("always") and clipping disabled.
//
// shaders: loaded shader tables; only the CLEAR_SHADER entries are used here.
void _3d_clear(const shaders& shaders)
{
ib_rs_instructions(0);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, 0);
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always
ib_texture__0();
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
);
ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// GA POINT SIZE
//////////////////////////////////////////////////////////////////////////////
// NOTE(review): presumably a half-extent in 1/12 pixel units, so that the one
// point at (800, 600) covers the whole 1600x1200 target -- confirm against the
// register reference.
T0V(GA_POINT_SIZE
, GA_POINT_SIZE__HEIGHT(600 * 12)
| GA_POINT_SIZE__WIDTH(800 * 12)
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
const float center[] = {
800.0f, 600.0f,
};
const int vertex_count = 1;
// count argument: one VAP_VF_CNTL dword (the TU below) plus the vertex dwords,
// minus one
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
// the single vertex: x, y
for (int i = 0; i < 2; i++) {
TF(center[i]);
}
}
// Builds the projection matrix used by indirect_buffer().
//
// The result is the product of two matrices: the right-hand one carries -low1
// in the last slot of its third row, the left-hand one carries
// (slope, low2) and (slope, low3) in its third and fourth rows.
// Assuming mat4x4's constructor takes its arguments row by row and vectors are
// columns (TODO confirm), z is remapped from [low1, high1] to [low2, high2]
// and w receives z remapped from [low1, high1] to [low3, high3].
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  float depth_slope = (high2 - low2) / (high1 - low1);
  float w_slope = (high3 - low3) / (high1 - low1);

  // moves the start of the input range to zero
  mat4x4 rebase(1, 0, 0, 0,
                0, 1, 0, 0,
                0, 0, 1, -low1,
                0, 0, 0, 1);

  // scales into the two output ranges and adds their lower bounds
  mat4x4 remap(1, 0, 0, 0,
               0, 1, 0, 0,
               0, 0, depth_slope, low2,
               0, 0, w_slope, low3);

  return remap * rebase;
}
// Uploads the cubesphere vertex-shader constants and emits the whole mesh as
// immediate-mode vertices.
//
// trans:       uploaded as constants 0-3
// world_trans: uploaded as constants 4-7
// light_pos:   uploaded as constant 8
//
// Each triangle corner is written as 8 floats: position xyz, texture xy,
// normal xyz.
void _3d_cube_inner(mat4x4 trans,
mat4x4 world_trans,
vec4 light_pos)
{
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
//////////////////////////////////////////////////////////////////////////////
// VAP_PVS
//////////////////////////////////////////////////////////////////////////////
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
// 4
world_trans[0][0], world_trans[0][1], world_trans[0][2], world_trans[0][3],
world_trans[1][0], world_trans[1][1], world_trans[1][2], world_trans[1][3],
world_trans[2][0], world_trans[2][1], world_trans[2][2], world_trans[2][3],
world_trans[3][0], world_trans[3][1], world_trans[3][2], world_trans[3][3],
// 8
light_pos.x, light_pos.y, light_pos.z, light_pos.w,
};
// second argument is the size of the table in bytes
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
const model * model = &cubesphere_model;
const object * obj = model->object[0];
const int triangle_count = obj->triangle_count;
const int vertex_count = triangle_count * 3;
const int dwords_per_vtx = 8;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
// count argument: one VAP_VF_CNTL dword (the TU below) plus the vertex dwords,
// minus one
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(4)
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
// the number of TF() calls below must equal vertex_count * dwords_per_vtx
for (int i = 0; i < triangle_count; i++) {
for (int j = 0; j < 3; j++) {
vec3 p = model->position[obj->triangle[i][j].position];
vec2 t = model->texture[obj->triangle[i][j].texture];
vec3 n = model->normal[obj->triangle[i][j].normal];
TF(p.x);
TF(p.y);
TF(p.z);
TF(t.x);
TF(t.y);
TF(n.x);
TF(n.y);
TF(n.z);
}
}
}
// Uploads the light marker's vertex-shader constants and emits the cubesphere
// mesh as immediate-mode vertices carrying position only (3 floats each).
//
// trans: uploaded as constants 0-3
//
// Constant 4 is the marker color {1, 1, 0, 1}.
void _3d_light_inner(mat4x4 trans)
{
  T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);

  //////////////////////////////////////////////////////////////////////////////
  // VAP_PVS
  //////////////////////////////////////////////////////////////////////////////
  const vec4 color = {1, 1, 0, 1};
  const float consts[] = {
    // 0
    trans[0][0], trans[0][1], trans[0][2], trans[0][3],
    trans[1][0], trans[1][1], trans[1][2], trans[1][3],
    trans[2][0], trans[2][1], trans[2][2], trans[2][3],
    trans[3][0], trans[3][1], trans[3][2], trans[3][3],
    // 4: all four color components (the last slot used to repeat color[2],
    // which uploaded 0 instead of the intended 1)
    color[0], color[1], color[2], color[3],
  };
  // second argument is the size of the table in bytes
  ib_vap_pvs_const_cntl(consts, (sizeof (consts)));

  //////////////////////////////////////////////////////////////////////////////
  // 3D_DRAW
  //////////////////////////////////////////////////////////////////////////////
  const model * model = &cubesphere_model;
  const object * obj = model->object[0];
  const int triangle_count = obj->triangle_count;
  const int vertex_count = triangle_count * 3;
  const int dwords_per_vtx = 3;

  T0V(VAP_VTX_SIZE
      , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
      );

  // count argument: one VAP_VF_CNTL dword (the TU below) plus the vertex
  // dwords, minus one
  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(4)
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
    );

  // the number of TF() calls must equal vertex_count * dwords_per_vtx
  for (int i = 0; i < triangle_count; i++) {
    for (int j = 0; j < 3; j++) {
      vec3 p = model->position[obj->triangle[i][j].position];
      TF(p.x);
      TF(p.y);
      TF(p.z);
    }
  }
}
// Sets up the pipeline state for the light marker, draws it, and returns the
// marker's position so the caller can pass it on to _3d_cube().
//
// shaders:      loaded shader tables; the LIGHT_SHADER entries are used
// view_to_clip: transform applied after the marker's own world transform
// theta:        animation angle; the marker is rotated by theta * 2
//
// Returns the marker's local origin transformed by its world transform.
vec3 _3d_light(const shaders& shaders,
               const mat4x4& view_to_clip,
               float theta)
{
  ib_rs_instructions(1);

  //////////////////////////////////////////////////////////////////////////////
  // VAP OUT
  //////////////////////////////////////////////////////////////////////////////
  T0V(VAP_OUT_VTX_FMT_0
      , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
  T0V(VAP_OUT_VTX_FMT_1
      , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));

  //
  ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
  ib_texture__0();
  ib_vap_stream_cntl__3();

  // shaders
  T0V(US_PIXSIZE
      , US_PIXSIZE__PIX_SIZE(1)
      );
  ib_ga_us(&shaders.fragment[LIGHT_SHADER]);
  ib_vap_pvs(&shaders.vertex[LIGHT_SHADER]);

  //////////////////////////////////////////////////////////////////////////////
  // VAP
  //////////////////////////////////////////////////////////////////////////////
  T0V(VAP_CLIP_CNTL
      , VAP_CLIP_CNTL__PS_UCP_MODE(3)
      );
  T0V(VAP_VTE_CNTL
      , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
      | VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
      | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
      | VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
      | VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0)
      | VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0)
      | VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
      | VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
      | VAP_VTE_CNTL__VTX_W0_FMT(1)
      | VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
      );
  T0V(VAP_CNTL_STATUS
      , VAP_CNTL_STATUS__PVS_BYPASS(0)
      );

  //////////////////////////////////////////////////////////////////////////////
  // VAP INDEX
  //////////////////////////////////////////////////////////////////////////////
  T0V(VAP_INDEX_OFFSET, 0);
  T0V(VAP_VF_MAX_VTX_INDX
      , VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
      );
  T0V(VAP_VF_MIN_VTX_INDX
      , VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
      );

  // light: scale the mesh down, move it along x, then rotate it about y
  mat4x4 t1 = translate(vec3(1, 0, 0));
  mat4x4 s = scale(0.1f);
  mat4x4 ry = rotate_y(theta * 2.f);
  mat4x4 world_trans = ry * t1 * s;
  mat4x4 trans = view_to_clip * world_trans;
  _3d_light_inner(trans);

  // The marker's world position is its local origin pushed through
  // world_trans.  The previous code multiplied world_trans by `light_pos`
  // inside light_pos's own initializer, i.e. it read the variable before it
  // had a value.
  // NOTE(review): this assumes the mat4x4 * vec3 operator treats the vector as
  // a point (implicit w = 1) -- confirm in the math headers.
  vec3 light_pos = world_trans * vec3(0, 0, 0);
  return light_pos;
}
// Sets up the pipeline state for the textured, lit cubesphere and draws it via
// _3d_cube_inner().
//
// shaders:      loaded shader tables; the CUBESPHERE_SHADER entries are used
// view_to_clip: transform applied after the mesh's own world transform
// theta:        animation angle; drives the rotation about x
// light_pos:    light position forwarded to the vertex shader constants
void _3d_cube(const shaders& shaders,
const mat4x4& view_to_clip,
float theta,
const vec3& light_pos)
{
ib_rs_instructions(4);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_1_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_2_COMP_CNT(4)
| VAP_OUT_VTX_FMT_1__TEX_3_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
// The texture is sampled as macro- and micro-tiled; main() binds the buffer
// that _tile_texture() rendered (with both tiling flags set) to this slot.
int width = 1024;
int height = 1024;
int macrotile = 1;
int microtile = 1;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__323();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[CUBESPHERE_SHADER]);
ib_vap_pvs(&shaders.vertex[CUBESPHERE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
// cube
mat4x4 rx = rotate_x(1 * theta * 0.5f);
// theta is multiplied by 0 here, so the y rotation is a constant 1.4
mat4x4 ry = rotate_y(0 * theta * 0.8f + 1.4f);
mat4x4 s = scale(0.9f);
mat4x4 world_trans = rx * ry * s;
mat4x4 trans = view_to_clip * world_trans;
_3d_cube_inner(trans, world_trans, light_pos);
}
// Builds the indirect buffer for one frame: common state, shader upload, then
// the clear, light and cube draws, followed by padding.
//
// shaders: loaded shader tables
// theta:   animation angle forwarded to the light and cube draws
//
// Returns the final value of ib_ix, which main() passes to drm_radeon_cs() as
// the dword count.
int indirect_buffer(const shaders& shaders,
float theta)
{
int width = 1600;
int height = 1200;
int pitch = width;
// start writing at the beginning of the buffer
ib_ix = 0;
ib_generic_initialization();
// blending off
T0V(RB3D_BLENDCNTL, 0);
T0V(RB3D_ABLENDCNTL, 0);
ib_viewport(width, height);
// last two arguments are the macrotile / microtile flags (see _tile_texture)
ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);
T0V(GB_ENABLE, 0);
T0V(US_OUT_FMT_0
, US_OUT_FMT__OUT_FMT(0) // C4_8
| US_OUT_FMT__C0_SEL__BLUE
| US_OUT_FMT__C1_SEL__GREEN
| US_OUT_FMT__C2_SEL__RED
| US_OUT_FMT__C3_SEL__ALPHA
| US_OUT_FMT__OUT_SIGN(0)
);
T0V(US_OUT_FMT_1
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_3
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
load_pvs_shaders(shaders.vertex, shaders.vertex_length);
load_us_shaders(shaders.fragment, shaders.fragment_length);
//////////////////////////////////////////////////////////////////////////////
// DRAW
//////////////////////////////////////////////////////////////////////////////
// x is scaled by 3/4, matching the 1600x1200 target
mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
mat4x4 p = perspective(0.01f, 5.0f,
0.001f, 0.999f,
0.5f, 2.0f);
mat4x4 t = translate(vec3(0, 0, 3));
mat4x4 view_to_clip = aspect * p * t;
_3d_clear(shaders);
// the light is drawn first because the cube needs its position
vec3 light_pos = _3d_light(shaders, view_to_clip, theta);
_3d_cube(shaders, view_to_clip, theta, light_pos);
//////////////////////////////////////////////////////////////////////////////
// padding
//////////////////////////////////////////////////////////////////////////////
// pad with 0x80000000 until the dword count is a multiple of 8
// NOTE(review): presumably a no-op packet header -- confirm
while ((ib_ix % 8) != 0) {
TU(0x80000000);
}
return ib_ix;
}
// Builds an indirect buffer that copies a 1024x1024 texture into a render
// target configured with the macrotile and microtile flags set: one point
// primitive centred on the target is drawn with the TEXTURE_TILE_SHADER pair,
// sampling the untiled input texture.
//
// shaders:            loaded shader tables
// input_reloc_index:  relocation index of the source texture
// output_reloc_index: relocation index of the destination color buffer
//
// Returns the final value of ib_ix. Unlike indirect_buffer(), no padding is
// appended here.
int _tile_texture(const shaders& shaders,
int input_reloc_index,
int output_reloc_index)
{
int width = 1024;
int height = 1024;
int pitch = width;
// centre of the target; also used (times 12) as the point size below
float x = (float)width * 0.5f;
float y = (float)height * 0.5f;
// start writing at the beginning of the buffer
ib_ix = 0;
ib_generic_initialization();
ib_viewport(width, height);
ib_colorbuffer(output_reloc_index, pitch, 1, 1); // macrotile, microtile
// channel order here is RED, GREEN, BLUE, ALPHA (indirect_buffer() uses BLUE,
// GREEN, RED, ALPHA)
T0V(US_OUT_FMT_0
, US_OUT_FMT__OUT_FMT(0) // C4_8
| US_OUT_FMT__C0_SEL__RED
| US_OUT_FMT__C1_SEL__GREEN
| US_OUT_FMT__C2_SEL__BLUE
| US_OUT_FMT__C3_SEL__ALPHA
| US_OUT_FMT__OUT_SIGN(0)
);
T0V(US_OUT_FMT_1
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_3
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
// shaders
load_pvs_shaders(shaders.vertex, shaders.vertex_length);
load_us_shaders(shaders.fragment, shaders.fragment_length);
// GA
// texture coordinates are generated across the point ("stuffed") instead of
// coming from the vertex
T0V(GB_ENABLE
, GB_ENABLE__POINT_STUFF_ENABLE(1)
| GB_ENABLE__TEX0_SOURCE(2) // stuff with source texture coordinates s,t
);
T0Vf(GA_POINT_S0, 0.0f);
T0Vf(GA_POINT_T0, 1.0f);
T0Vf(GA_POINT_S1, 1.0f);
T0Vf(GA_POINT_T1, 0.0f);
//////////////////////////////////////////////////////////////////////////////
// RS
//////////////////////////////////////////////////////////////////////////////
int rs_instructions = 1;
// the helper is called with 0; the RS registers for the single instruction are
// then written by hand below
ib_rs_instructions(0);
T0V(RS_IP_0
, RS_IP__TEX_PTR_S(0)
| RS_IP__TEX_PTR_T(1)
| RS_IP__TEX_PTR_R(62) // constant 0.0
| RS_IP__TEX_PTR_Q(63) // constant 1.0
);
T0V(RS_COUNT
, RS_COUNT__IT_COUNT(2)
| RS_COUNT__IC_COUNT(0)
| RS_COUNT__W_ADDR(0)
| RS_COUNT__HIRES_EN(1)
);
T0V(RS_INST_COUNT
, RS_INST_COUNT__INST_COUNT(rs_instructions - 1));
T0V(RS_INST_0
, RS_INST__TEX_ID(0)
| RS_INST__TEX_CN(1)
| RS_INST__TEX_ADDR(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)
);
T0V(VAP_OUT_VTX_FMT_1
, 0
);
//
// both z-buffer control registers are zeroed for this pass
T0V(ZB_CNTL, 0);
T0V(ZB_ZSTENCILCNTL, 0);
//
// the source texture is read with both tiling flags clear
int macrotile = 0;
int microtile = 0;
int clamp = 2; // clamp to [0.0, 1.0]
ib_texture__1(input_reloc_index,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__2();
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
);
ib_ga_us(&shaders.fragment[TEXTURE_TILE_SHADER]);
ib_vap_pvs(&shaders.vertex[TEXTURE_TILE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// GA POINT SIZE
//////////////////////////////////////////////////////////////////////////////
// NOTE(review): presumably a half-extent in 1/12 pixel units, so the point
// covers the whole width x height target -- confirm against the register
// reference.
T0V(GA_POINT_SIZE
, GA_POINT_SIZE__HEIGHT((int)(y * 12.0f))
| GA_POINT_SIZE__WIDTH((int)(x * 12.0f))
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
const float center[] = {
x, y,
};
const int vertex_count = 1;
// count argument: one VAP_VF_CNTL dword (the TU below) plus the vertex dwords,
// minus one
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
// the single vertex: x, y
for (int i = 0; i < 2; i++) {
TF(center[i]);
}
return ib_ix;
}
// Texture files handed to load_textures() in main().
const char * textures[] = {
  "../texture/butterfly_1024x1024_rgba8888.data",
};
const int textures_length = sizeof (textures) / sizeof (*textures);
// Entry point: loads shaders and textures, allocates the buffers, runs one
// texture-tiling pass into `test_handle`, then renders frames in a loop using
// that tiled buffer as the texture, flipping between two color buffers.
// When a submission returns -1 the loop ends and the first 1024*1024*4 bytes
// of the tiled buffer are written to "test.data".
int main()
{
struct shaders shaders = {
.vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
.fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
.vertex_length = vertex_shader_paths_length,
.fragment_length = fragment_shader_paths_length,
};
void * rmmio = map_pci_resource2();
int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
assert(fd != -1);
const int colorbuffer_size = 1600 * 1200 * 4;
int colorbuffer_handle[2];
int zbuffer_handle;
int * texturebuffer_handle;
int flush_handle;
int test_handle;
void * colorbuffer_ptr[2];
void * zbuffer_ptr;
void * test_ptr;
// colorbuffer
colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
// destination of the tiling pass; later bound as the cube's texture
test_handle = create_buffer(fd, 1600 * 1200 * 4, &test_ptr);
zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
flush_handle = create_flush_buffer(fd);
texturebuffer_handle = load_textures(fd, textures, textures_length);
fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);
fprintf(stderr, "test handle %d\n", test_handle);
int colorbuffer_ix = 0;
float theta = 0;
// one-off pass: render the loaded texture into test_handle in tiled layout
{
int ib_dwords = _tile_texture(shaders,
TEXTUREBUFFER_RELOC_INDEX, // input
COLORBUFFER_RELOC_INDEX); // output
//int ib_dwords = indirect_buffer(shaders, theta);
printf("here2\n");
// NOTE(review): the return value is ignored here, while the frame loop below
// treats -1 as failure -- consider checking it.
drm_radeon_cs(fd,
test_handle, // colorbuffer
zbuffer_handle, // unused
flush_handle,
texturebuffer_handle,
textures_length,
ib_dwords);
}
// frame loop: runs until a submission returns -1
while (true) {
int ib_dwords = indirect_buffer(shaders, theta);
int ret = drm_radeon_cs(fd,
colorbuffer_handle[colorbuffer_ix],
zbuffer_handle,
flush_handle,
//texturebuffer_handle,
//textures_length,
&test_handle,
1,
ib_dwords);
if (ret == -1)
break;
// hand the buffer index that was just rendered to the display helper
primary_surface_address(rmmio, colorbuffer_ix);
// next state
theta += 0.01f;
// toggle between color buffer 0 and 1
colorbuffer_ix = (colorbuffer_ix + 1) & 1;
}
// dump the tiled texture for inspection
{
printf("test.data\n");
int out_fd = open("test.data", O_RDWR|O_CREAT, 0644);
assert(out_fd >= 0);
ssize_t write_length = write(out_fd, test_ptr, 1024 * 1024 * 4);
assert(write_length == 1024 * 1024 * 4);
close(out_fd);
}
close(fd);
}

546
src/particle.cpp Normal file
View File

@ -0,0 +1,546 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
#include "../model/plane.h"
// Indices into vertex_shader_paths / fragment_shader_paths (and into the
// tables load_shaders() builds from them).
#define CLEAR_SHADER 0
#define PLANE_SHADER 1
#define PARTICLE_SHADER 2
#define TEXTURE_TILE_SHADER 3

// Vertex shader binaries, one per *_SHADER index.
const char * vertex_shader_paths[] = {
  "clear.vs.bin",             // CLEAR_SHADER
  "particle_plane.vs.bin",    // PLANE_SHADER
  "particle_particle.vs.bin", // PARTICLE_SHADER
  "texture_tile.vs.bin",      // TEXTURE_TILE_SHADER
};
const int vertex_shader_paths_length =
  sizeof (vertex_shader_paths) / sizeof (*vertex_shader_paths);

// Fragment shader binaries, in the same order as the vertex shaders.
const char * fragment_shader_paths[] = {
  "clear.fs.bin",             // CLEAR_SHADER
  "particle_plane.fs.bin",    // PLANE_SHADER
  "particle_particle.fs.bin", // PARTICLE_SHADER
  "texture_tile.fs.bin",      // TEXTURE_TILE_SHADER
};
const int fragment_shader_paths_length =
  sizeof (fragment_shader_paths) / sizeof (*fragment_shader_paths);

// Indices into textures; added to TEXTUREBUFFER_RELOC_INDEX when a texture is
// bound.
#define PLANE_TEXTURE 0
#define PARTICLE_TEXTURE 1

// Texture files handed to load_textures() in main().
const char * textures[] = {
  "../texture/plane_32x32_rgba8888.data",    // PLANE_TEXTURE
  "../texture/particle_32x32_rgba8888.data", // PARTICLE_TEXTURE
};
const int textures_length = sizeof (textures) / sizeof (*textures);
// Bundle of the loaded shader tables that is handed to every draw routine.
// The tables are indexed with the *_SHADER constants (e.g. CLEAR_SHADER).
struct shaders {
struct shader_offset * vertex; // vertex shader table; main() fills it from load_shaders()
struct shader_offset * fragment; // fragment shader table; main() fills it from load_shaders()
int vertex_length; // entry count handed to load_pvs_shaders()
int fragment_length; // entry count handed to load_us_shaders()
};
// Emits the commands that clear the frame: a single point primitive centred at
// (800, 600) is drawn with the CLEAR_SHADER vertex/fragment pair, with the
// z-buffer compare argument set to 7 ("always") and clipping disabled.
//
// shaders: loaded shader tables; only the CLEAR_SHADER entries are used here.
void _3d_clear(struct shaders& shaders)
{
ib_rs_instructions(0);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, 0);
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always
ib_texture__0();
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
);
ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// GA POINT SIZE
//////////////////////////////////////////////////////////////////////////////
// NOTE(review): presumably a half-extent in 1/12 pixel units, so that the one
// point at (800, 600) covers the whole 1600x1200 target -- confirm against the
// register reference.
T0V(GA_POINT_SIZE
, GA_POINT_SIZE__HEIGHT(600 * 12)
| GA_POINT_SIZE__WIDTH(800 * 12)
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
const float center[] = {
800.0f, 600.0f,
};
const int vertex_count = 1;
// count argument: one VAP_VF_CNTL dword (the TU below) plus the vertex dwords,
// minus one
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
// the single vertex: x, y
for (int i = 0; i < 2; i++) {
TF(center[i]);
}
}
// Builds the projection matrix used by indirect_buffer().
//
// The result is the product of two matrices: the right-hand one carries -low1
// in the last slot of its third row, the left-hand one carries
// (slope, low2) and (slope, low3) in its third and fourth rows.
// Assuming mat4x4's constructor takes its arguments row by row and vectors are
// columns (TODO confirm), z is remapped from [low1, high1] to [low2, high2]
// and w receives z remapped from [low1, high1] to [low3, high3].
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  float depth_slope = (high2 - low2) / (high1 - low1);
  float w_slope = (high3 - low3) / (high1 - low1);

  // moves the start of the input range to zero
  mat4x4 rebase(1, 0, 0, 0,
                0, 1, 0, 0,
                0, 0, 1, -low1,
                0, 0, 0, 1);

  // scales into the two output ranges and adds their lower bounds
  mat4x4 remap(1, 0, 0, 0,
               0, 1, 0, 0,
               0, 0, depth_slope, low2,
               0, 0, w_slope, low3);

  return remap * rebase;
}
// Uploads one 4x4 transform as vertex-shader constants 0-3 and emits the plane
// mesh as immediate-mode vertices.
//
// trans: transform uploaded to the vertex shader
//
// Each triangle corner is written as 5 floats: position xyz, texture xy.
void _3d_plane_inner(mat4x4 trans)
{
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
//////////////////////////////////////////////////////////////////////////////
// VAP_PVS
//////////////////////////////////////////////////////////////////////////////
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
};
// second argument is the size of the table in bytes
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
const model * model = &plane_model;
const object * obj = model->object[0];
const int triangle_count = obj->triangle_count;
const int vertex_count = triangle_count * 3;
const int dwords_per_vtx = 5;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
// count argument: one VAP_VF_CNTL dword (the TU below) plus the vertex dwords,
// minus one
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(4)
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
// the number of TF() calls below must equal vertex_count * dwords_per_vtx
for (int i = 0; i < triangle_count; i++) {
for (int j = 0; j < 3; j++) {
vec3 p = model->position[obj->triangle[i][j].position];
vec2 t = model->texture[obj->triangle[i][j].texture];
TF(p.x);
TF(p.y);
TF(p.z);
TF(t.x);
TF(t.y);
}
}
}
// Sets up the pipeline state for the textured ground plane and draws it via
// _3d_plane_inner(). The plane mesh is rotated by -PI/2 about x before
// world_to_clip is applied.
//
// shaders:       loaded shader tables; the PLANE_SHADER entries are used
// world_to_clip: transform applied after the plane's local-to-world transform
// theta:         not used by this function
void _3d_plane(struct shaders& shaders,
const mat4x4& world_to_clip,
float theta)
{
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
// texture slot = base relocation index + index into the textures table
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PLANE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__32();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PLANE_SHADER]);
ib_vap_pvs(&shaders.vertex[PLANE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
mat4x4 s = scale(1.0f);
mat4x4 rx = rotate_x(-PI / 2.0f);
mat4x4 local_to_world = s * rx;
mat4x4 trans = world_to_clip * local_to_world;
_3d_plane_inner(trans);
}
// Draws the particle: blending is switched on (source ONE, destination ONE),
// the particle texture is bound, and the plane mesh is drawn through
// _3d_plane_inner() without the extra x rotation that _3d_plane() applies.
//
// shaders:       loaded shader tables
// world_to_clip: transform applied after the local-to-world transform
// theta:         not used by this function
void _3d_particle(struct shaders& shaders,
const mat4x4& world_to_clip,
float theta)
{
// enable blending
T0V(RB3D_BLENDCNTL
, RB3D_BLENDCNTL__ALPHA_BLEND_ENABLE__ENABLE
| RB3D_BLENDCNTL__READ_ENABLE(1)
| RB3D_BLENDCNTL__SRCBLEND__GL_ONE
| RB3D_BLENDCNTL__DESTBLEND__GL_ONE
| RB3D_BLENDCNTL__SRC_ALPHA_0_NO_READ(0)
| RB3D_BLENDCNTL__SRC_ALPHA_1_NO_READ(0)
);
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
// texture slot = base relocation index + index into the textures table
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PARTICLE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__32();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
// NOTE(review): this binds PLANE_SHADER although PARTICLE_SHADER is defined
// and loaded; the vertex data comes from _3d_plane_inner(), so this may be
// deliberate -- confirm.
ib_ga_us(&shaders.fragment[PLANE_SHADER]);
ib_vap_pvs(&shaders.vertex[PLANE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
mat4x4 s = scale(1.0f);
mat4x4 local_to_world = s;
mat4x4 trans = world_to_clip * local_to_world;
_3d_plane_inner(trans);
}
// Builds the indirect buffer for one frame: common state, shader upload, then
// the clear, plane and particle draws, followed by padding.
//
// shaders: loaded shader tables
// theta:   animation angle; rotates the view about y by theta * 0.8
//
// Returns the final value of ib_ix, which main() passes to drm_radeon_cs() as
// the dword count.
int indirect_buffer(shaders& shaders,
                    float theta)
{
  int width = 1600;
  int height = 1200;
  int pitch = width;

  // start writing at the beginning of the buffer
  ib_ix = 0;

  ib_generic_initialization();

  // blending off; _3d_particle() turns it on for its own draw
  T0V(RB3D_BLENDCNTL, 0);
  T0V(RB3D_ABLENDCNTL, 0);

  ib_viewport(width, height);
  ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);

  T0V(GB_ENABLE, 0);

  T0V(US_OUT_FMT_0
      , US_OUT_FMT__OUT_FMT(0) // C4_8
      | US_OUT_FMT__C0_SEL__BLUE
      | US_OUT_FMT__C1_SEL__GREEN
      | US_OUT_FMT__C2_SEL__RED
      | US_OUT_FMT__C3_SEL__ALPHA
      | US_OUT_FMT__OUT_SIGN(0)
      );
  T0V(US_OUT_FMT_1
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  T0V(US_OUT_FMT_2
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  // This write used to target US_OUT_FMT_2 a second time, which left
  // US_OUT_FMT_3 unprogrammed.
  T0V(US_OUT_FMT_3
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );

  load_pvs_shaders(shaders.vertex, shaders.vertex_length);
  load_us_shaders(shaders.fragment, shaders.fragment_length);

  //////////////////////////////////////////////////////////////////////////////
  // DRAW
  //////////////////////////////////////////////////////////////////////////////

  // x is scaled by 3/4, matching the 1600x1200 target
  mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
  mat4x4 p = perspective(0.01f, 3.0f,
                         0.001f, 0.999f,
                         1.0f, 3.0f);
  mat4x4 t = translate(vec3(0, 0, 1));
  mat4x4 rx = rotate_x(-PI / 8.0f);
  mat4x4 ry = rotate_y(theta * 0.8f);
  mat4x4 world_to_view = t * rx * ry;
  mat4x4 world_to_clip = aspect * p * world_to_view;

  _3d_clear(shaders);
  _3d_plane(shaders, world_to_clip, theta);
  _3d_particle(shaders, world_to_clip, theta);
  //_3d_zbuffer(shaders);

  //////////////////////////////////////////////////////////////////////////////
  // padding
  //////////////////////////////////////////////////////////////////////////////

  // pad with 0x80000000 until the dword count is a multiple of 8
  while ((ib_ix % 8) != 0) {
    TU(0x80000000);
  }

  return ib_ix;
}
// Entry point: loads shaders and textures, allocates the buffers, renders a
// single frame, then writes color buffer 0 to "colorbuffer0.data" and the
// z-buffer to "zbuffer.data".
int main()
{
struct shaders shaders = {
.vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
.fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
.vertex_length = vertex_shader_paths_length,
.fragment_length = fragment_shader_paths_length,
};
void * rmmio = map_pci_resource2();
int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
assert(fd != -1);
const int colorbuffer_size = 1600 * 1200 * 4;
int colorbuffer_handle[2];
int zbuffer_handle;
int * texturebuffer_handle;
int flush_handle;
void * colorbuffer_ptr[2];
void * zbuffer_ptr;
// colorbuffer
colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
// the z-buffer is allocated with the same byte size as a color buffer
zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
flush_handle = create_flush_buffer(fd);
texturebuffer_handle = load_textures(fd, textures, textures_length);
fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);
int colorbuffer_ix = 0;
float theta = PI * 0.5;
// the unconditional break at the end of the body means exactly one frame is
// rendered
while (true) {
int ib_dwords = indirect_buffer(shaders, theta);
// NOTE(review): the return value of drm_radeon_cs() is not checked here
drm_radeon_cs(fd,
colorbuffer_handle[colorbuffer_ix],
zbuffer_handle,
flush_handle,
texturebuffer_handle,
textures_length,
ib_dwords);
primary_surface_address(rmmio, colorbuffer_ix);
// next state
theta += 0.01f;
colorbuffer_ix = (colorbuffer_ix + 1) & 1;
break;
}
// dump the rendered color buffer (index 0 is the one the single frame used)
{
printf("colorbuffer0.data\n");
int out_fd = open("colorbuffer0.data", O_RDWR|O_CREAT, 0644);
assert(out_fd >= 0);
ssize_t write_length = write(out_fd, colorbuffer_ptr[0], colorbuffer_size);
assert(write_length == colorbuffer_size);
close(out_fd);
}
// dump the z-buffer
{
printf("zbuffer.data\n");
int out_fd = open("zbuffer.data", O_RDWR|O_CREAT, 0644);
assert(out_fd >= 0);
ssize_t write_length = write(out_fd, zbuffer_ptr, colorbuffer_size);
assert(write_length == colorbuffer_size);
close(out_fd);
}
close(fd);
}

574
src/particle_oriented.cpp Normal file
View File

@ -0,0 +1,574 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
#include "../model/plane.h"
// Indices used to select an entry of shaders.vertex / shaders.fragment; the
// values line up with the order of the path tables below.
#define CLEAR_SHADER 0
#define PLANE_SHADER 1
#define PARTICLE_SHADER 2
#define TEXTURE_TILE_SHADER 3
// Vertex shader binaries, passed to load_shaders() in main().
const char * vertex_shader_paths[] = {
"clear.vs.bin",
"particle_plane.vs.bin",
"particle_particle.vs.bin",
"texture_tile.vs.bin",
};
const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
// Fragment shader binaries, passed to load_shaders() in main().
const char * fragment_shader_paths[] = {
"clear.fs.bin",
"particle_plane.fs.bin",
"particle_particle.fs.bin",
"texture_tile.fs.bin",
};
const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
// Offsets added to TEXTUREBUFFER_RELOC_INDEX when binding a texture; the
// values line up with the order of the `textures` table below.
#define PLANE_TEXTURE 0
#define PARTICLE_TEXTURE 1
// Texture files, passed to load_textures() in main().
const char * textures[] = {
"../texture/plane_32x32_rgba8888.data",
"../texture/particle_32x32_rgba8888.data",
};
const int textures_length = (sizeof (textures)) / (sizeof (textures[0]));
// Loaded shader tables shared by the draw functions. main() fills this from
// load_shaders(); entries are selected with the *_SHADER indices.
struct shaders {
// result of load_shaders(vertex_shader_paths, ...)
struct shader_offset * vertex;
// result of load_shaders(fragment_shader_paths, ...)
struct shader_offset * fragment;
// number of entries in `vertex`
int vertex_length;
// number of entries in `fragment`
int fragment_length;
};
// Appends the "clear" pass to the indirect buffer: selects the CLEAR_SHADER
// vertex/fragment shaders, sets the z function to "always", and draws one
// point primitive centered at (800, 600).
// NOTE(review): the point size is presumably chosen so the single point covers
// the whole 1600x1200 target — confirm the GA_POINT_SIZE units.
void _3d_clear(struct shaders& shaders)
{
ib_rs_instructions(0);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// position only; no texture coordinate outputs
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, 0);
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always
ib_texture__0();
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
);
ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// GA POINT SIZE
//////////////////////////////////////////////////////////////////////////////
T0V(GA_POINT_SIZE
, GA_POINT_SIZE__HEIGHT(600 * 12)
| GA_POINT_SIZE__WIDTH(800 * 12)
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
// each vertex is two floats (x, y)
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
const float center[] = {
800.0f, 600.0f,
};
const int vertex_count = 1;
// packet count = (one VAP_VF_CNTL dword + inline vertex data) - 1
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
// inline vertex data for the single point
for (int i = 0; i < 2; i++) {
TF(center[i]);
}
}
// Builds the projection matrix as the product of two matrices: the first
// offsets z by -low1, the second writes (scale * z + low) into the third and
// fourth rows, with scales (high2 - low2) / (high1 - low1) and
// (high3 - low3) / (high1 - low1).
// NOTE(review): assuming the mat4x4 constructor takes rows and the matrix is
// applied to column vectors, z in [low1, high1] maps to [low2, high2] and the
// output w to [low3, high3] — confirm against math/transform.hpp.
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  const float input_span = high1 - low1;
  const float z_scale = (high2 - low2) / input_span;
  const float w_scale = (high3 - low3) / input_span;

  // shift the input range so that it starts at zero
  mat4x4 shift = mat4x4(1, 0, 0, 0,
                        0, 1, 0, 0,
                        0, 0, 1, -low1,
                        0, 0, 0, 1
                        );

  // rescale the shifted z into the third and fourth rows
  mat4x4 remap = mat4x4(1, 0, 0, 0,
                        0, 1, 0, 0,
                        0, 0, z_scale, low2,
                        0, 0, w_scale, low3
                        );

  return remap * shift;
}
// Emits an immediate-mode triangle-list draw of plane_model's first object.
// Each vertex is five floats: position x, y, z followed by texture u, v.
// `trans` is accepted for the callers' convenience but is not read here.
void _3d_plane_inner(mat4x4 trans)
{
  const model * mdl = &plane_model;
  const object * mesh = mdl->object[0];

  const int dwords_per_vtx = 5;
  const int triangle_count = mesh->triangle_count;
  const int vertex_count = triangle_count * 3;
  // one VAP_VF_CNTL dword followed by the inline vertex data
  const int payload_dwords = 1 + vertex_count * dwords_per_vtx;

  T0V(VAP_VTX_SIZE
      , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
      );

  T3(_3D_DRAW_IMMD_2, payload_dwords - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(4)
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
    );

  // walk the vertices in triangle order: v / 3 is the triangle, v % 3 the corner
  for (int v = 0; v < vertex_count; v++) {
    const int tri = v / 3;
    const int corner = v % 3;
    vec3 pos = mdl->position[mesh->triangle[tri][corner].position];
    vec2 uv = mdl->texture[mesh->triangle[tri][corner].texture];
    TF(pos.x);
    TF(pos.y);
    TF(pos.z);
    TF(uv.x);
    TF(uv.y);
  }
}
// Appends the textured plane pass to the indirect buffer: binds the plane
// texture and PLANE_SHADER shaders, uploads the local-to-clip matrix as 16
// vertex shader constants, then draws the plane via _3d_plane_inner().
//
// shaders:       loaded shader tables
// world_to_clip: world space to clip space transform
// theta:         not used in this function
void _3d_plane(struct shaders& shaders,
const mat4x4& world_to_clip,
float theta)
{
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
// texture parameters for the 32x32 plane texture
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PLANE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__32();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PLANE_SHADER]);
ib_vap_pvs(&shaders.vertex[PLANE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
// local_to_world: scale by 1.0 combined with a -PI/2 rotation about x
mat4x4 s = scale(1.0f);
mat4x4 rx = rotate_x(-PI / 2.0f);
mat4x4 local_to_world = s * rx;
mat4x4 trans = world_to_clip * local_to_world;
// NOTE(review): presumably flushes PVS state before the constants are
// rewritten — confirm against the register documentation.
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// plane_inner
_3d_plane_inner(trans);
}
// Appends the particle pass to the indirect buffer: enables additive
// (ONE, ONE) blending, binds the particle texture and PARTICLE_SHADER
// shaders, uploads the vertex shader constants, then draws the plane model
// geometry via _3d_plane_inner().
//
// shaders:       loaded shader tables
// world_to_clip: world space to clip space transform
// world_to_view: world space to view space transform; its first two rows are
//                uploaded as the "right" and "up" vectors
// theta:         not used in this function
void _3d_particle(struct shaders& shaders,
const mat4x4& world_to_clip,
const mat4x4& world_to_view,
float theta)
{
// enable blending
T0V(RB3D_BLENDCNTL
, RB3D_BLENDCNTL__ALPHA_BLEND_ENABLE__ENABLE
| RB3D_BLENDCNTL__READ_ENABLE(1)
| RB3D_BLENDCNTL__SRCBLEND__GL_ONE
| RB3D_BLENDCNTL__DESTBLEND__GL_ONE
| RB3D_BLENDCNTL__SRC_ALPHA_0_NO_READ(0)
| RB3D_BLENDCNTL__SRC_ALPHA_1_NO_READ(0)
);
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
// texture parameters for the 32x32 particle texture
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PARTICLE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__32();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PARTICLE_SHADER]);
ib_vap_pvs(&shaders.vertex[PARTICLE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
// the particle's local space is the world space scaled by 1.0
mat4x4 s = scale(1.0f);
mat4x4 local_to_world = s;
mat4x4 local_to_view = world_to_view * local_to_world;
mat4x4 trans = world_to_clip * local_to_world;
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
// NOTE(review): unlike _3d_plane, VAP_PVS_STATE_FLUSH_REG is not written
// before this constant upload — confirm whether that is intentional.
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
// 4: particle_position
0, 0, 0, 0,
// 5: dx (right)
local_to_view[0][0], local_to_view[0][1], local_to_view[0][2], 0,
// 6: dy (up)
local_to_view[1][0], local_to_view[1][1], local_to_view[1][2], 0,
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// plane_inner
_3d_plane_inner(trans);
}
// Builds the indirect buffer (command stream) for one frame: generic
// initialization, render target and output format setup, shader upload, then
// the clear, plane and particle passes.
//
// shaders: loaded shader tables
// theta:   animation angle; the camera's y rotation is theta * 0.8
//
// Returns the number of dwords written (ib_ix) after padding the stream to a
// multiple of 8 dwords.
int indirect_buffer(shaders& shaders,
                    float theta)
{
  int width = 1600;
  int height = 1200;
  int pitch = width;

  ib_ix = 0;

  ib_generic_initialization();

  // blending starts disabled; _3d_particle enables it for its own pass
  T0V(RB3D_BLENDCNTL, 0);
  T0V(RB3D_ABLENDCNTL, 0);

  ib_viewport(width, height);
  ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);

  T0V(GB_ENABLE, 0);

  T0V(US_OUT_FMT_0
      , US_OUT_FMT__OUT_FMT(0) // C4_8
      | US_OUT_FMT__C0_SEL__BLUE
      | US_OUT_FMT__C1_SEL__GREEN
      | US_OUT_FMT__C2_SEL__RED
      | US_OUT_FMT__C3_SEL__ALPHA
      | US_OUT_FMT__OUT_SIGN(0)
      );
  T0V(US_OUT_FMT_1
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  T0V(US_OUT_FMT_2
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );
  // previously this wrote US_OUT_FMT_2 a second time, which left the fourth
  // output format unprogrammed by this function
  T0V(US_OUT_FMT_3
      , US_OUT_FMT__OUT_FMT(15) // render target is not used
      );

  load_pvs_shaders(shaders.vertex, shaders.vertex_length);
  load_us_shaders(shaders.fragment, shaders.fragment_length);

  //////////////////////////////////////////////////////////////////////////////
  // DRAW
  //////////////////////////////////////////////////////////////////////////////

  // 3/4 x scale compensates for the 1600x1200 (4:3) target
  mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
  mat4x4 p = perspective(0.01f, 3.0f,
                         0.001f, 0.999f,
                         1.0f, 3.0f);
  mat4x4 t = translate(vec3(0, 0, 1));
  mat4x4 rx = rotate_x(-PI / 8.0f);
  mat4x4 ry = rotate_y(theta * 0.8f);
  mat4x4 world_to_view = t * rx * ry;
  mat4x4 world_to_clip = aspect * p * world_to_view;

  _3d_clear(shaders);
  _3d_plane(shaders, world_to_clip, theta);
  _3d_particle(shaders, world_to_clip, world_to_view, theta);
  //_3d_zbuffer(shaders);

  //////////////////////////////////////////////////////////////////////////////
  // padding
  //////////////////////////////////////////////////////////////////////////////

  // NOTE(review): 0x80000000 is presumably a no-op packet header — confirm.
  while ((ib_ix % 8) != 0) {
    TU(0x80000000);
  }

  return ib_ix;
}
// Entry point: loads the shader binaries, opens the DRM device, allocates two
// colorbuffers plus a z-buffer, flush buffer and textures, then renders
// frames in a loop, alternating between the two colorbuffers. The loop ends
// when command submission fails; colorbuffer 0 and the z-buffer are then
// written to files in the current directory.
int main()
{
  struct shaders shaders = {
    .vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
    .fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
    .vertex_length = vertex_shader_paths_length,
    .fragment_length = fragment_shader_paths_length,
  };

  void * rmmio = map_pci_resource2();

  int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
  assert(fd != -1);

  // 1600x1200 at 4 bytes per pixel; the same size is also used for the z-buffer
  const int colorbuffer_size = 1600 * 1200 * 4;
  int colorbuffer_handle[2];
  int zbuffer_handle;
  int * texturebuffer_handle;
  int flush_handle;

  void * colorbuffer_ptr[2];
  void * zbuffer_ptr;

  // colorbuffer
  colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
  colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
  zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
  flush_handle = create_flush_buffer(fd);
  texturebuffer_handle = load_textures(fd, textures, textures_length);

  fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
  fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
  fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);

  int colorbuffer_ix = 0;
  float theta = PI * 0.5;

  while (true) {
    int ib_dwords = indirect_buffer(shaders, theta);

    int ret = drm_radeon_cs(fd,
                            colorbuffer_handle[colorbuffer_ix],
                            zbuffer_handle,
                            flush_handle,
                            texturebuffer_handle,
                            textures_length,
                            ib_dwords);
    // Stop on a failed submission. Without this check the loop never exits
    // and the buffer dumps and close(fd) below are unreachable.
    if (ret == -1) {
      break;
    }

    primary_surface_address(rmmio, colorbuffer_ix);

    // next state
    theta += 0.01f;
    colorbuffer_ix = (colorbuffer_ix + 1) & 1;
  }

  // dump colorbuffer 0
  {
    printf("colorbuffer0.data\n");
    int out_fd = open("colorbuffer0.data", O_RDWR|O_CREAT, 0644);
    assert(out_fd >= 0);
    ssize_t write_length = write(out_fd, colorbuffer_ptr[0], colorbuffer_size);
    assert(write_length == colorbuffer_size);
    close(out_fd);
  }

  // dump the z-buffer (allocated with colorbuffer_size bytes)
  {
    printf("zbuffer.data\n");
    int out_fd = open("zbuffer.data", O_RDWR|O_CREAT, 0644);
    assert(out_fd >= 0);
    ssize_t write_length = write(out_fd, zbuffer_ptr, colorbuffer_size);
    assert(write_length == colorbuffer_size);
    close(out_fd);
  }

  close(fd);
}

View File

@ -0,0 +1,698 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
#include "../model/plane.h"
// Indices used to select an entry of shaders.vertex / shaders.fragment; the
// values line up with the order of the path tables below.
#define CLEAR_SHADER 0
#define PLANE_SHADER 1
#define PARTICLE_SHADER 2
#define TEXTURE_TILE_SHADER 3
// Vertex shader binaries, passed to load_shaders() in main().
const char * vertex_shader_paths[] = {
"clear.vs.bin",
"particle_plane.vs.bin",
"particle_particle_animated.vs.bin",
"texture_tile.vs.bin",
};
const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
// Fragment shader binaries, passed to load_shaders() in main().
const char * fragment_shader_paths[] = {
"clear.fs.bin",
"particle_plane.fs.bin",
"particle_particle.fs.bin",
"texture_tile.fs.bin",
};
const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
// Offsets added to TEXTUREBUFFER_RELOC_INDEX when binding a texture; the
// values line up with the order of the `textures` table below.
#define PLANE_TEXTURE 0
#define PARTICLE_TEXTURE 1
// Texture files, passed to load_textures() in main().
const char * textures[] = {
"../texture/plane_32x32_rgba8888.data",
"../texture/particle_32x32_rgba8888.data",
};
const int textures_length = (sizeof (textures)) / (sizeof (textures[0]));
// Loaded shader tables shared by the draw functions. main() fills this from
// load_shaders(); entries are selected with the *_SHADER indices.
struct shaders {
// result of load_shaders(vertex_shader_paths, ...)
struct shader_offset * vertex;
// result of load_shaders(fragment_shader_paths, ...)
struct shader_offset * fragment;
// number of entries in `vertex`
int vertex_length;
// number of entries in `fragment`
int fragment_length;
};
// One step of a 32-bit xorshift generator using the shift triple (13, 17, 5).
// Returns the next state; an input of 0 yields 0.
static inline uint32_t xorshift32(uint32_t state)
{
  state ^= state << 13;
  state ^= state >> 17;
  state ^= state << 5;
  return state;
}
// Advances `state` by one xorshift32 step and converts the low 24 bits of the
// new state to a float in [0, 1] (both ends inclusive).
static inline float xorshift32f(uint32_t& state)
{
  const uint32_t next = xorshift32(state);
  state = next;

  const uint32_t low24 = next & 0xffffff;
  return (float)low24 * (1.0f / 16777215.0f);
}
// Added to a particle's `time` when the particle is reset in main(); also
// scales the initial `time` in init_particles().
const float max_age = 3.0f;
// Simulation state of one particle.
struct particle {
// position, uploaded to the particle vertex shader as a constant
vec3 position;
// remaining lifetime; main() decrements it by 0.01 per frame and resets the
// particle once it is <= 0
float time;
// per-particle random factor in [0.5, 1.0] chosen by init_particles();
// scales the upward velocity in reset_particle()
float delta;
// per-frame displacement before the scaling applied in main()
vec3 velocity;
};
// Appends the "clear" pass to the indirect buffer: selects the CLEAR_SHADER
// vertex/fragment shaders, sets the z function to "always", and draws one
// point primitive centered at (800, 600).
// NOTE(review): the point size is presumably chosen so the single point covers
// the whole 1600x1200 target — confirm the GA_POINT_SIZE units.
void _3d_clear(const shaders& shaders)
{
ib_rs_instructions(0);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// position only; no texture coordinate outputs
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, 0);
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always
ib_texture__0();
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
);
ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// GA POINT SIZE
//////////////////////////////////////////////////////////////////////////////
T0V(GA_POINT_SIZE
, GA_POINT_SIZE__HEIGHT(600 * 12)
| GA_POINT_SIZE__WIDTH(800 * 12)
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
// each vertex is two floats (x, y)
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
const float center[] = {
800.0f, 600.0f,
};
const int vertex_count = 1;
// packet count = (one VAP_VF_CNTL dword + inline vertex data) - 1
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
// inline vertex data for the single point
for (int i = 0; i < 2; i++) {
TF(center[i]);
}
}
// Returns the product of a "rescale" matrix and an "offset" matrix:
//   offset  subtracts low1 from z;
//   rescale writes (ratio * z + low) into the third and fourth rows, using
//           the ratios (high2 - low2) / (high1 - low1) and
//           (high3 - low3) / (high1 - low1).
// NOTE(review): assuming row-major constructor arguments and column vectors,
// this maps z in [low1, high1] onto [low2, high2] and produces w in
// [low3, high3] — confirm against math/transform.hpp.
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  const float ratio_z = (high2 - low2) / (high1 - low1);
  const float ratio_w = (high3 - low3) / (high1 - low1);

  const mat4x4 offset = mat4x4(1, 0, 0, 0,
                               0, 1, 0, 0,
                               0, 0, 1, -low1,
                               0, 0, 0, 1
                               );
  const mat4x4 rescale = mat4x4(1, 0, 0, 0,
                                0, 1, 0, 0,
                                0, 0, ratio_z, low2,
                                0, 0, ratio_w, low3
                                );

  mat4x4 result = rescale * offset;
  return result;
}
// Emits an immediate-mode triangle-list draw of plane_model's first object.
// Per vertex, five floats are written: position x, y, z, then texture u, v.
// The `trans` argument is not read by this function.
void _3d_plane_inner(mat4x4 trans)
{
  const model * source = &plane_model;
  const object * mesh = source->object[0];

  const int corners_per_triangle = 3;
  const int dwords_per_vtx = 5;
  const int triangle_count = mesh->triangle_count;
  const int vertex_count = triangle_count * corners_per_triangle;

  T0V(VAP_VTX_SIZE
      , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
      );

  // packet count = (one VAP_VF_CNTL dword + inline vertex data) - 1
  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(4)
    | VAP_VF_CNTL__PRIM_WALK(3)
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
    );

  for (int tri = 0; tri < triangle_count; tri++) {
    for (int corner = 0; corner < corners_per_triangle; corner++) {
      const int position_ix = mesh->triangle[tri][corner].position;
      const int texture_ix = mesh->triangle[tri][corner].texture;
      vec3 pos = source->position[position_ix];
      vec2 uv = source->texture[texture_ix];

      TF(pos.x);
      TF(pos.y);
      TF(pos.z);
      TF(uv.x);
      TF(uv.y);
    }
  }
}
// Appends the textured plane pass to the indirect buffer: binds the plane
// texture and PLANE_SHADER shaders, uploads the local-to-clip matrix as 16
// vertex shader constants, then draws the plane via _3d_plane_inner().
//
// shaders:       loaded shader tables
// world_to_clip: world space to clip space transform
// theta:         not used in this function
void _3d_plane(const shaders& shaders,
const mat4x4& world_to_clip,
float theta)
{
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
// texture parameters for the 32x32 plane texture
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PLANE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__32();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PLANE_SHADER]);
ib_vap_pvs(&shaders.vertex[PLANE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
// local_to_world: scale by 1.0 combined with a -PI/2 rotation about x
mat4x4 s = scale(1.0f);
mat4x4 rx = rotate_x(-PI / 2.0f);
mat4x4 local_to_world = s * rx;
mat4x4 trans = world_to_clip * local_to_world;
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
// NOTE(review): presumably flushes PVS state before the constants are
// rewritten — confirm against the register documentation.
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// plane_inner
_3d_plane_inner(trans);
}
// Appends the particle pass to the indirect buffer: enables additive
// (ONE, ONE) blending, zeroes ZB_CNTL and ZB_ZSTENCILCNTL, binds the particle
// texture and PARTICLE_SHADER shaders, then for every particle re-uploads the
// vertex shader constants and issues one draw of the plane model geometry.
//
// shaders:          loaded shader tables
// world_to_clip:    world space to clip space transform
// world_to_view:    world space to view space transform; its first two rows
//                   are uploaded as the "right" and "up" vectors
// particles:        array of particles to draw
// particles_length: number of entries in `particles`
// theta:            not used in this function
void _3d_particle(const shaders& shaders,
const mat4x4& world_to_clip,
const mat4x4& world_to_view,
const particle * particles,
const int particles_length,
const float theta)
{
// enable blending
T0V(RB3D_BLENDCNTL
, RB3D_BLENDCNTL__ALPHA_BLEND_ENABLE__ENABLE
| RB3D_BLENDCNTL__READ_ENABLE(1)
| RB3D_BLENDCNTL__SRCBLEND__GL_ONE
| RB3D_BLENDCNTL__DESTBLEND__GL_ONE
| RB3D_BLENDCNTL__SRC_ALPHA_0_NO_READ(0)
| RB3D_BLENDCNTL__SRC_ALPHA_1_NO_READ(0)
);
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
// the z-buffer setup used by the other passes is replaced by zeroed z control
// registers (NOTE(review): presumably disables depth test/write — confirm)
//ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
T0V(ZB_CNTL
, 0
);
T0V(ZB_ZSTENCILCNTL
, 0
);
// texture parameters for the 32x32 particle texture
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PARTICLE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__32();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PARTICLE_SHADER]);
ib_vap_pvs(&shaders.vertex[PARTICLE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
// the particle's local space is the world space scaled by 1.0
mat4x4 s = scale(1.0f);
mat4x4 local_to_world = s;
mat4x4 local_to_view = world_to_view * local_to_world;
mat4x4 local_to_clip = world_to_clip * local_to_world;
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
// one constant upload and one draw per particle; only constant 6 differs
// between iterations
for (int i = 0; i < particles_length; i++) {
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
const vec3 position = particles[i].position;
// 6: position
const float scale = 0.005f;
// (disabled alternative: upload only constant 6 at an offset)
//const float position_consts[] = { position.x, position.y, position.z, scale };
//ib_vap_pvs_const_offset(position_consts, (sizeof (position_consts)), 6);
const float consts[] = {
// 0: local space to clip space transformation matrix
local_to_clip[0][0], local_to_clip[0][1], local_to_clip[0][2], local_to_clip[0][3],
local_to_clip[1][0], local_to_clip[1][1], local_to_clip[1][2], local_to_clip[1][3],
local_to_clip[2][0], local_to_clip[2][1], local_to_clip[2][2], local_to_clip[2][3],
local_to_clip[3][0], local_to_clip[3][1], local_to_clip[3][2], local_to_clip[3][3],
// 4: dx ("right" change of basis vector)
local_to_view[0][0], local_to_view[0][1], local_to_view[0][2], 0,
// 5: dy ("up" change of basis vector)
local_to_view[1][0], local_to_view[1][1], local_to_view[1][2], 0,
// 6: particle position, scale
position.x, position.y, position.z, scale
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// plane_inner
_3d_plane_inner(local_to_clip);
}
}
int indirect_buffer(const shaders& shaders,
const particle * particles,
const int particles_length,
float theta)
{
int width = 1600;
int height = 1200;
int pitch = width;
ib_ix = 0;
ib_generic_initialization();
T0V(RB3D_BLENDCNTL, 0);
T0V(RB3D_ABLENDCNTL, 0);
ib_viewport(width, height);
ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);
T0V(GB_ENABLE, 0);
T0V(US_OUT_FMT_0
, US_OUT_FMT__OUT_FMT(0) // C4_8
| US_OUT_FMT__C0_SEL__BLUE
| US_OUT_FMT__C1_SEL__GREEN
| US_OUT_FMT__C2_SEL__RED
| US_OUT_FMT__C3_SEL__ALPHA
| US_OUT_FMT__OUT_SIGN(0)
);
T0V(US_OUT_FMT_1
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
load_pvs_shaders(shaders.vertex, shaders.vertex_length);
load_us_shaders(shaders.fragment, shaders.fragment_length);
//////////////////////////////////////////////////////////////////////////////
// DRAW
//////////////////////////////////////////////////////////////////////////////
mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
mat4x4 p = perspective(0.01f, 3.0f,
0.001f, 0.999f,
1.0f, 3.0f);
mat4x4 t = translate(vec3(0, 0, 1));
mat4x4 rx = rotate_x(-PI / 8.0f);
mat4x4 ry = rotate_y(theta * 0.8f);
mat4x4 world_to_view = t * rx * ry;
mat4x4 world_to_clip = aspect * p * world_to_view;
_3d_clear(shaders);
_3d_plane(shaders, world_to_clip, theta);
_3d_particle(shaders,
world_to_clip,
world_to_view,
particles,
particles_length,
theta);
//////////////////////////////////////////////////////////////////////////////
// padding
//////////////////////////////////////////////////////////////////////////////
while ((ib_ix % 8) != 0) {
TU(0x80000000);
}
assert((unsigned int)ib_ix < (sizeof (ib)) / (sizeof (ib[0])));
return ib_ix;
}
// Respawns a particle: the position becomes the normalized horizontal (x, z)
// part of the current velocity scaled by 20, and the vertical velocity is set
// to 2 * delta while the horizontal velocity components are kept.
void reset_particle(particle& p)
{
  const float vx = p.velocity.x;
  const float vz = p.velocity.z;

  const vec3 horizontal = vec3(vx,
                               0,
                               vz);
  p.position = normalize(horizontal) * 20.0f;

  p.velocity = vec3(vx,
                    2.0f * p.delta,
                    vz);
}
// Fills `particles` with deterministic pseudo-random initial state, using a
// fixed xorshift seed. Per particle, six random values are drawn in this
// order: position x, y, z, delta, velocity x, velocity z.
//
// particles:        array to initialize
// particles_length: number of entries in `particles`
void init_particles(particle * particles, const int particles_length)
{
  uint32_t rng = 0x12345678;
  const float inv_count = 1.0f / (float)(particles_length);

  for (int ix = 0; ix < particles_length; ix++) {
    particle& p = particles[ix];
    const float fraction_index = ((float)ix);

    // the draw order below must not change: it determines every value
    const float px = xorshift32f(rng) * 2.0f - 1.0f;
    const float py = xorshift32f(rng) * 2.0f - 1.0f;
    const float pz = xorshift32f(rng) * 2.0f - 1.0f;
    const float life_factor = xorshift32f(rng) * 0.5f + 0.5f;
    const float vx = xorshift32f(rng) * 2.0f - 1.0f;
    const float vz = xorshift32f(rng) * 2.0f - 1.0f;

    // initial remaining lifetime varies with the particle index
    p.time = max_age * sinf(fraction_index * inv_count * 2) * 0.5f + 0.5f;
    p.delta = life_factor;

    p.position.x = px;
    p.position.y = py;
    p.position.z = pz;

    // horizontal unit-length velocity
    p.velocity = normalize(vec3(vx * 0.5f, 0.0f, vz * 0.5f));
  }
}
// Entry point: loads the shader binaries, opens the DRM device, allocates the
// render buffers and textures, initializes 1000 particles, then renders
// frames in a loop while stepping the particle simulation. The loop ends when
// drm_radeon_cs() returns -1; colorbuffer 0 and the z-buffer are then written
// to files in the current directory.
int main()
{
struct shaders shaders = {
.vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
.fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
.vertex_length = vertex_shader_paths_length,
.fragment_length = fragment_shader_paths_length,
};
void * rmmio = map_pci_resource2();
int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
assert(fd != -1);
// 1600x1200 at 4 bytes per pixel; the same size is also used for the z-buffer
const int colorbuffer_size = 1600 * 1200 * 4;
int colorbuffer_handle[2];
int zbuffer_handle;
int * texturebuffer_handle;
int flush_handle;
void * colorbuffer_ptr[2];
void * zbuffer_ptr;
// colorbuffer
colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
flush_handle = create_flush_buffer(fd);
texturebuffer_handle = load_textures(fd, textures, textures_length);
fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);
int colorbuffer_ix = 0;
float theta = PI * 0.5;
particle particles[1000] = {};
const int particles_length = (sizeof (particles)) / (sizeof (particles[0]));
init_particles(particles, particles_length);
while (true) {
int ib_dwords = indirect_buffer(shaders,
particles,
particles_length,
theta);
int ret = drm_radeon_cs(fd,
colorbuffer_handle[colorbuffer_ix],
zbuffer_handle,
flush_handle,
texturebuffer_handle,
textures_length,
ib_dwords);
// a failed submission ends the render loop
if (ret == -1)
break;
primary_surface_address(rmmio, colorbuffer_ix);
// next state
theta += 0.01f;
// alternate between the two colorbuffers
colorbuffer_ix = (colorbuffer_ix + 1) & 1;
//
// update particles
//
for (int i = 0; i < particles_length; i++) {
if (particles[i].time <= 0) {
// expired: extend the lifetime and respawn
particles[i].time += max_age;
reset_particle(particles[i]);
} else {
particles[i].time -= 0.01f;
// integrate position, with the vertical motion scaled more strongly
particles[i].position += vec3(particles[i].velocity.x * 0.9f,
particles[i].velocity.y * 5.0f,
particles[i].velocity.z * 0.9f);
// constant downward acceleration
particles[i].velocity += vec3(0, -0.04, 0);
// bounce off the y = 0 plane: mirror the position and reverse the vertical
// velocity, keeping 60% of its magnitude
if (particles[i].position.y < 0) {
particles[i].position.y = fabsf(particles[i].position.y);
particles[i].velocity.y *= -0.6f;
}
}
}
}
// dump colorbuffer 0
{
printf("colorbuffer0.data\n");
int out_fd = open("colorbuffer0.data", O_RDWR|O_CREAT, 0644);
assert(out_fd >= 0);
ssize_t write_length = write(out_fd, colorbuffer_ptr[0], colorbuffer_size);
assert(write_length == colorbuffer_size);
close(out_fd);
}
// dump the z-buffer (allocated with colorbuffer_size bytes)
{
printf("zbuffer.data\n");
int out_fd = open("zbuffer.data", O_RDWR|O_CREAT, 0644);
assert(out_fd >= 0);
ssize_t write_length = write(out_fd, zbuffer_ptr, colorbuffer_size);
assert(write_length == colorbuffer_size);
close(out_fd);
}
close(fd);
}

View File

@ -0,0 +1,774 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
// Indices into the shader path tables below. The same indices select entries
// of shaders.vertex / shaders.fragment when a draw binds its shader pair.
#define CLEAR_SHADER 0
#define PLANE_SHADER 1
#define PARTICLE_SHADER 2
#define TEXTURE_TILE_SHADER 3
// Vertex shader binaries, ordered to match the *_SHADER indices above.
const char * vertex_shader_paths[] = {
"clear.vs.bin",
"particle_plane_fan.vs.bin",
"particle_particle_animated_fan.vs.bin",
"texture_tile.vs.bin",
};
const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
// Fragment shader binaries, ordered to match the *_SHADER indices above.
const char * fragment_shader_paths[] = {
"clear.fs.bin",
"particle_plane.fs.bin",
"particle_particle.fs.bin",
"texture_tile.fs.bin",
};
const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
// Indices into `textures`; added to TEXTUREBUFFER_RELOC_INDEX when a draw
// binds its texture.
#define PLANE_TEXTURE 0
#define PARTICLE_TEXTURE 1
// Texture data files, ordered to match the *_TEXTURE indices above.
const char * textures[] = {
"../texture/plane_32x32_rgba8888.data",
"../texture/particle_32x32_rgba8888.data",
};
const int textures_length = (sizeof (textures)) / (sizeof (textures[0]));
// Loaded shader tables, indexed by the *_SHADER constants.
struct shaders {
struct shader_offset * vertex; // result of load_shaders(vertex_shader_paths, ...)
struct shader_offset * fragment; // result of load_shaders(fragment_shader_paths, ...)
int vertex_length; // number of entries in `vertex`
int fragment_length; // number of entries in `fragment`
};
// One step of a 32-bit xorshift generator (shift triple 13, 17, 5).
// Returns the next state; a zero state maps to zero.
static inline uint32_t xorshift32(uint32_t state)
{
  state ^= state << 13;
  state ^= state >> 17;
  state ^= state << 5;
  return state;
}
// Advances `state` by one xorshift32 step and returns the low 24 bits of the
// new state scaled into [0.0, 1.0] (both ends reachable).
static inline float xorshift32f(uint32_t& state)
{
  state = xorshift32(state);
  const uint32_t low24 = state & 0xffffff;
  return (float)low24 * (1.0f / 16777215.0f);
}
// Added to a particle's `time` when it is respawned (see the update loop in
// main); also scales the initial `time` in init_particles.
const float max_age = 3.0f;
// Simulation state for one particle.
struct particle {
vec3 position;
float time; // counts down by 0.01 per frame; the particle is reset when it reaches <= 0
float delta; // per-particle random factor; scales the upward velocity on reset
vec3 velocity;
};
// Emits the commands for the clear pass: one point primitive drawn with the
// CLEAR shader pair, no texture coordinates, and the z function argument set
// to 7 ("always"). The point is centered at (800, 600) with GA_POINT_SIZE
// programmed as 800*12 by 600*12 -- presumably sized to cover the whole
// 1600x1200 target; confirm the GA_POINT_SIZE units against the register docs.
//
// shaders: loaded shader tables; only the CLEAR_SHADER entries are used.
void _3d_clear(const shaders& shaders)
{
ib_rs_instructions(0);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// position only; no texture coordinate components are output
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, 0);
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always
ib_texture__0();
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
);
ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
// no viewport scale/offset bits are set here: the vertex is given directly
// in the coordinates the hardware consumes (NOTE(review): presumably screen
// space -- confirm)
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// GA POINT SIZE
//////////////////////////////////////////////////////////////////////////////
T0V(GA_POINT_SIZE
, GA_POINT_SIZE__HEIGHT(600 * 12)
| GA_POINT_SIZE__WIDTH(800 * 12)
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
const float center[] = {
800.0f, 600.0f,
};
const int vertex_count = 1;
// packet body: one VAP_VF_CNTL dword followed by the inline vertex data;
// the count argument is written as (body dwords - 1)
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
// inline vertex data: x, y of the single point
for (int i = 0; i < 2; i++) {
TF(center[i]);
}
}
// Builds the projection matrix as the product of two matrices:
//   - a shift that adds -low1 in the third row's last entry, and
//   - a remap whose third row is (0, 0, scale2, low2) and whose fourth row is
//     (0, 0, scale3, low3), where scaleN = (highN - lowN) / (high1 - low1).
// NOTE(review): read as "map z from [low1, high1] to [low2, high2] for the z
// output and to [low3, high3] for the w output", assuming row-ordered
// constructor arguments and column vectors -- confirm against mat4x4.
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  float input_range = high1 - low1;
  float scale_to_2 = (high2 - low2) / input_range;
  float scale_to_3 = (high3 - low3) / input_range;

  mat4x4 shift = mat4x4(1, 0, 0, 0,
                        0, 1, 0, 0,
                        0, 0, 1, -low1,
                        0, 0, 0, 1
                        );
  mat4x4 remap = mat4x4(1, 0, 0, 0,
                        0, 1, 0, 0,
                        0, 0, scale_to_2, low2,
                        0, 0, scale_to_3, low3
                        );

  // the shift is applied first, then the remap
  return remap * shift;
}
// Emits an immediate-mode draw of a unit quad as a 4-vertex triangle fan.
// Each vertex is two floats (x, y) taken from the table below; all other
// state (shaders, constants, textures) must already have been emitted by the
// caller.
void _3d_plane_inner()
{
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
// corners of the unit square, in fan order
const vec2 vertices[] = {
{0.0, 0.0f},
{1.0, 0.0f},
{1.0, 1.0f},
{0.0, 1.0f},
};
const int vertex_count = 4;
// packet body: one VAP_VF_CNTL dword followed by the inline vertex data;
// the count argument is written as (body dwords - 1)
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(5) // triangle fan
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
for (int i = 0; i < vertex_count; i++) {
TF(vertices[i].x);
TF(vertices[i].y);
}
}
// Emits one particle draw: a 4-vertex triangle fan whose vertices are fetched
// from the vertex buffer (PRIM_WALK 2) rather than supplied inline. A single
// vertex array is bound at offset 0 of the buffer named by
// VERTEXBUFFER_RELOC_INDEX, with count and stride both 2.
void _3d_particle_inner()
{
// valid vertex index range for this draw: 0..3
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(3)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// AOS
//////////////////////////////////////////////////////////////////////////////
// three body dwords follow: VAP_VTX_NUM_ARRAYS, VAP_VTX_AOS_ATTR01,
// VAP_VTX_AOS_ADDR0
T3(_3D_LOAD_VBPNTR, (3 - 1));
TU( // VAP_VTX_NUM_ARRAYS
VAP_VTX_NUM_ARRAYS__VTX_NUM_ARRAYS(1)
| VAP_VTX_NUM_ARRAYS__VC_FORCE_PREFETCH(1)
);
TU( // VAP_VTX_AOS_ATTR01
VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(2)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(2)
);
TU( // VAP_VTX_AOS_ADDR0
(4 * 0);
);
// _NOP packet carrying the relocation entry for the address dword above
T3(_NOP, 0);
TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR0
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
const int vertex_count = 4;
// body is the single VAP_VF_CNTL dword
T3(_3D_DRAW_VBUF_2, (1 - 1));
TU( VAP_VF_CNTL__PRIM_TYPE(5) // triangle fan
| VAP_VF_CNTL__PRIM_WALK(2) // vertex list (data fetched from memory)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
}
// Emits the state and draw for the textured ground plane: PLANE shader pair,
// PLANE_TEXTURE bound as a 32x32 untiled wrapping texture, z function
// argument 1 ("less"), viewport transform and W division enabled, then the
// unit quad from _3d_plane_inner().
//
// shaders:       loaded shader tables; only the PLANE_SHADER entries are used
// world_to_clip: camera/projection transform; combined here with the plane's
//                local_to_world (a rotation of -PI/2 about x)
// theta:         not read by this function
void _3d_plane(const shaders& shaders,
const mat4x4& world_to_clip,
float theta)
{
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// position plus one 4-component texture coordinate
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PLANE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PLANE_SHADER]);
ib_vap_pvs(&shaders.vertex[PLANE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
mat4x4 s = scale(1.0f);
mat4x4 rx = rotate_x(-PI / 2.0f);
mat4x4 local_to_world = s * rx;
mat4x4 trans = world_to_clip * local_to_world;
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
// vertex shader constants: vectors 0-3 hold the local-to-clip matrix, vector 4
// holds (-2, 0, 0, 0); the meaning of -2 is defined by the vertex shader,
// which is not visible here
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
// 4
-2.0f, 0, 0, 0,
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// plane_inner
_3d_plane_inner();
}
// Emits the state and draws for all particles. Blending is enabled with
// GL_ONE source and destination factors, ZB_CNTL and ZB_ZSTENCILCNTL are
// written as 0, and the PARTICLE shader pair and PARTICLE_TEXTURE are bound.
// Then, for every particle, the full vertex shader constant block is
// re-uploaded with that particle's position and one quad is drawn via
// _3d_particle_inner().
//
// shaders:          loaded shader tables; only PARTICLE_SHADER entries are used
// world_to_clip:    camera/projection transform (constants 0-3)
// world_to_view:    camera transform; its first two rows are passed as the
//                   "right" and "up" vectors (constants 4 and 5)
// particles:        particle state; only `position` is read
// particles_length: number of entries in `particles`
// theta:            not read by this function
void _3d_particle(const shaders& shaders,
const mat4x4& world_to_clip,
const mat4x4& world_to_view,
const particle * particles,
const int particles_length,
const float theta)
{
// enable blending
T0V(RB3D_BLENDCNTL
, RB3D_BLENDCNTL__ALPHA_BLEND_ENABLE__ENABLE
| RB3D_BLENDCNTL__READ_ENABLE(1)
| RB3D_BLENDCNTL__SRCBLEND__GL_ONE
| RB3D_BLENDCNTL__DESTBLEND__GL_ONE
| RB3D_BLENDCNTL__SRC_ALPHA_0_NO_READ(0)
| RB3D_BLENDCNTL__SRC_ALPHA_1_NO_READ(0)
);
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
// the z-buffer setup used by the plane is replaced by zeroed z control
// registers for the particles
//ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
T0V(ZB_CNTL
, 0
);
T0V(ZB_ZSTENCILCNTL
, 0
);
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PARTICLE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PARTICLE_SHADER]);
ib_vap_pvs(&shaders.vertex[PARTICLE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
mat4x4 s = scale(1.0f);
mat4x4 local_to_world = s;
mat4x4 local_to_view = world_to_view * local_to_world;
mat4x4 trans = world_to_clip * local_to_world;
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
// one constant upload and one draw per particle
for (int i = 0; i < particles_length; i++) {
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
const vec3 position = particles[i].position;
// 6: position
// note: this local shadows the scale() function for the rest of the loop body
const float scale = 0.005f;
//const float position_consts[] = { position.x, position.y, position.z, scale };
//ib_vap_pvs_const_offset(position_consts, (sizeof (position_consts)), 6);
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
// 4: dx (right)
local_to_view[0][0], local_to_view[0][1], local_to_view[0][2], 0,
// 5: dy (up)
local_to_view[1][0], local_to_view[1][1], local_to_view[1][2], 0,
// 6: xyz:position w:scale
position.x, position.y, position.z, scale,
// 7:
-2.0, 0, 0, 0,
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// plane_inner
//_3d_plane_inner();
_3d_particle_inner();
}
}
int indirect_buffer(const shaders& shaders,
const particle * particles,
const int particles_length,
float theta)
{
int width = 1600;
int height = 1200;
int pitch = width;
ib_ix = 0;
ib_generic_initialization();
T0V(RB3D_BLENDCNTL, 0);
T0V(RB3D_ABLENDCNTL, 0);
ib_viewport(width, height);
ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);
T0V(GB_ENABLE, 0);
T0V(US_OUT_FMT_0
, US_OUT_FMT__OUT_FMT(0) // C4_8
| US_OUT_FMT__C0_SEL__BLUE
| US_OUT_FMT__C1_SEL__GREEN
| US_OUT_FMT__C2_SEL__RED
| US_OUT_FMT__C3_SEL__ALPHA
| US_OUT_FMT__OUT_SIGN(0)
);
T0V(US_OUT_FMT_1
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
load_pvs_shaders(shaders.vertex, shaders.vertex_length);
load_us_shaders(shaders.fragment, shaders.fragment_length);
//////////////////////////////////////////////////////////////////////////////
// DRAW
//////////////////////////////////////////////////////////////////////////////
mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
mat4x4 p = perspective(0.01f, 3.0f,
0.001f, 0.999f,
1.0f, 3.0f);
mat4x4 t = translate(vec3(0, 0, 1));
mat4x4 rx = rotate_x(-PI / 8.0f);
mat4x4 ry = rotate_y(theta * 0.8f);
mat4x4 world_to_view = t * rx * ry;
mat4x4 world_to_clip = aspect * p * world_to_view;
_3d_clear(shaders);
_3d_plane(shaders, world_to_clip, theta);
_3d_particle(shaders,
world_to_clip,
world_to_view,
particles,
particles_length,
theta);
//////////////////////////////////////////////////////////////////////////////
// padding
//////////////////////////////////////////////////////////////////////////////
while ((ib_ix % 8) != 0) {
TU(0x80000000);
}
assert((unsigned int)ib_ix < (sizeof (ib)) / (sizeof (ib[0])));
return ib_ix;
}
// Respawns a particle: its position becomes the normalized horizontal (x, z)
// direction of its current velocity scaled by 20, and its vertical velocity
// is replaced by 2 * p.delta. The horizontal velocity is kept.
void reset_particle(particle& p)
{
  const auto vx = p.velocity.x;
  const auto vz = p.velocity.z;

  // place the particle along its horizontal heading, on the y = 0 plane
  p.position = normalize(vec3(vx, 0, vz)) * 20.0f;

  // relaunch upward; delta varies the launch speed per particle
  p.velocity = vec3(vx, 2.0f * p.delta, vz);
}
// Fills `particles` with deterministic pseudo-random state, using an
// xorshift32 sequence seeded with 0x12345678.
//
// Per particle, six random values are drawn in a fixed order: position x, y, z
// (each in [-1, 1]), delta (in [0.5, 1]), then velocity x and z. The velocity
// is the normalized horizontal vector (vx, 0, vz). The initial `time` depends
// only on the particle's index.
void init_particles(particle * particles, const int particles_length)
{
  uint32_t rng = 0x12345678;
  const float inv_length = 1.0f / (float)(particles_length);

  for (int i = 0; i < particles_length; i++) {
    particle& p = particles[i];
    const float index = ((float)i);

    // the draw order below defines the generated sequence; do not reorder
    const float px = xorshift32f(rng) * 2.0f - 1.0f;
    const float py = xorshift32f(rng) * 2.0f - 1.0f;
    const float pz = xorshift32f(rng) * 2.0f - 1.0f;
    const float delta = xorshift32f(rng) * 0.5f + 0.5f;
    const float vx = xorshift32f(rng) * 2.0f - 1.0f;
    const float vz = xorshift32f(rng) * 2.0f - 1.0f;

    p.time = max_age * sinf(index * inv_length * 2) * 0.5f + 0.5f;
    p.delta = delta;
    p.position.x = px;
    p.position.y = py;
    p.position.z = pz;
    p.velocity = normalize(vec3(vx * 0.5f, 0.0f, vz * 0.5f));
  }
}
int init_particles_vertexbuffer(int fd, int particles_length)
{
const vec2 vertices[] = {
{0.0, 0.0f},
{1.0, 0.0f},
{1.0, 1.0f},
{0.0, 1.0f},
};
const int vertex_count = 4;
const int size = particles_length * vertex_count * 2 * (sizeof (float));
void * ptr;
int handle = create_buffer(fd, size, &ptr);
float * ptrf = (float*)ptr;
int ix = 0;
for (int j = 0; j < particles_length; j++) {
for (int i = 0; i < vertex_count; i++) {
ptrf[ix++] = vertices[i].x;
ptrf[ix++] = vertices[i].y;
}
}
munmap(ptr, size);
printf("init vertexbuffer %d %d\n", ix, size);
return handle;
}
// Entry point: loads shaders and textures, creates two color buffers, a
// z-buffer and the particle vertex buffer, then renders frames in a loop,
// alternating between the two color buffers and stepping the particle
// simulation after each submission. The loop ends only when drm_radeon_cs()
// returns -1; the first color buffer and the z-buffer are then dumped to
// colorbuffer0.data and zbuffer.data.
int main()
{
struct shaders shaders = {
.vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
.fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
.vertex_length = vertex_shader_paths_length,
.fragment_length = fragment_shader_paths_length,
};
void * rmmio = map_pci_resource2();
int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
assert(fd != -1);
// 1600x1200 at 4 bytes per pixel; the z-buffer is allocated with the same size
const int colorbuffer_size = 1600 * 1200 * 4;
int colorbuffer_handle[2];
int zbuffer_handle;
int * texturebuffer_handle;
//int flush_handle;
int vertexbuffer_handle;
void * colorbuffer_ptr[2];
void * zbuffer_ptr;
// colorbuffer
colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
//flush_handle = create_flush_buffer(fd);
texturebuffer_handle = load_textures(fd, textures, textures_length);
fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);
int colorbuffer_ix = 0;
float theta = PI * 0.5;
particle particles[10] = {};
const int particles_length = (sizeof (particles)) / (sizeof (particles[0]));
vertexbuffer_handle = init_particles_vertexbuffer(fd, particles_length);
init_particles(particles, particles_length);
fprintf(stderr, "vertexbuffer handle %d\n", vertexbuffer_handle);
while (true) {
int ib_dwords = indirect_buffer(shaders,
particles,
particles_length,
theta);
// the vertex buffer handle is passed in the argument position that
// previously carried the (now commented-out) flush_handle
int ret = drm_radeon_cs(fd,
colorbuffer_handle[colorbuffer_ix],
zbuffer_handle,
vertexbuffer_handle,
texturebuffer_handle,
textures_length,
ib_dwords);
if (ret == -1)
break;
// display the buffer that was just rendered
primary_surface_address(rmmio, colorbuffer_ix);
// next state
theta += 0.01f;
colorbuffer_ix = (colorbuffer_ix + 1) & 1;
//
// update particles
//
for (int i = 0; i < particles_length; i++) {
if (particles[i].time <= 0) {
// expired: extend the lifetime and respawn
particles[i].time += max_age;
reset_particle(particles[i]);
} else {
particles[i].time -= 0.01f;
// integrate position; the y component of velocity is weighted more
// heavily (5.0) than x and z (0.9)
particles[i].position += vec3(particles[i].velocity.x * 0.9f,
particles[i].velocity.y * 5.0f,
particles[i].velocity.z * 0.9f);
// constant downward acceleration
particles[i].velocity += vec3(0, -0.04, 0);
// bounce off the y = 0 plane: mirror the position and reverse the
// vertical velocity, keeping 60% of its magnitude
if (particles[i].position.y < 0) {
particles[i].position.y = fabsf(particles[i].position.y);
particles[i].velocity.y *= -0.6f;
}
}
}
}
// dump the first color buffer
{
printf("colorbuffer0.data\n");
int out_fd = open("colorbuffer0.data", O_RDWR|O_CREAT, 0644);
assert(out_fd >= 0);
ssize_t write_length = write(out_fd, colorbuffer_ptr[0], colorbuffer_size);
assert(write_length == colorbuffer_size);
close(out_fd);
}
// dump the z-buffer (allocated with colorbuffer_size above)
{
printf("zbuffer.data\n");
int out_fd = open("zbuffer.data", O_RDWR|O_CREAT, 0644);
assert(out_fd >= 0);
ssize_t write_length = write(out_fd, zbuffer_ptr, colorbuffer_size);
assert(write_length == colorbuffer_size);
close(out_fd);
}
close(fd);
}

View File

@ -0,0 +1,806 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "r500/3d_registers.h"
#include "r500/3d_registers_undocumented.h"
#include "r500/3d_registers_bits.h"
#include "r500/indirect_buffer.h"
#include "r500/shader.h"
#include "r500/display_controller.h"
#include "drm/buffer.h"
#include "drm/drm.h"
#include "math/float_types.hpp"
#include "math/transform.hpp"
#include "math/constants.hpp"
#include "../model/model2.h"
// Indices into the shader path tables below. The same indices select entries
// of shaders.vertex / shaders.fragment when a draw binds its shader pair.
#define CLEAR_SHADER 0
#define PLANE_SHADER 1
#define PARTICLE_SHADER 2
#define TEXTURE_TILE_SHADER 3
// Vertex shader binaries, ordered to match the *_SHADER indices above.
const char * vertex_shader_paths[] = {
"clear.vs.bin",
"particle_plane_fan.vs.bin",
"particle_particle_animated_quad_vbuf.vs.bin",
"texture_tile.vs.bin",
};
const int vertex_shader_paths_length = (sizeof (vertex_shader_paths)) / (sizeof (vertex_shader_paths[0]));
// Fragment shader binaries, ordered to match the *_SHADER indices above.
const char * fragment_shader_paths[] = {
"clear.fs.bin",
"particle_plane.fs.bin",
"particle_particle.fs.bin",
"texture_tile.fs.bin",
};
const int fragment_shader_paths_length = (sizeof (fragment_shader_paths)) / (sizeof (fragment_shader_paths[0]));
// Indices into `textures`; added to TEXTUREBUFFER_RELOC_INDEX when a draw
// binds its texture.
#define PLANE_TEXTURE 0
#define PARTICLE_TEXTURE 1
// Texture data files, ordered to match the *_TEXTURE indices above.
const char * textures[] = {
"../texture/plane_32x32_rgba8888.data",
"../texture/particle_32x32_rgba8888.data",
};
const int textures_length = (sizeof (textures)) / (sizeof (textures[0]));
// Loaded shader tables, indexed by the *_SHADER constants.
struct shaders {
struct shader_offset * vertex; // vertex shader entries
struct shader_offset * fragment; // fragment shader entries
int vertex_length; // number of entries in `vertex`
int fragment_length; // number of entries in `fragment`
};
// One step of a 32-bit xorshift generator (shift triple 13, 17, 5).
// Returns the next state; a zero state maps to zero.
static inline uint32_t xorshift32(uint32_t state)
{
  state ^= state << 13;
  state ^= state >> 17;
  state ^= state << 5;
  return state;
}
// Advances `state` by one xorshift32 step and returns the low 24 bits of the
// new state scaled into [0.0, 1.0] (both ends reachable).
static inline float xorshift32f(uint32_t& state)
{
  state = xorshift32(state);
  const uint32_t low24 = state & 0xffffff;
  return (float)low24 * (1.0f / 16777215.0f);
}
// Scales the initial particle `time` in init_particles.
// NOTE(review): presumably also the lifetime added on respawn -- confirm
// against this file's update loop.
const float max_age = 3.0f;
// Simulation state for one particle.
struct particle {
vec3 position; // copied into the vertex buffer for each of the particle's 4 vertices
float time; // NOTE(review): presumably remaining lifetime -- confirm against the update loop
float delta; // per-particle random factor; scales the upward velocity in reset_particle
vec3 velocity;
};
// Emits the commands for the clear pass: one point primitive drawn with the
// CLEAR shader pair, no texture coordinates, and the z function argument set
// to 7 ("always"). The point is centered at (800, 600) with GA_POINT_SIZE
// programmed as 800*12 by 600*12 -- presumably sized to cover the whole
// 1600x1200 target; confirm the GA_POINT_SIZE units against the register docs.
//
// shaders: loaded shader tables; only the CLEAR_SHADER entries are used.
void _3d_clear(const shaders& shaders)
{
ib_rs_instructions(0);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// position only; no texture coordinate components are output
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, 0);
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 7); // always
ib_texture__0();
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(1)
);
ib_ga_us(&shaders.fragment[CLEAR_SHADER]);
ib_vap_pvs(&shaders.vertex[CLEAR_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP INDEX
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_INDEX_OFFSET, 0);
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(0)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__CLIP_DISABLE(1)
);
// no viewport scale/offset bits are set here: the vertex is given directly
// in the coordinates the hardware consumes (NOTE(review): presumably screen
// space -- confirm)
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VTX_XY_FMT(1) // disable W division
| VAP_VTE_CNTL__VTX_Z_FMT(1) // disable W division
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// GA POINT SIZE
//////////////////////////////////////////////////////////////////////////////
T0V(GA_POINT_SIZE
, GA_POINT_SIZE__HEIGHT(600 * 12)
| GA_POINT_SIZE__WIDTH(800 * 12)
);
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
const float center[] = {
800.0f, 600.0f,
};
const int vertex_count = 1;
// packet body: one VAP_VF_CNTL dword followed by the inline vertex data;
// the count argument is written as (body dwords - 1)
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(1) // point list
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
// inline vertex data: x, y of the single point
for (int i = 0; i < 2; i++) {
TF(center[i]);
}
}
// Builds the projection matrix as the product of two matrices:
//   - a shift that adds -low1 in the third row's last entry, and
//   - a remap whose third row is (0, 0, scale2, low2) and whose fourth row is
//     (0, 0, scale3, low3), where scaleN = (highN - lowN) / (high1 - low1).
// NOTE(review): read as "map z from [low1, high1] to [low2, high2] for the z
// output and to [low3, high3] for the w output", assuming row-ordered
// constructor arguments and column vectors -- confirm against mat4x4.
mat4x4 perspective(float low1, float high1,
                   float low2, float high2,
                   float low3, float high3)
{
  float input_range = high1 - low1;
  float scale_to_2 = (high2 - low2) / input_range;
  float scale_to_3 = (high3 - low3) / input_range;

  mat4x4 shift = mat4x4(1, 0, 0, 0,
                        0, 1, 0, 0,
                        0, 0, 1, -low1,
                        0, 0, 0, 1
                        );
  mat4x4 remap = mat4x4(1, 0, 0, 0,
                        0, 1, 0, 0,
                        0, 0, scale_to_2, low2,
                        0, 0, scale_to_3, low3
                        );

  // the shift is applied first, then the remap
  return remap * shift;
}
// Emits an immediate-mode draw of a unit quad as a 4-vertex triangle fan.
// Each vertex is two floats (x, y) taken from the table below; all other
// state (shaders, constants, textures) must already have been emitted by the
// caller.
void _3d_plane_inner()
{
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
const int dwords_per_vtx = 2;
T0V(VAP_VTX_SIZE
, VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
);
// corners of the unit square, in fan order
const vec2 vertices[] = {
{0.0, 0.0f},
{1.0, 0.0f},
{1.0, 1.0f},
{0.0, 1.0f},
};
const int vertex_count = 4;
// packet body: one VAP_VF_CNTL dword followed by the inline vertex data;
// the count argument is written as (body dwords - 1)
T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
TU( VAP_VF_CNTL__PRIM_TYPE(5) // triangle fan
| VAP_VF_CNTL__PRIM_WALK(3)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
| VAP_VF_CNTL__NUM_VERTICES(vertex_count)
);
for (int i = 0; i < vertex_count; i++) {
TF(vertices[i].x);
TF(vertices[i].y);
}
}
// Emits a single draw covering all particles as a quad list (4 vertices per
// particle) fetched from the vertex buffer. Two vertex arrays are bound, both
// in the buffer named by VERTEXBUFFER_RELOC_INDEX:
//   array 0: count/stride 3, at 4 * position_offset (per-vertex positions)
//   array 1: count/stride 2, at offset 0 (quad corner coordinates)
// The vertex count is supplied through VAP_ALT_NUM_VERTICES
// (USE_ALT_NUM_VERTS = 1) rather than the NUM_VERTICES field.
//
// particles_length: number of particles (quads) to draw
// position_offset:  offset of the position array within the vertex buffer,
//                   in 4-byte units (it is multiplied by 4 below)
void _3d_particle_inner(int particles_length, int position_offset)
{
const int vertex_count = 4 * particles_length;
assert(vertex_count <= 0xffffff);
//////////////////////////////////////////////////////////////////////////////
// VF
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_VF_MAX_VTX_INDX
, VAP_VF_MAX_VTX_INDX__MAX_INDX(vertex_count - 1)
);
T0V(VAP_VF_MIN_VTX_INDX
, VAP_VF_MIN_VTX_INDX__MIN_INDX(0)
);
//////////////////////////////////////////////////////////////////////////////
// AOS
//////////////////////////////////////////////////////////////////////////////
// four body dwords follow: VAP_VTX_NUM_ARRAYS, VAP_VTX_AOS_ATTR01,
// VAP_VTX_AOS_ADDR0, VAP_VTX_AOS_ADDR1
T3(_3D_LOAD_VBPNTR, (4 - 1));
TU( // VAP_VTX_NUM_ARRAYS
VAP_VTX_NUM_ARRAYS__VTX_NUM_ARRAYS(2)
| VAP_VTX_NUM_ARRAYS__VC_FORCE_PREFETCH(1)
);
TU( // VAP_VTX_AOS_ATTR01
VAP_VTX_AOS_ATTR__VTX_AOS_COUNT0(3)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE0(3)
| VAP_VTX_AOS_ATTR__VTX_AOS_COUNT1(2)
| VAP_VTX_AOS_ATTR__VTX_AOS_STRIDE1(2)
);
TU( // VAP_VTX_AOS_ADDR0
(4 * position_offset);
);
TU( // VAP_VTX_AOS_ADDR1
(4 * 0);
);
// one _NOP packet per address dword, each carrying its relocation entry;
// both arrays live in the same buffer
T3(_NOP, 0);
TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR0
T3(_NOP, 0);
TU(VERTEXBUFFER_RELOC_INDEX * 4); // index into relocs array for VAP_VTX_AOS_ADDR1
//////////////////////////////////////////////////////////////////////////////
// 3D_DRAW
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_ALT_NUM_VERTICES,
vertex_count);
// body is the single VAP_VF_CNTL dword
T3(_3D_DRAW_VBUF_2, (1 - 1));
TU( VAP_VF_CNTL__PRIM_TYPE(13) // quad list
| VAP_VF_CNTL__PRIM_WALK(2) // vertex list (data fetched from memory)
| VAP_VF_CNTL__INDEX_SIZE(0)
| VAP_VF_CNTL__VTX_REUSE_DIS(0)
| VAP_VF_CNTL__DUAL_INDEX_MODE(0)
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(1)
| VAP_VF_CNTL__NUM_VERTICES(0)
);
}
// Emits the state and draw for the textured ground plane: PLANE shader pair,
// PLANE_TEXTURE bound as a 32x32 untiled wrapping texture, z function
// argument 1 ("less"), viewport transform and W division enabled, then the
// unit quad from _3d_plane_inner().
//
// shaders:       loaded shader tables; only the PLANE_SHADER entries are used
// world_to_clip: camera/projection transform; combined here with the plane's
//                local_to_world (a rotation of -PI/2 about x)
// theta:         not read by this function
void _3d_plane(const shaders& shaders,
const mat4x4& world_to_clip,
float theta)
{
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
// position plus one 4-component texture coordinate
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PLANE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
ib_vap_stream_cntl__2();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PLANE_SHADER]);
ib_vap_pvs(&shaders.vertex[PLANE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
mat4x4 s = scale(1.0f);
mat4x4 rx = rotate_x(-PI / 2.0f);
mat4x4 local_to_world = s * rx;
mat4x4 trans = world_to_clip * local_to_world;
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
// vertex shader constants: vectors 0-3 hold the local-to-clip matrix, vector 4
// holds (-2, 0, 0, 0); the meaning of -2 is defined by the vertex shader,
// which is not visible here
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
// 4
-2.0f, 0, 0, 0,
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// plane_inner
_3d_plane_inner();
}
// Emits the state for particle rendering and draws all particles with one
// draw call. Blending is enabled with GL_ONE source and destination factors,
// ZB_CNTL and ZB_ZSTENCILCNTL are written as 0, and the PARTICLE shader pair
// and PARTICLE_TEXTURE are bound. The shader constants are uploaded once;
// per-particle positions are instead written by the CPU into the mapped
// vertex buffer, after the region holding the quad corner coordinates.
//
// shaders:          loaded shader tables; only PARTICLE_SHADER entries are used
// world_to_clip:    camera/projection transform (constants 0-3)
// world_to_view:    camera transform; its first two rows are passed as the
//                   "right" and "up" vectors (constants 4 and 5)
// particles:        particle state; only `position` is read
// particles_length: number of entries in `particles`
// theta:            not read by this function
// vertexbuffer_ptr: CPU mapping of the vertex buffer; must have room for
//                   particles_length * 4 * (2 + 3) floats
void _3d_particle(const shaders& shaders,
const mat4x4& world_to_clip,
const mat4x4& world_to_view,
const particle * particles,
const int particles_length,
const float theta,
float * vertexbuffer_ptr)
{
// enable blending
T0V(RB3D_BLENDCNTL
, RB3D_BLENDCNTL__ALPHA_BLEND_ENABLE__ENABLE
| RB3D_BLENDCNTL__READ_ENABLE(1)
| RB3D_BLENDCNTL__SRCBLEND__GL_ONE
| RB3D_BLENDCNTL__DESTBLEND__GL_ONE
| RB3D_BLENDCNTL__SRC_ALPHA_0_NO_READ(0)
| RB3D_BLENDCNTL__SRC_ALPHA_1_NO_READ(0)
);
ib_rs_instructions(1);
//////////////////////////////////////////////////////////////////////////////
// VAP OUT
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_OUT_VTX_FMT_0
, VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1));
T0V(VAP_OUT_VTX_FMT_1
, VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4));
//
// the z-buffer setup used by the plane is replaced by zeroed z control
// registers for the particles
//ib_zbuffer(ZBUFFER_RELOC_INDEX, 1600, 1); // less
T0V(ZB_CNTL
, 0
);
T0V(ZB_ZSTENCILCNTL
, 0
);
int width = 32;
int height = 32;
int macrotile = 0;
int microtile = 0;
int clamp = 0; // wrap/repeat
ib_texture__1(TEXTUREBUFFER_RELOC_INDEX + PARTICLE_TEXTURE,
width, height,
macrotile, microtile,
clamp);
// NOTE(review): a different stream-control helper than the plane's
// ib_vap_stream_cntl__2(); presumably it describes the two vertex arrays
// (3-float position, 2-float corner) bound in _3d_particle_inner -- confirm
ib_vap_stream_cntl__32();
// shaders
T0V(US_PIXSIZE
, US_PIXSIZE__PIX_SIZE(4)
);
ib_ga_us(&shaders.fragment[PARTICLE_SHADER]);
ib_vap_pvs(&shaders.vertex[PARTICLE_SHADER]);
//////////////////////////////////////////////////////////////////////////////
// VAP
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_CLIP_CNTL
, VAP_CLIP_CNTL__PS_UCP_MODE(3)
);
T0V(VAP_VTE_CNTL
, VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(1)
| VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(1)
| VAP_VTE_CNTL__VTX_XY_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_Z_FMT(0) // enable W division
| VAP_VTE_CNTL__VTX_W0_FMT(1)
| VAP_VTE_CNTL__SERIAL_PROC_ENA(0)
);
T0V(VAP_CNTL_STATUS
, VAP_CNTL_STATUS__PVS_BYPASS(0)
);
//////////////////////////////////////////////////////////////////////////////
// matrix
//////////////////////////////////////////////////////////////////////////////
mat4x4 s = scale(1.0f);
mat4x4 local_to_world = s;
mat4x4 local_to_view = world_to_view * local_to_world;
mat4x4 trans = world_to_clip * local_to_world;
//////////////////////////////////////////////////////////////////////////////
// consts
//////////////////////////////////////////////////////////////////////////////
T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);
// note: this local shadows the scale() function for the rest of the function
const float scale = 0.005f;
const float consts[] = {
// 0
trans[0][0], trans[0][1], trans[0][2], trans[0][3],
trans[1][0], trans[1][1], trans[1][2], trans[1][3],
trans[2][0], trans[2][1], trans[2][2], trans[2][3],
trans[3][0], trans[3][1], trans[3][2], trans[3][3],
// 4: dx (right)
local_to_view[0][0], local_to_view[0][1], local_to_view[0][2], 0,
// 5: dy (up)
local_to_view[1][0], local_to_view[1][1], local_to_view[1][2], 0,
// 6: xyz:position w:scale
// (xyz is zero here; positions come from the vertex buffer instead)
0, 0, 0, scale,
// 7:
-2.0, 0, 0, 0,
};
ib_vap_pvs_const_cntl(consts, (sizeof (consts)));
// positions start after particles_length quads of 4 vertices x 2 floats
int offset = particles_length * 4 * 2;
int ix = 0;
for (int i = 0; i < particles_length; i++) {
const vec3& position = particles[i].position;
// the same position is written for all 4 vertices of the particle's quad
for (int j = 0; j < 4; j++) {
vertexbuffer_ptr[offset + ix] = position.x;
ix++;
vertexbuffer_ptr[offset + ix] = position.y;
ix++;
vertexbuffer_ptr[offset + ix] = position.z;
ix++;
};
}
// compiler barrier: keeps the stores above from being reordered past this
// point by the compiler (it is not a hardware fence)
asm volatile ("" ::: "memory");
_3d_particle_inner(particles_length, offset);
}
int indirect_buffer(const shaders& shaders,
const particle * particles,
const int particles_length,
float theta,
float * vertexbuffer_ptr)
{
int width = 1600;
int height = 1200;
int pitch = width;
ib_ix = 0;
ib_generic_initialization();
T0V(RB3D_BLENDCNTL, 0);
T0V(RB3D_ABLENDCNTL, 0);
ib_viewport(width, height);
ib_colorbuffer(COLORBUFFER_RELOC_INDEX, pitch, 0, 0);
T0V(GB_ENABLE, 0);
T0V(US_OUT_FMT_0
, US_OUT_FMT__OUT_FMT(0) // C4_8
| US_OUT_FMT__C0_SEL__BLUE
| US_OUT_FMT__C1_SEL__GREEN
| US_OUT_FMT__C2_SEL__RED
| US_OUT_FMT__C3_SEL__ALPHA
| US_OUT_FMT__OUT_SIGN(0)
);
T0V(US_OUT_FMT_1
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
T0V(US_OUT_FMT_2
, US_OUT_FMT__OUT_FMT(15) // render target is not used
);
load_pvs_shaders(shaders.vertex, shaders.vertex_length);
load_us_shaders(shaders.fragment, shaders.fragment_length);
//////////////////////////////////////////////////////////////////////////////
// DRAW
//////////////////////////////////////////////////////////////////////////////
mat4x4 aspect = scale(vec3(3.0f/4.0f, 1, 1));
mat4x4 p = perspective(0.01f, 3.0f,
0.001f, 0.999f,
1.0f, 3.0f);
mat4x4 t = translate(vec3(0, 0, 1));
mat4x4 rx = rotate_x(-PI / 8.0f);
mat4x4 ry = rotate_y(theta * 0.8f);
mat4x4 world_to_view = t * rx * ry;
mat4x4 world_to_clip = aspect * p * world_to_view;
_3d_clear(shaders);
_3d_plane(shaders, world_to_clip, theta);
_3d_particle(shaders,
world_to_clip,
world_to_view,
particles,
particles_length,
theta,
vertexbuffer_ptr);
//////////////////////////////////////////////////////////////////////////////
// padding
//////////////////////////////////////////////////////////////////////////////
while ((ib_ix % 8) != 0) {
TU(0x80000000);
}
assert((unsigned int)ib_ix < (sizeof (ib)) / (sizeof (ib[0])));
return ib_ix;
}
/**
 * Respawn a particle.
 *
 * The new position lies 20 units from the origin in the XZ plane, along the
 * horizontal direction of the particle's current velocity. The horizontal
 * velocity components are kept; the vertical component is set to twice the
 * particle's `delta`.
 */
void reset_particle(particle& p)
{
  // horizontal heading: velocity with its y component dropped
  const vec3 heading = normalize(vec3(p.velocity.x,
                                      0,
                                      p.velocity.z));
  p.position = heading * 20.0f;

  const float launch_y = 2.0f * p.delta;
  p.velocity = vec3(p.velocity.x, launch_y, p.velocity.z);
}
/**
 * Fill `particles[0 .. particles_length)` with their initial state.
 *
 * Position components and the horizontal velocity direction come from
 * xorshift32f seeded with a fixed constant, so the result is the same on
 * every run. `delta` is drawn as `xorshift32f * 0.5 + 0.5`. `time` is derived
 * from the particle index rather than from the random sequence.
 *
 * @param particles         destination array
 * @param particles_length  number of particles to initialise
 */
void init_particles(particle * particles, const int particles_length)
{
  uint32_t rng = 0x12345678;
  const float inv_count = 1.0f / (float)(particles_length);

  for (int n = 0; n < particles_length; n++) {
    particle& p = particles[n];

    // The draws below must stay in this order: each call consumes the
    // next value of the random sequence.
    const float px = xorshift32f(rng) * 2.0f - 1.0f;
    const float py = xorshift32f(rng) * 2.0f - 1.0f;
    const float pz = xorshift32f(rng) * 2.0f - 1.0f;
    const float fall = xorshift32f(rng) * 0.5f + 0.5f;
    const float dir_x = xorshift32f(rng) * 2.0f - 1.0f;
    const float dir_z = xorshift32f(rng) * 2.0f - 1.0f;

    const float phase = ((float)n) * inv_count * 2;
    p.time = max_age * sinf(phase) * 0.5f + 0.5f;
    p.delta = fall;

    p.position.x = px;
    p.position.y = py;
    p.position.z = pz;

    // unit-length horizontal velocity (y is zero)
    p.velocity = normalize(vec3(dir_x * 0.5f, 0.0f, dir_z * 0.5f));
  }
}
int init_particles_vertexbuffer(int fd, int particles_length, float ** ptr_out)
{
const vec2 vertices[] = {
{0.0, 0.0f},
{1.0, 0.0f},
{1.0, 1.0f},
{0.0, 1.0f},
};
const int vertex_count = 4;
const int size = particles_length * vertex_count * 2 * (sizeof (float))
+ particles_length * vertex_count * 3 * (sizeof (float));
printf("%d size %d\n", particles_length, size);
void * ptr;
int handle = create_buffer(fd, size, &ptr);
float * ptrf = (float*)ptr;
int ix = 0;
for (int j = 0; j < particles_length; j++) {
for (int i = 0; i < vertex_count; i++) {
ptrf[ix++] = vertices[i].x;
ptrf[ix++] = vertices[i].y;
}
}
printf("init vertexbuffer %d %d\n", ix, size);
assert(ptr_out != NULL);
*ptr_out = ptrf;
return handle;
}
// Program entry point.
//
// Loads the shaders, maps the PCI resource, opens the DRM device, creates
// two colorbuffers, a z-buffer, the textures and the particle vertex buffer,
// then renders frames in an endless loop while advancing the particle
// simulation on the CPU. The loop only exits when drm_radeon_cs returns -1;
// after that, colorbuffer 0 and the z-buffer are dumped to files.
int main()
{
struct shaders shaders = {
.vertex = load_shaders(vertex_shader_paths, vertex_shader_paths_length),
.fragment = load_shaders(fragment_shader_paths, fragment_shader_paths_length),
.vertex_length = vertex_shader_paths_length,
.fragment_length = fragment_shader_paths_length,
};
// NOTE(review): rmmio is not checked before use; confirm that
// map_pci_resource2 cannot return an invalid mapping.
void * rmmio = map_pci_resource2();
int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
assert(fd != -1);
// 1600x1200 pixels, 4 bytes per pixel
const int colorbuffer_size = 1600 * 1200 * 4;
int colorbuffer_handle[2];
int zbuffer_handle;
int * texturebuffer_handle;
//int flush_handle;
int vertexbuffer_handle;
void * colorbuffer_ptr[2];
void * zbuffer_ptr;
float * vertexbuffer_ptr;
// two colorbuffers; colorbuffer_ix selects the one rendered to each frame
colorbuffer_handle[0] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[0]);
colorbuffer_handle[1] = create_buffer(fd, colorbuffer_size, &colorbuffer_ptr[1]);
// the z-buffer is allocated with the same byte size as a colorbuffer
zbuffer_handle = create_buffer(fd, colorbuffer_size, &zbuffer_ptr);
//flush_handle = create_flush_buffer(fd);
texturebuffer_handle = load_textures(fd, textures, textures_length);
fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]);
fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]);
fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle);
int colorbuffer_ix = 0;
float theta = PI * 0.5;
// 131072 particles, zero-initialized, held in a local (automatic) array
particle particles[512 * 256] = {};
const int particles_length = (sizeof (particles)) / (sizeof (particles[0]));
vertexbuffer_handle = init_particles_vertexbuffer(fd, particles_length, &vertexbuffer_ptr);
init_particles(particles, particles_length);
fprintf(stderr, "vertexbuffer handle %d\n", vertexbuffer_handle);
// frame loop: build the command stream, submit it, then step the simulation
while (true) {
int ib_dwords = indirect_buffer(shaders,
particles,
particles_length,
theta,
vertexbuffer_ptr);
int ret = drm_radeon_cs(fd,
colorbuffer_handle[colorbuffer_ix],
zbuffer_handle,
vertexbuffer_handle,
texturebuffer_handle,
textures_length,
ib_dwords);
// a failed submission is the only way out of the loop
if (ret == -1)
break;
// presumably points scanout at the buffer that was just rendered -- confirm
primary_surface_address(rmmio, colorbuffer_ix);
// next state: advance the animation angle and toggle between buffers 0 and 1
theta += 0.01f;
colorbuffer_ix = (colorbuffer_ix + 1) & 1;
//
// update particles
//
for (int i = 0; i < particles_length; i++) {
if (particles[i].time <= 0) {
// expired: extend the lifetime by max_age and respawn
particles[i].time += max_age;
reset_particle(particles[i]);
} else {
particles[i].time -= 0.01f;
// integrate position: horizontal velocity scaled by 0.9, vertical by 5.0
particles[i].position += vec3(particles[i].velocity.x * 0.9f,
particles[i].velocity.y * 5.0f,
particles[i].velocity.z * 0.9f);
// constant downward acceleration
particles[i].velocity += vec3(0, -0.04, 0);
// bounce at y = 0: mirror the position above the plane, then reverse
// the vertical velocity and reduce it to 60%
if (particles[i].position.y < 0) {
particles[i].position.y = fabsf(particles[i].position.y);
particles[i].velocity.y *= -0.6f;
}
}
}
}
// dump colorbuffer 0 (only) to a file
{
printf("colorbuffer0.data\n");
int out_fd = open("colorbuffer0.data", O_RDWR|O_CREAT, 0644);
assert(out_fd >= 0);
ssize_t write_length = write(out_fd, colorbuffer_ptr[0], colorbuffer_size);
// a short write is treated as a failure
assert(write_length == colorbuffer_size);
close(out_fd);
}
// dump the z-buffer; it was allocated with colorbuffer_size bytes
{
printf("zbuffer.data\n");
int out_fd = open("zbuffer.data", O_RDWR|O_CREAT, 0644);
assert(out_fd >= 0);
ssize_t write_length = write(out_fd, zbuffer_ptr, colorbuffer_size);
assert(write_length == colorbuffer_size);
close(out_fd);
}
close(fd);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,10 @@
-- Samples tex[0] at the coordinate held in temp[0] and writes the fetched
-- value to out[0].
-- temp[0]: texture coordinate
TEX TEX_SEM_WAIT TEX_SEM_ACQUIRE
temp[0].rgba = LD tex[0].rgba temp[0].rgaa ;
-- MAX of a source with itself: copies temp[0] to out[0] unchanged
OUT TEX_SEM_WAIT
src0.a = temp[0] ,
src0.rgb = temp[0] :
out[0].a = MAX src0.a src0.a ,
out[0].rgb = MAX src0.rgb src0.rgb ;

Binary file not shown.

View File

@ -0,0 +1,30 @@
-- Expands a particle quad corner: offsets the particle position along dx/dy
-- by the input position coordinate, then applies the transform matrix.
-- const[0-3]: transform matrix
-- const[4]: particle_position
-- const[5]: dx
-- const[6]: dy
-- input[0]: position coordinate
-- input[1]: texture coordinate
--
-- dot(m[0], v), dot(m[1], v), dot(m[2], v), dot(m[3], v)
--
-- ppos = particle_position
temp[0].xyz = VE_ADD const[4].xyz_ const[4].000_ ;
-- ppos = position.xxx * dx + ppos
temp[0].xyz = VE_MAD input[0].xxx_ const[5].xyz_ temp[0].xyz_ ;
-- ppos = position.yyy * dy + ppos
temp[0].xyz = VE_MAD input[0].yyy_ const[6].xyz_ temp[0].xyz_ ;
-- scale: multiplies by 1 (no scaling in this variant); sets ppos.w = 1
temp[0].xyzw = VE_MUL temp[0].xyz1 temp[0].1111 ;
-- ppos = transform_matrix * ppos
temp[1].x = VE_DOT const[0].xyzw temp[0].xyzw ;
temp[1].y = VE_DOT const[1].xyzw temp[0].xyzw ;
temp[1].z = VE_DOT const[2].xyzw temp[0].xyzw ;
temp[1].w = VE_DOT const[3].xyzw temp[0].xyzw ;
-- out[0] = temp[1] * 1 + 0: copies the transformed position
out[0].xyzw = VE_MAD temp[1].xyzw temp[1].1111 temp[1].0000 ;
-- MAX of a source with itself: passes the texture coordinate through
out[1].xyzw = VE_MAX input[1].xyzw input[1].xyzw ;

Binary file not shown.

View File

@ -0,0 +1,32 @@
-- Expands a particle quad corner: offsets the particle position along dx/dy
-- by the input position coordinate, scales the result by const[6].w, then
-- applies the transform matrix.
-- const[0-3] : transform matrix
-- const[4] : dx
-- const[5] : dy
-- const[6].xyz: particle_position
-- const[6].w : scale
-- input[0]: position coordinate
-- input[1]: texture coordinate
--
-- dot(m[0], v), dot(m[1], v), dot(m[2], v), dot(m[3], v)
--
-- ppos = particle_position
temp[0].xyz = VE_ADD const[6].xyz_ const[6].000_ ;
-- ppos = position.xxx * dx + ppos
temp[0].xyz = VE_MAD input[0].xxx_ const[4].xyz_ temp[0].xyz_ ;
-- ppos = position.yyy * dy + ppos
temp[0].xyz = VE_MAD input[0].yyy_ const[5].xyz_ temp[0].xyz_ ;
-- ppos.xyz *= scale; ppos.w = 1 * 1 = 1
temp[0].xyzw = VE_MUL temp[0].xyz1 const[6].www1 ;
-- ppos = transform_matrix * ppos
temp[1].x = VE_DOT const[0].xyzw temp[0].xyzw ;
temp[1].y = VE_DOT const[1].xyzw temp[0].xyzw ;
temp[1].z = VE_DOT const[2].xyzw temp[0].xyzw ;
temp[1].w = VE_DOT const[3].xyzw temp[0].xyzw ;
-- out[0] = temp[1] * 1 + 0: copies the transformed position
out[0].xyzw = VE_MAD temp[1].xyzw temp[1].1111 temp[1].0000 ;
-- MAX of a source with itself: passes the texture coordinate through
out[1].xyzw = VE_MAX input[1].xyzw input[1].xyzw ;

Binary file not shown.

View File

@ -0,0 +1,37 @@
-- Expands a particle quad corner using only the texture coordinate as vertex
-- input: the corner offset is derived from the texture coordinate, applied
-- along dx/dy around the particle position, scaled and transformed.
-- const[0-3] : transform matrix
-- const[4] : dx
-- const[5] : dy
-- const[6].xyz: particle_position
-- const[6].w : scale
-- const[7].x : -2.0
-- input[0]: texture coordinate
--
-- dot(m[0], v), dot(m[1], v), dot(m[2], v), dot(m[3], v)
--
-- calculate position from texture coordinate (note the x/y swap)
-- position.x = texcoord.y * -2 + 1
-- position.y = texcoord.x * -2 + 1
temp[2].xy = VE_MAD input[0].yx__ const[7].xx__ input[0].11__ ;
-- ppos = particle_position
temp[0].xyz = VE_ADD const[6].xyz_ const[6].000_ ;
-- ppos = position.xxx * dx + ppos
temp[0].xyz = VE_MAD temp[2].xxx_ const[4].xyz_ temp[0].xyz_ ;
-- ppos = position.yyy * dy + ppos
temp[0].xyz = VE_MAD temp[2].yyy_ const[5].xyz_ temp[0].xyz_ ;
-- ppos.xyz *= scale; ppos.w = 1 * 1 = 1
temp[0].xyzw = VE_MUL temp[0].xyz1 const[6].www1 ;
-- ppos = transform_matrix * ppos
temp[1].x = VE_DOT const[0].xyzw temp[0].xyzw ;
temp[1].y = VE_DOT const[1].xyzw temp[0].xyzw ;
temp[1].z = VE_DOT const[2].xyzw temp[0].xyzw ;
temp[1].w = VE_DOT const[3].xyzw temp[0].xyzw ;
-- out[0] = temp[1] * 1 + 0: copies the transformed position
out[0].xyzw = VE_MAD temp[1].xyzw temp[1].1111 temp[1].0000 ;
-- out[1] = (texcoord.x, texcoord.y, 0, 0)
out[1].xyzw = VE_MAX input[0].xy00 input[0].xy00 ;

Binary file not shown.

View File

@ -0,0 +1,38 @@
-- Expands a particle quad corner where the particle position is a per-vertex
-- input: the corner offset is derived from the texture coordinate, applied
-- along dx/dy around input[0].xyz, scaled and transformed. input[0].w (age)
-- is forwarded in out[1].w.
-- const[0-3] : transform matrix
-- const[4] : dx
-- const[5] : dy
-- const[6].xyz: not read by this shader (particle position comes from input[0])
-- const[6].w : scale
-- const[7].x : -2.0
-- input[0].xyz: particle position
-- input[0].w : age (per the comments below)
-- input[1]: texture coordinate
--
-- dot(m[0], v), dot(m[1], v), dot(m[2], v), dot(m[3], v)
--
-- calculate position from texture coordinate (note the x/y swap)
-- position.x = texcoord.y * -2 + 1
-- position.y = texcoord.x * -2 + 1
temp[2].xy = VE_MAD input[1].yx__ const[7].xx__ input[1].11__ ;
-- ppos = particle_position
temp[0].xyz = VE_ADD input[0].xyz_ input[0].000_ ;
-- ppos = position.xxx * dx + ppos
temp[0].xyz = VE_MAD temp[2].xxx_ const[4].xyz_ temp[0].xyz_ ;
-- ppos = position.yyy * dy + ppos
temp[0].xyz = VE_MAD temp[2].yyy_ const[5].xyz_ temp[0].xyz_ ;
-- ppos = vec4(ppos.xyz * scale, age)
temp[0].xyzw = VE_MAD temp[0].xyz0 const[6].www0 input[0].000w ;
-- ppos = transform_matrix * vec4(ppos.xyz, 1)
-- (w is taken as the constant 1 because temp[0].w holds age)
temp[1].x = VE_DOT const[0].xyzw temp[0].xyz1 ;
temp[1].y = VE_DOT const[1].xyzw temp[0].xyz1 ;
temp[1].z = VE_DOT const[2].xyzw temp[0].xyz1 ;
temp[1].w = VE_DOT const[3].xyzw temp[0].xyz1 ;
-- out[0] = temp[1] + 0: copies the transformed position
out[0].xyzw = VE_ADD temp[1].xyzw temp[1].0000 ;
out[1].xyzw = VE_ADD input[1].xy00 temp[0].00zw ; -- out[1] = (texcoord.x, texcoord.y, ppos.z, age)

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More