diff --git a/cube_ta_fullscreen.c b/cube_ta_fullscreen.c new file mode 100644 index 0000000..0194e3a --- /dev/null +++ b/cube_ta_fullscreen.c @@ -0,0 +1,783 @@ +#include + +/* + This demo does not work in emulators: + + - Flycast does not work because it strangely is incapable of displaying a + single rendered frame. + + - Devcast does not work because it does not perform (the equivalent of) boot + rom initialization when loading .elf files + + In an attempt to reduce boilerplate, this demo presumes the boot rom has + initialized Holly with the values needed to display the "PRODUCED BY OR UNDER + LICENSE FROM SEGA ENTERPRESES, LTD." screen, and that no register values have + been modified beyond boot rom initialization. + */ + +/* Texture memory access + + texture_memory64 and texture_memory32 refer two different addressing schemes + over the same 8MB of physical texture memory. + + Generally speaking the texture_memory64 address scheme is used for textures + (any texture memory address referenced by `texture_control_word`), and + texture_memory32 is used for everything else. + + E_DC_HW_outline.pdf "2.4 System memory mapping" (PDF page 10) + */ +const uint32_t texture_memory32 = 0xa5000000; + +/* The "TA Polygon Converter FIFO" is a Holly functional unit. */ +const uint32_t ta_polygon_converter_fifo = 0x10000000; + +/* The "Store Queue" is a SH4 functional unit. */ +const uint32_t store_queue = 0xe0000000; + +/****************************************************************************** + Region array + ******************************************************************************/ + +/* + These "region array entries" are briefly illustrated in DCDBSysArc990907E.pdf + page 168, 177-180. + + The number of list pointers per region array entry is affected by + FPU_PARAM_CFG "Region Header Type" (page 368). This struct models the + "6 × 32bit/Tile Type 2" mode. +*/ +typedef struct region_array_entry { + uint32_t tile; + struct { + uint32_t opaque; + uint32_t opaque_modifier_volume; + uint32_t translucent; + uint32_t translucent_modifier_volume; + uint32_t punch_through; + } list_pointer; +} region_array_entry; +static_assert((sizeof (struct region_array_entry)) == 4 * 6); + +/* + DCDBSysArc990907E.pdf page 216-217 describes the REGION_ARRAY__ bit fields: + */ +#define REGION_ARRAY__TILE__LAST_REGION (1 << 31) +#define REGION_ARRAY__TILE__Y_POSITION(n) (((n) & 0x3f) << 8) +#define REGION_ARRAY__TILE__X_POSITION(n) (((n) & 0x3f) << 2) + +#define REGION_ARRAY__LIST_POINTER__EMPTY (1 << 31) +#define REGION_ARRAY__LIST_POINTER__OBJECT_LIST(n) (((n) & 0xfffffc) << 0) + +void transfer_region_array(uint32_t region_array_start, + uint32_t opaque_list_pointer) +{ + /* + Create a minimal region array with a single entry: + - one tile at tile coordinate (0, 0) with one opaque list pointer + */ + + /* + Holly reads the region array from "32-bit" texture memory address space, + so the region array is correspondingly written from "32-bit" address space. + */ + volatile region_array_entry * region_array = (volatile region_array_entry *)(texture_memory32 + region_array_start); + + const int num_tiles_x = 640 / 32; + const int num_tiles_y = 480 / 32; + const int num_tiles = num_tiles_x * num_tiles_y; + + for (int i = 0; i < num_tiles; i++) { + /* define one region array entry per 32×32 px tile over a 640x480 px area */ + + int x = i % num_tiles_x; + int y = i / num_tiles_x; + + bool last_tile = (i == (num_tiles - 1)); + + region_array[i].tile + = (last_tile ? REGION_ARRAY__TILE__LAST_REGION : 0) + | REGION_ARRAY__TILE__Y_POSITION(y) + | REGION_ARRAY__TILE__X_POSITION(x); + + /* + list pointers are offsets relative to the beginning of "32-bit" texture memory. + + Each list type uses different rasterization steps, "opaque" being the fastest and most efficient. + */ + + /* + In all previous demos, a single `opaque_list_pointer` was used for all + tiles. This was correct in the cases where: + + - we were generating our own object lists + - we were using the TA with a single tile + + However, this is no longer correct for this example, where we are both + using the TA and multiple tiles simultaneously. In this case, the TA's + "object pointer block" allocation strategy needs to implemented here. + + See DCDBSysArc990907E.pdf page 178,179 and 186 for a relatively weak + explanation of the TA's OPB allocation behavior. + */ + + + // This example is using TA_ALLOC_CTRL__O_OPB__8X4BYTE, so each OPB is 8×4 + // bytes. They are (un)coincidentally stored in the same order that the + // tile x position and tile y position coordinates are calculated above. + int opb_pointer = opaque_list_pointer + i * 8 * 4; + region_array[i].list_pointer.opaque = REGION_ARRAY__LIST_POINTER__OBJECT_LIST(opb_pointer); + region_array[i].list_pointer.opaque_modifier_volume = REGION_ARRAY__LIST_POINTER__EMPTY; + region_array[i].list_pointer.translucent = REGION_ARRAY__LIST_POINTER__EMPTY; + region_array[i].list_pointer.translucent_modifier_volume = REGION_ARRAY__LIST_POINTER__EMPTY; + region_array[i].list_pointer.punch_through = REGION_ARRAY__LIST_POINTER__EMPTY; + } +} + +/****************************************************************************** + ISP/TSP Parameter + ******************************************************************************/ + +/* + Other examples of possible ISP/TSP parameter formats are shown on + DCDBSysArc990907E.pdf page 221. Page 221 is non-exhaustive, and many + permutations are possible. + + Parameter format selection is controlled mostly by the value of the + `isp_tsp_instruction_word` (always present). + + This is most similar to the "2 Stripped Triangle Polygon (Non-Textured, + Gouraud)" example (except this is for a non-strip triangle). +*/ +typedef struct isp_tsp_parameter__vertex { + float x; + float y; + float z; + uint32_t color; +} isp_tsp_parameter__vertex; + +typedef struct isp_tsp_parameter__polygon { + uint32_t isp_tsp_instruction_word; + uint32_t tsp_instruction_word; + uint32_t texture_control_word; + isp_tsp_parameter__vertex a; + isp_tsp_parameter__vertex b; + isp_tsp_parameter__vertex c; +} isp_tsp_parameter__polygon; + +/* + isp_tsp_instruction_word bits + + DCDBSysArc990907E.pdf page 222-225 + */ +#define ISP_TSP_INSTRUCTION_WORD__DEPTH_COMPARE_MODE__ALWAYS (7 << 29) +#define ISP_TSP_INSTRUCTION_WORD__DEPTH_COMPARE_MODE__GREATER (4 << 29) + +#define ISP_TSP_INSTRUCTION_WORD__CULLING_MODE__NO_CULLING (0 << 27) + +#define ISP_TSP_INSTRUCTION_WORD__GOURAUD_SHADING (1 << 23) + +/* + tsp_instruction_word bits + + DCDBSysArc990907E.pdf page 226-232 + */ +#define TSP_INSTRUCTION_WORD__SRC_ALPHA_INSTR__ONE (1 << 29) +#define TSP_INSTRUCTION_WORD__DST_ALPHA_INSTR__ZERO (0 << 26) +#define TSP_INSTRUCTION_WORD__FOG_CONTROL__NO_FOG (0b10 << 22) + +void transfer_isp_tsp_background_parameter(uint32_t isp_tsp_parameter_start) +{ + /* + Create a minimal background parameter: + - non-textured + - packed color + - single volume + */ + + volatile isp_tsp_parameter__polygon * params = (volatile isp_tsp_parameter__polygon *)(texture_memory32 + isp_tsp_parameter_start); + + params[0].isp_tsp_instruction_word = ISP_TSP_INSTRUCTION_WORD__DEPTH_COMPARE_MODE__ALWAYS + | ISP_TSP_INSTRUCTION_WORD__CULLING_MODE__NO_CULLING; + + params[0].tsp_instruction_word = TSP_INSTRUCTION_WORD__SRC_ALPHA_INSTR__ONE + | TSP_INSTRUCTION_WORD__DST_ALPHA_INSTR__ZERO + | TSP_INSTRUCTION_WORD__FOG_CONTROL__NO_FOG; + + params[0].texture_control_word = 0; + + // top left + params[0].a.x = 0.0f; + params[0].a.y = 0.0f; + params[0].a.z = 0.00001f; + params[0].a.color = 0xff00ff; // magenta + + // top right + params[0].b.x = 32.0f; + params[0].b.y = 0.0f; + params[0].b.z = 0.00001f; + params[0].b.color = 0xff00ff; // magenta + + // bottom right + params[0].c.x = 32.0f; + params[0].c.y = 32.0f; + params[0].c.z = 0.00001f; + params[0].c.color = 0xff00ff; // magenta + + // bottom left (implied) +} + +/* background */ +#define ISP_BACKGND_T__SKIP(n) (((n) & 0x7) << 24) +#define ISP_BACKGND_T__TAG_ADDRESS(n) (((n) & 0x1fffff) << 3) +#define ISP_BACKGND_T__TAG_OFFSET(n) (((n) & 0x7) << 0) + +/****************************************************************************** + SH4 store queue + ******************************************************************************/ + +/* + The TA polygon converter FIFO requires 32-byte bus access. Attempts to access + the TA with smaller bus accesses will result in incorrect TA operation. The + Dreamcast has three mechanisms that can generate 32-byte writes: + + - SH4 store queue (commonly used) + + - Holly CH2-DMA (commonly used) + + - meticulous and clever use of SH4 cache writeback (esoteric forbidden technique) + + Of these, the mechanism that requires the least code is the SH4 store queue, + so this demo will also use the SH4 store queue for that reason. + + The SH4 store queue is described in sh7091pm_e.pdf printed page 61-64 and + 79-81. +*/ + +// sh7091pm_e.pdf: +// > Issuing a PREF instruction for P4 area H'E000 0000 to H'E3FF FFFC starts a +// > burst transfer from the SQs to external memory. +#define pref(address) \ + { asm volatile ("pref @%0" : : "r" (address) : "memory"); } + +volatile uint32_t * SH7091__CCN__QACR0 = (volatile uint32_t *)(0xff000000 + 0x38); +volatile uint32_t * SH7091__CCN__QACR1 = (volatile uint32_t *)(0xff000000 + 0x3c); + +/****************************************************************************** + TA Parameters + ******************************************************************************/ + +/* + The primary advantage of using the TA: it will generate object lists on your + behalf, and does a reasonable job of excluding object list entries from tiles + that are entirely outside the area of that triangle. + + In addition, the TA can be used to perform floating point to integer color + packing, including color component clamping. On the SH4, each floating point + to integer color conversion requires at least 50-60 clock cycles, whereas the + TA can do the same conversion much more quickly (~1 clock cycle). + + Floating point color is typical when performing (colored) lighting/shading + calculations. + */ + +/* + TA parameters are roughly superset of CORE ISP/TSP parameters. + + There are a few differences: + + - the TA overwrites certain ISP/TSP Instruction Word bits, based on duplicated + values in the TA Parameter Control Word (DCDBSysArc990907E.pdf page 200) + + - the TA supports several (floating point) vertex color formats, whereas CORE + exclusively supports 32-bit packed integer ARGB color. + */ + +typedef struct ta_global_parameter__polygon_type_0 { + uint32_t parameter_control_word; + uint32_t isp_tsp_instruction_word; + uint32_t tsp_instruction_word; + uint32_t texture_control_word; + uint32_t _zero0; + uint32_t _zero1; + uint32_t data_size_for_sort_dma; + uint32_t next_address_for_sort_dma; +} ta_global_parameter__polygon_type_0; +static_assert((sizeof (struct ta_global_parameter__polygon_type_0)) == 32); + +typedef struct ta_global_parameter__end_of_list { + uint32_t parameter_control_word; + uint32_t _zero0; + uint32_t _zero1; + uint32_t _zero2; + uint32_t _zero3; + uint32_t _zero4; + uint32_t _zero5; + uint32_t _zero6; +} ta_global_parameter__end_of_list; +static_assert((sizeof (struct ta_global_parameter__end_of_list)) == 32); + +/* + The TA only supports polygon/triangle vertex input represented as a triangle + strip. TA triangle strips can be any length between 1 and infinity (or the end + of texture memory, whichever comes first). CORE triangle strips can be any + length between 1 and 6. The TA automatically splits infinite-length strips + into strip lengths that CORE supports. + + See DCDBSysArc990907E.pdf page 181. + */ +typedef struct ta_vertex_parameter__polygon_type_0 { + uint32_t parameter_control_word; + float x; + float y; + float z; + uint32_t _zero0; + uint32_t _zero1; + uint32_t base_color; + uint32_t _zero2; +} ta_vertex_parameter__polygon_type_0; +static_assert((sizeof (struct ta_vertex_parameter__polygon_type_0)) == 32); + +#define PARAMETER_CONTROL_WORD__PARA_CONTROL__PARA_TYPE__END_OF_LIST (0 << 29) +#define PARAMETER_CONTROL_WORD__PARA_CONTROL__PARA_TYPE__POLYGON_OR_MODIFIER_VOLUME (4 << 29) +#define PARAMETER_CONTROL_WORD__PARA_CONTROL__PARA_TYPE__VERTEX_PARAMETER (7 << 29) +#define PARAMETER_CONTROL_WORD__PARA_CONTROL__END_OF_STRIP (1 << 28) +#define PARAMETER_CONTROL_WORD__PARA_CONTROL__LIST_TYPE__OPAQUE (0 << 24) +#define PARAMETER_CONTROL_WORD__OBJ_CONTROL__COL_TYPE__PACKED_COLOR (0 << 4) +#define PARAMETER_CONTROL_WORD__OBJ_CONTROL__GOURAUD (1 << 1) + +static inline uint32_t transfer_ta_global_end_of_list(uint32_t store_queue_ix) +{ + // + // TA "end of list" global transfer + // + + volatile ta_global_parameter__end_of_list * end_of_list = (volatile ta_global_parameter__end_of_list *)store_queue_ix; + + end_of_list->parameter_control_word = PARAMETER_CONTROL_WORD__PARA_CONTROL__PARA_TYPE__END_OF_LIST; + end_of_list->_zero0 = 0; + end_of_list->_zero1 = 0; + end_of_list->_zero2 = 0; + end_of_list->_zero3 = 0; + end_of_list->_zero4 = 0; + end_of_list->_zero5 = 0; + end_of_list->_zero6 = 0; + + // start store queue transfer of `end_of_list` to the TA + pref(store_queue_ix); + + store_queue_ix += (sizeof (ta_global_parameter__end_of_list)); + + return store_queue_ix; +} + +static inline uint32_t transfer_ta_vertex_triangle(uint32_t store_queue_ix, + float ax, float ay, float az, uint32_t ac, + float bx, float by, float bz, uint32_t bc, + float cx, float cy, float cz, uint32_t cc) +{ + // + // TA polygon vertex transfer + // + + volatile ta_vertex_parameter__polygon_type_0 * vertex = (volatile ta_vertex_parameter__polygon_type_0 *)store_queue_ix; + + // bottom left + vertex[0].parameter_control_word = PARAMETER_CONTROL_WORD__PARA_CONTROL__PARA_TYPE__VERTEX_PARAMETER; + vertex[0].x = ax; + vertex[0].y = ay; + vertex[0].z = az; + vertex[0]._zero0 = 0; + vertex[0]._zero1 = 0; + vertex[0].base_color = ac; + vertex[0]._zero2 = 0; + + // start store queue transfer of `vertex[0]` to the TA + pref(store_queue_ix + 32 * 0); + + // top center + vertex[1].parameter_control_word = PARAMETER_CONTROL_WORD__PARA_CONTROL__PARA_TYPE__VERTEX_PARAMETER; + vertex[1].x = bx; + vertex[1].y = by; + vertex[1].z = bz; + vertex[1]._zero0 = 0; + vertex[1]._zero1 = 0; + vertex[1].base_color = bc; + vertex[1]._zero2 = 0; + + // start store queue transfer of `vertex[1]` to the TA + pref(store_queue_ix + 32 * 1); + + // bottom right + vertex[2].parameter_control_word = PARAMETER_CONTROL_WORD__PARA_CONTROL__PARA_TYPE__VERTEX_PARAMETER + | PARAMETER_CONTROL_WORD__PARA_CONTROL__END_OF_STRIP; + vertex[2].x = cx; + vertex[2].y = cy; + vertex[2].z = cz; + vertex[2]._zero0 = 0; + vertex[2]._zero1 = 0; + vertex[2].base_color = cc; + vertex[2]._zero2 = 0; + + // start store queue transfer of `params[2]` to the TA + pref(store_queue_ix + 32 * 2); + + store_queue_ix += (sizeof (ta_vertex_parameter__polygon_type_0)) * 3; + + return store_queue_ix; +} + +static inline uint32_t transfer_ta_global_polygon(uint32_t store_queue_ix) +{ + // + // TA polygon global transfer + // + + volatile ta_global_parameter__polygon_type_0 * polygon = (volatile ta_global_parameter__polygon_type_0 *)store_queue_ix; + + polygon->parameter_control_word = PARAMETER_CONTROL_WORD__PARA_CONTROL__PARA_TYPE__POLYGON_OR_MODIFIER_VOLUME + | PARAMETER_CONTROL_WORD__PARA_CONTROL__LIST_TYPE__OPAQUE + | PARAMETER_CONTROL_WORD__OBJ_CONTROL__COL_TYPE__PACKED_COLOR + | PARAMETER_CONTROL_WORD__OBJ_CONTROL__GOURAUD; + + polygon->isp_tsp_instruction_word = ISP_TSP_INSTRUCTION_WORD__DEPTH_COMPARE_MODE__GREATER + | ISP_TSP_INSTRUCTION_WORD__CULLING_MODE__NO_CULLING; + // Note that it is not possible to use + // ISP_TSP_INSTRUCTION_WORD__GOURAUD_SHADING in this isp_tsp_instruction_word, + // because `gouraud` is one of the bits overwritten by the value in + // parameter_control_word. See DCDBSysArc990907E.pdf page 200. + + polygon->tsp_instruction_word = TSP_INSTRUCTION_WORD__SRC_ALPHA_INSTR__ONE + | TSP_INSTRUCTION_WORD__DST_ALPHA_INSTR__ZERO + | TSP_INSTRUCTION_WORD__FOG_CONTROL__NO_FOG; + + polygon->texture_control_word = 0; + + polygon->_zero0 = 0; + polygon->_zero1 = 0; + polygon->data_size_for_sort_dma = 0; + polygon->next_address_for_sort_dma = 0; + + // start store queue transfer of `polygon` to the TA + pref(store_queue_ix); + + store_queue_ix += (sizeof (ta_global_parameter__polygon_type_0)); + + return store_queue_ix; +} + +/* + These vertex and face definitions are a trivial transformation of the default + Blender cube, as exported by the .obj exporter (with triangulation enabled). + */ +typedef struct vec3 { + float x; + float y; + float z; +} vec3; + +static const vec3 cube_vertex_position[] = { + { 1.0f, 1.0f, -1.0f }, + { 1.0f, -1.0f, -1.0f }, + { 1.0f, 1.0f, 1.0f }, + { 1.0f, -1.0f, 1.0f }, + { -1.0f, 1.0f, -1.0f }, + { -1.0f, -1.0f, -1.0f }, + { -1.0f, 1.0f, 1.0f }, + { -1.0f, -1.0f, 1.0f }, +}; + +static const uint32_t cube_vertex_color[] = { + 0xff0000, // red + 0x00ff00, // green + 0x0000ff, // blue + 0xffff00, // yellow + 0x00ffff, // cyan + 0xff00ff, // magenta + 0xff7f00, // orange + 0x7f00ff, // violet +}; + +typedef struct face { + int a; + int b; + int c; +} face; + +/* + It is also possible to submit each cube face as a 4-vertex triangle strip, or + submit the entire cube as a single triangle strip. + + Separate 3-vertex triangles are chosen to make this example more + straightforward, but this is not the best approach if high performance is + desired. + */ + +static const face cube_faces[] = { + {4, 2, 0}, + {2, 7, 3}, + {6, 5, 7}, + {1, 7, 5}, + {0, 3, 1}, + {4, 1, 5}, + {4, 6, 2}, + {2, 6, 7}, + {6, 4, 5}, + {1, 3, 7}, + {0, 2, 3}, + {4, 0, 1}, +}; +static const int cube_faces_length = (sizeof (cube_faces)) / (sizeof (cube_faces[0])); + +#define cos(n) __builtin_cosf(n) +#define sin(n) __builtin_sinf(n) + +float theta = 0.7853981633974483f; // pi / 4 + +static inline vec3 vertex_rotate(vec3 v) +{ + // to make the cube's appearance more interesting, rotate the vertex on two + // axes + + float x0 = v.x; + float y0 = v.y; + float z0 = v.z; + + float x1 = x0 * cos(theta) - z0 * sin(theta); + float y1 = y0; + float z1 = x0 * sin(theta) + z0 * cos(theta); + + float x2 = x1; + float y2 = y1 * cos(theta) - z1 * sin(theta); + float z2 = y1 * sin(theta) + z1 * cos(theta); + + return (vec3){x2, y2, z2}; +} + +static inline vec3 vertex_perspective_divide(vec3 v) +{ + float w = 1.0f / (v.z + 3.0f); + return (vec3){v.x * w, v.y * w, w}; +} + +static inline vec3 vertex_screen_space(vec3 v) +{ + return (vec3){ + v.x * 240.f + 320.f, + v.y * 240.f + 240.f, + v.z, + }; +} + +void transfer_ta_cube() +{ + // set the store queue destination address to the TA Polygon Converter FIFO + *SH7091__CCN__QACR0 = ((ta_polygon_converter_fifo >> 24) & 0b11100); + *SH7091__CCN__QACR1 = ((ta_polygon_converter_fifo >> 24) & 0b11100); + + uint32_t store_queue_ix = store_queue; + + // See sh7091pm_e.pdf, printed page 79: + // + // > While the contents of one SQ are being transferred to external memory, + // > the other SQ can be written to without a penalty cycle, but writing to + // > the SQ involved in the transfer to external memory is deferred until the + // > transfer is completed. + // + // The reason for incrementing store_queue_ix is that it is a cheap way to + // track which store queue is the most/least recently used--encoded in bit 5 + // of the store queue address. + + store_queue_ix = transfer_ta_global_polygon(store_queue_ix); + + for (int face_ix = 0; face_ix < cube_faces_length; face_ix++) { + int ia = cube_faces[face_ix].a; + int ib = cube_faces[face_ix].b; + int ic = cube_faces[face_ix].c; + + vec3 va = vertex_screen_space( + vertex_perspective_divide( + vertex_rotate(cube_vertex_position[ia]))); + + vec3 vb = vertex_screen_space( + vertex_perspective_divide( + vertex_rotate(cube_vertex_position[ib]))); + + vec3 vc = vertex_screen_space( + vertex_perspective_divide( + vertex_rotate(cube_vertex_position[ic]))); + + uint32_t va_color = cube_vertex_color[ia]; + uint32_t vb_color = cube_vertex_color[ib]; + uint32_t vc_color = cube_vertex_color[ic]; + + store_queue_ix = transfer_ta_vertex_triangle(store_queue_ix, + va.x, va.y, va.z, va_color, + vb.x, vb.y, vb.z, vb_color, + vc.x, vc.y, vc.z, vc_color); + } + + store_queue_ix = transfer_ta_global_end_of_list(store_queue_ix); +} + +/****************************************************************************** + Holly register definitions + ******************************************************************************/ + +volatile uint32_t * SOFTRESET = (volatile uint32_t *)(0xa05f8000 + 0x08); +volatile uint32_t * STARTRENDER = (volatile uint32_t *)(0xa05f8000 + 0x14); +volatile uint32_t * PARAM_BASE = (volatile uint32_t *)(0xa05f8000 + 0x20); +volatile uint32_t * REGION_BASE = (volatile uint32_t *)(0xa05f8000 + 0x2c); +volatile uint32_t * FB_R_SOF1 = (volatile uint32_t *)(0xa05f8000 + 0x50); +volatile uint32_t * FB_W_SOF1 = (volatile uint32_t *)(0xa05f8000 + 0x60); +volatile uint32_t * ISP_BACKGND_T = (volatile uint32_t *)(0xa05f8000 + 0x8c); + +volatile uint32_t * SPG_STATUS = (volatile uint32_t *)(0xa05f8000 + 0x10c); + +#define SPG_STATUS__VSYNC (1 << 13) + +volatile uint32_t * TA_OL_BASE = (volatile uint32_t *)(0xa05f8000 + 0x124); +volatile uint32_t * TA_ISP_BASE = (volatile uint32_t *)(0xa05f8000 + 0x128); +volatile uint32_t * TA_OL_LIMIT = (volatile uint32_t *)(0xa05f8000 + 0x12c); +volatile uint32_t * TA_ISP_LIMIT = (volatile uint32_t *)(0xa05f8000 + 0x130); +volatile uint32_t * TA_GLOB_TILE_CLIP = (volatile uint32_t *)(0xa05f8000 + 0x13c); +volatile uint32_t * TA_ALLOC_CTRL = (volatile uint32_t *)(0xa05f8000 + 0x140); +volatile uint32_t * TA_LIST_INIT = (volatile uint32_t *)(0xa05f8000 + 0x144); + +#define TA_GLOB_TILE_CLIP__TILE_Y_NUM(n) (((n) & 0xf) << 16) +#define TA_GLOB_TILE_CLIP__TILE_X_NUM(n) (((n) & 0x1f) << 0) +#define TA_ALLOC_CTRL__OPB_MODE__INCREASING_ADDRESSES (0 << 20) +#define TA_ALLOC_CTRL__O_OPB__8X4BYTE (1 << 0) +#define TA_LIST_INIT__LIST_INIT (1 << 31) + +void main() +{ + /* + a very simple memory map: + + the ordering within texture memory is not significant, and could be + anything + */ + uint32_t framebuffer_start = 0x200000; // intentionally the same address that the boot rom used to draw the SEGA logo + uint32_t isp_tsp_parameter_start = 0x400000; + uint32_t region_array_start = 0x500000; + uint32_t object_list_start = 0x100000; + uint32_t opaque_list_pointer = object_list_start; + + // background_offset is relative to the beginning of isp_tsp_parameter_start + uint32_t background_offset = (sizeof (isp_tsp_parameter__polygon)) * 0; + + transfer_region_array(region_array_start, opaque_list_pointer); + + transfer_isp_tsp_background_parameter(isp_tsp_parameter_start); + + ////////////////////////////////////////////////////////////////////////////// + // configure the TA + ////////////////////////////////////////////////////////////////////////////// + + const int tile_y_num = 480 / 32; + const int tile_x_num = 640 / 32; + + // TA_GLOB_TILE_CLIP restricts which "object pointer blocks" are written + // to. + // + // This can also be used to implement "windowing", as long as the desired + // window size happens to be a multiple of 32 pixels. The "User Tile Clip" TA + // control parameter can also ~equivalently be used as many times as desired + // within a single TA initialization to produce an identical effect. + // + // See DCDBSysArc990907E.pdf page 183. + *TA_GLOB_TILE_CLIP = TA_GLOB_TILE_CLIP__TILE_Y_NUM(tile_y_num - 1) + | TA_GLOB_TILE_CLIP__TILE_X_NUM(tile_x_num - 1); + + // While CORE supports arbitrary-length object lists, the TA uses "object + // pointer blocks" as a memory allocation strategy. These fixed-length blocks + // can still have infinite length via "object pointer block links". This + // mechanism is illustrated in DCDBSysArc990907E.pdf page 188. + *TA_ALLOC_CTRL = TA_ALLOC_CTRL__OPB_MODE__INCREASING_ADDRESSES + | TA_ALLOC_CTRL__O_OPB__8X4BYTE; + + // While building object lists, the TA contains an internal index (exposed as + // the read-only TA_ITP_CURRENT) for the next address that new ISP/TSP will be + // stored at. The initial value of this index is TA_ISP_BASE. + *TA_ISP_BASE = isp_tsp_parameter_start + (sizeof (isp_tsp_parameter__polygon)) * 1; + *TA_ISP_LIMIT = isp_tsp_parameter_start + 0x100000; + + // Similarly, the TA also contains, for up to 600 tiles, an internal index for + // the next address that an object list entry will be stored for each + // tile. These internal indicies are partially exposed via the read-only + // TA_OL_POINTERS. + *TA_OL_BASE = object_list_start; + + // TA_OL_LIMIT, DCDBSysArc990907E.pdf page 385: + // + // > Because the TA may automatically store data in the address that is + // > specified by this register, it must not be used for other data. For + // > example, the address specified here must not be the same as the address + // > in the TA_ISP_BASE register. + *TA_OL_LIMIT = object_list_start + 0x100000 - 32; + + ////////////////////////////////////////////////////////////////////////////// + // configure CORE + ////////////////////////////////////////////////////////////////////////////// + + // REGION_BASE is the (texture memory-relative) address of the region array. + *REGION_BASE = region_array_start; + + // PARAM_BASE is the (texture memory-relative) address of ISP/TSP parameters. + // Anything that references an ISP/TSP parameter does so relative to this + // address (and not relative to the beginning of texture memory). + *PARAM_BASE = isp_tsp_parameter_start; + + // Set the offset of the background ISP/TSP parameter, relative to PARAM_BASE + // SKIP is related to the size of each vertex + *ISP_BACKGND_T = ISP_BACKGND_T__TAG_ADDRESS(background_offset / 4) + | ISP_BACKGND_T__TAG_OFFSET(0) + | ISP_BACKGND_T__SKIP(1); + + // FB_W_SOF1 is the (texture memory-relative) address of the framebuffer that + // will be written to when a tile is rendered/flushed. + *FB_W_SOF1 = framebuffer_start; + + // without waiting for rendering to actually complete, immediately display the + // framebuffer. + *FB_R_SOF1 = framebuffer_start; + + ////////////////////////////////////////////////////////////////////////////// + // animated drawing + ////////////////////////////////////////////////////////////////////////////// + + // draw 500 frames of cube rotation + for (int i = 0; i < 500; i++) { + ////////////////////////////////////////////////////////////////////////////// + // transfer cube to texture memory via the TA polygon converter FIFO + ////////////////////////////////////////////////////////////////////////////// + + // TA_LIST_INIT needs to be written (every frame) prior to the first FIFO + // write. + *TA_LIST_INIT = TA_LIST_INIT__LIST_INIT; + + // dummy TA_LIST_INIT read; DCDBSysArc990907E.pdf in multiple places says this + // step is required. + (void)*TA_LIST_INIT; + + transfer_ta_cube(); + + ////////////////////////////////////////////////////////////////////////////// + // start the actual rasterization + ////////////////////////////////////////////////////////////////////////////// + + // start the actual render--the rendering process begins by interpreting the + // region array + *STARTRENDER = 1; + + // wait for vertical synchronization + while (!((*SPG_STATUS) & SPG_STATUS__VSYNC)); + while (((*SPG_STATUS) & SPG_STATUS__VSYNC)); + + // increment theta for the cube rotation animation + // (used by the `vertex_rotate` function) + theta += 0.01f; + } + + // return from main; this will effectively jump back to the serial loader +}