example: add macaw_multipass

I don't like this example very much, but it was an interesting
experiment regardless.
This commit is contained in:
Zack Buhman 2023-12-14 13:07:26 +08:00
parent 9fc40136af
commit 95a30b5164
12 changed files with 313 additions and 34 deletions

View File

@ -21,6 +21,18 @@ MACAW_OBJ = \
example/macaw.elf: LDSCRIPT = $(LIB)/alt.lds
example/macaw.elf: $(START_OBJ) $(MACAW_OBJ)
# Objects linked into the macaw_multipass example.
MACAW_MULTIPASS_OBJ = \
example/macaw_multipass.o \
vga.o \
holly/core.o \
holly/region_array.o \
holly/background.o \
holly/ta_fifo_polygon_converter.o \
macaw.data.o
# Linked with the alternate linker script, like example/macaw.elf.
example/macaw_multipass.elf: LDSCRIPT = $(LIB)/alt.lds
example/macaw_multipass.elf: $(START_OBJ) $(MACAW_MULTIPASS_OBJ)
CUBE_OBJ = \
example/cube.o \
vga.o \

View File

@ -9,6 +9,9 @@
#include "holly/core_bits.hpp"
#include "holly/ta_fifo_polygon_converter.hpp"
#include "holly/ta_parameter.hpp"
#include "holly/region_array.hpp"
#include "holly/background.hpp"
#include "memorymap.hpp"
#include "macaw.hpp"
@ -25,8 +28,8 @@ const struct vertex strip_vertices[4] = {
// [ position ] [ uv coordinates ] [color ]
{ -0.5f, 0.5f, 0.f, 0.f , 127.f/128.f, 0x00000000}, // the first two base colors in a
{ -0.5f, -0.5f, 0.f, 0.f , 0.f , 0x00000000}, // non-Gouraud triangle strip are ignored
{ 0.5f, 0.5f, 0.f, 127.f/128.f, 127.f/128.f, 0xffff00ff},
{ 0.5f, -0.5f, 0.f, 127.f/128.f, 0.f , 0xffffff00},
{ 0.5f, 0.5f, 0.f, 127.f/128.f, 127.f/128.f, 0x00000000},
{ 0.5f, -0.5f, 0.f, 127.f/128.f, 0.f , 0x00000000},
};
constexpr uint32_t strip_length = (sizeof (strip_vertices)) / (sizeof (struct vertex));
@ -73,6 +76,19 @@ uint32_t transform(uint32_t * ta_parameter_buf,
return parameter.offset;
}
void init_texture_memory()
{
volatile texture_memory_alloc * mem = reinterpret_cast<volatile texture_memory_alloc *>(texture_memory);
background_parameter(mem->background);
region_array(mem->region_array,
(offsetof (struct texture_memory_alloc, object_list)),
640 / 32, // width
480 / 32 // height
);
}
uint32_t _ta_parameter_buf[((32 * (strip_length + 2)) + 32) / 4];
void main()
@ -96,17 +112,20 @@ void main()
holly.SOFTRESET = 0;
core_init();
core_init_texture_memory();
init_texture_memory();
// The address of `ta_parameter_buf` must be a multiple of 32 bytes.
// This is mandatory for ch2-dma to the ta fifo polygon converter.
uint32_t * ta_parameter_buf = align_32byte(_ta_parameter_buf);
constexpr uint32_t tiles = (640 / 32) * (320 / 32);
constexpr uint32_t opb_total_size = tiles * 16 * 4;
while (true) {
v_sync_out();
v_sync_in();
ta_polygon_converter_init();
ta_polygon_converter_init(opb_total_size);
uint32_t ta_parameter_size = transform(ta_parameter_buf, strip_vertices, strip_length);
ta_polygon_converter_transfer(ta_parameter_buf, ta_parameter_size);
ta_wait_opaque_list();

153
example/macaw_multipass.cpp Normal file
View File

@ -0,0 +1,153 @@
#include <cstdint>
#include "align.hpp"
#include "vga.hpp"
#include "holly/texture_memory_alloc.hpp"
#include "holly.hpp"
#include "holly/core.hpp"
#include "holly/core_bits.hpp"
#include "holly/ta_fifo_polygon_converter.hpp"
#include "holly/ta_parameter.hpp"
#include "holly/background.hpp"
#include "holly/region_array.hpp"
#include "memorymap.hpp"
#include "macaw.hpp"
// One vertex of the triangle strip, as consumed by transform().
struct vertex {
  // position, before transform() rotates it and maps it to the screen
  float x;
  float y;
  float z;

  // texture coordinates, passed through to the vertex parameter unchanged
  float u;
  float v;

  // color word, passed through to the vertex parameter unchanged
  uint32_t color;
};
// A unit square centered on the origin, listed in triangle-strip order.
const struct vertex strip_vertices[4] = {
  //    x      y     z    u            v            color
  { -0.5f,  0.5f, 0.f, 0.f        , 127.f/128.f, 0x7fff0000 }, // the first two base colors in a
  { -0.5f, -0.5f, 0.f, 0.f        , 0.f        , 0x7f00ff00 }, // non-Gouraud triangle strip are ignored
  {  0.5f,  0.5f, 0.f, 127.f/128.f, 127.f/128.f, 0x7f0000ff },
  {  0.5f, -0.5f, 0.f, 127.f/128.f, 0.f        , 0x7fff00ff },
};

// number of vertices in strip_vertices
constexpr uint32_t strip_length = (sizeof (strip_vertices)) / (sizeof (strip_vertices[0]));

// rotation angle in radians; main() advances it by half_degree every frame
static float theta = 0.f;

// half of one degree, in radians
constexpr float half_degree = 0.01745329f / 2.f;
uint32_t transform(uint32_t * ta_parameter_buf,
const vertex * strip_vertices,
const uint32_t strip_length,
const uint32_t render_pass)
{
auto parameter = ta_parameter_writer(ta_parameter_buf);
uint32_t texture_address = (offsetof (struct texture_memory_alloc, texture));
if (render_pass == 0) {
// textured
parameter.append<global_polygon_type_0>() = global_polygon_type_0(texture_address);
} else {
// untextured
parameter.append<global_polygon_type_0>() = global_polygon_type_0();
}
for (uint32_t i = 0; i < strip_length; i++) {
bool end_of_strip = i == strip_length - 1;
float x = strip_vertices[i].x;
float y = strip_vertices[i].y;
float z = strip_vertices[i].z;
float x1;
x1 = x * __builtin_cosf(theta) - z * __builtin_sinf(theta);
z = x * __builtin_sinf(theta) + z * __builtin_cosf(theta);
x = x1;
x *= 240.f;
y *= 240.f;
x += 320.f;
y += 240.f;
z = 1.f / (z + 10.f);
parameter.append<vertex_polygon_type_3>() =
vertex_polygon_type_3(x, y, z,
strip_vertices[i].u,
strip_vertices[i].v,
strip_vertices[i].color,
end_of_strip);
}
parameter.append<global_end_of_list>() = global_end_of_list();
return parameter.offset;
}
void init_texture_memory()
{
volatile texture_memory_alloc * mem = reinterpret_cast<volatile texture_memory_alloc *>(texture_memory);
background_parameter(mem->background);
region_array_multipass(mem->region_array,
(offsetof (struct texture_memory_alloc, object_list)),
640 / 32, // width
480 / 32, // height
2 // num_render_passes
);
}
// Staging buffer for one pass worth of TA parameters: sized as 32 bytes for
// each of the strip_length vertex parameters, the global parameter and the
// end-of-list parameter, plus 32 spare bytes so that a 32-byte-aligned start
// address can be chosen inside it (see align_32byte in main).
uint32_t _ta_parameter_buf[((32 * (strip_length + 2)) + 32) / 4];

// Uploads the macaw texture, then renders the strip in two passes per frame
// (textured, then untextured), rotating it by half a degree each frame.
// Never returns.
void main()
{
  vga();

  // convert the embedded 3-bytes-per-pixel image to RGB565 in texture memory
  auto src = reinterpret_cast<const uint8_t *>(&_binary_macaw_data_start);
  auto size = reinterpret_cast<const uint32_t>(&_binary_macaw_data_size);
  auto mem = reinterpret_cast<texture_memory_alloc *>(0xa400'0000);
  for (uint32_t px = 0; px < size / 3; px++) {
    uint8_t r = src[px * 3 + 0];
    uint8_t g = src[px * 3 + 1];
    uint8_t b = src[px * 3 + 2];

    uint16_t rgb565 = ((r / 8) << 11) | ((g / 4) << 5) | ((b / 8) << 0);
    mem->texture[px] = rgb565;
  }

  holly.SOFTRESET = softreset::pipeline_soft_reset
                  | softreset::ta_soft_reset;
  holly.SOFTRESET = 0;

  core_init();
  init_texture_memory();

  // The address of `ta_parameter_buf` must be a multiple of 32 bytes.
  // This is mandatory for ch2-dma to the ta fifo polygon converter.
  uint32_t * ta_parameter_buf = align_32byte(_ta_parameter_buf);

  // The tile count must match the region array written by
  // init_texture_memory() (640/32 by 480/32 tiles): region_array_multipass
  // places the lists of pass N at `N * width * height * 64` bytes past the
  // object list base, and the second pass below must be written to exactly
  // that offset.
  constexpr uint32_t tiles = (640 / 32) * (480 / 32);
  constexpr uint32_t opb_per_pass_size = tiles * 16 * 4;
  constexpr uint32_t opb_total_size = opb_per_pass_size * 2;

  while (true) {
    v_sync_out();
    v_sync_in();

    // first render pass
    ta_polygon_converter_init(opb_total_size);
    uint32_t ta_parameter_size_pass_1 = transform(ta_parameter_buf, strip_vertices, strip_length, 0);
    ta_polygon_converter_transfer(ta_parameter_buf, ta_parameter_size_pass_1);
    ta_wait_opaque_list();

    // second render pass; its object pointer blocks start right after
    // those of the first pass
    ta_polygon_converter_cont(opb_per_pass_size);
    uint32_t ta_parameter_size_pass_2 = transform(ta_parameter_buf, strip_vertices, strip_length, 1);
    ta_polygon_converter_transfer(ta_parameter_buf, ta_parameter_size_pass_2);
    ta_wait_translucent_list();

    constexpr int frame_ix = 0;
    constexpr int num_frames = 0;
    core_start_render(frame_ix, num_frames);

    theta += half_degree;
  }
}

View File

@ -48,18 +48,6 @@ void core_init()
| fpu_param_cfg::pointer_first_burst_size(7); // half of pointer burst size(?)
}
void core_init_texture_memory()
{
volatile texture_memory_alloc * mem = reinterpret_cast<volatile texture_memory_alloc *>(texture_memory);
background_parameter(mem->background);
region_array(mem->region_array,
(offsetof (struct texture_memory_alloc, object_list)),
640 / 32, // width
480 / 32 // height
);
}
void core_start_render(int frame_ix, int num_frames)
{
holly.REGION_BASE = (offsetof (struct texture_memory_alloc, region_array));

View File

@ -1,5 +1,4 @@
#pragma once
void core_init();
void core_init_texture_memory();
void core_start_render(int frame_ix, int num_frames);

View File

@ -26,7 +26,7 @@ struct region_array_entry {
// opaque list pointer offset: OPB size * tile index * 4
void region_array(volatile uint32_t * buf,
uint32_t ol_base,
const uint32_t ol_base,
const uint32_t width, // in tile units (1 tile unit = 32 pixels)
const uint32_t height) // in tile units (1 tile unit = 32 pixels)
{
@ -64,3 +64,57 @@ void region_array(volatile uint32_t * buf,
}
}
}
void region_array_multipass(volatile uint32_t * buf,
const uint32_t ol_base,
const uint32_t width, // in tile units (1 tile unit = 32 pixels)
const uint32_t height, // in tile units (1 tile unit = 32 pixels)
const uint32_t num_render_passes)
{
volatile region_array_entry * region_array = reinterpret_cast<volatile region_array_entry *>(buf);
uint32_t ix = 0;
// create a "dummy region array [item]" for CORE & TA-related bug #21:
// "Misshapen tiles or missing tiles occur"
region_array[ix].tile = REGION_ARRAY__FLUSH_ACCUMULATE;
region_array[ix].opaque_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
region_array[ix].opaque_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
region_array[ix].translucent_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
region_array[ix].translucent_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
region_array[ix].punch_through_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
ix += 1;
constexpr uint32_t opaque_list_opb_size = 16 * 4; // for a single OPB in bytes; this must match O_OPB in TA_ALLOC_CTRL
const uint32_t opaque_opb_render_pass_size = width * height * opaque_list_opb_size; // the sum of the size of all OPB for a single pass
for (uint32_t y = 0; y < height; y++) {
for (uint32_t x = 0; x < width; x++) {
for (uint32_t render_pass = 0; render_pass < num_render_passes; render_pass++) {
region_array[ix].tile = REGION_ARRAY__TILE_Y_POSITION(y)
| REGION_ARRAY__TILE_X_POSITION(x);
if (render_pass != (num_render_passes - 1))
region_array[ix].tile |= REGION_ARRAY__FLUSH_ACCUMULATE;
if (render_pass > 0)
region_array[ix].tile |= REGION_ARRAY__Z_CLEAR;
if (render_pass == (num_render_passes - 1) &&
y == (height - 1) && x == (width - 1))
region_array[ix].tile |= REGION_ARRAY__LAST_REGION;
uint32_t tile_index = y * width + x;
uint32_t pass_ol_base = ol_base + (opaque_opb_render_pass_size * render_pass);
region_array[ix].opaque_list_pointer = pass_ol_base + (opaque_list_opb_size * tile_index);
region_array[ix].opaque_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
region_array[ix].translucent_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
region_array[ix].translucent_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
region_array[ix].punch_through_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
ix++;
}
}
}
}

View File

@ -3,6 +3,12 @@
#include <cstdint>
void region_array(volatile uint32_t * buf,
uint32_t ol_base,
const uint32_t ol_base,
const uint32_t width, // in tile units (1 tile unit = 32 pixels)
const uint32_t height); // in tile units (1 tile unit = 32 pixels)
// Writes a region array for `num_render_passes` render passes over a
// `width` x `height` tile grid into `buf`. `ol_base` is the base that the
// per-tile opaque list pointers are computed from.
void region_array_multipass(volatile uint32_t * buf,
const uint32_t ol_base,
const uint32_t width, // in tile units (1 tile unit = 32 pixels)
const uint32_t height, // in tile units (1 tile unit = 32 pixels)
const uint32_t num_render_passes);

View File

@ -12,7 +12,7 @@
#include "ta_fifo_polygon_converter.hpp"
void ta_polygon_converter_init()
void ta_polygon_converter_init(uint32_t opb_total_size) // for all render passes
{
holly.SOFTRESET = softreset::ta_soft_reset;
holly.SOFTRESET = 0;
@ -31,17 +31,33 @@ void ta_polygon_converter_init()
holly.TA_ISP_LIMIT = (offsetof (struct texture_memory_alloc, object_list)); // the end of isp_tsp_parameters
holly.TA_OL_BASE = (offsetof (struct texture_memory_alloc, object_list));
holly.TA_OL_LIMIT = (offsetof (struct texture_memory_alloc, _res0)); // the end of the object_list
holly.TA_NEXT_OPB_INIT = (offsetof (struct texture_memory_alloc, object_list));
//holly.TA_NEXT_OPB_INIT = (offsetof (struct texture_memory_alloc, object_list))
// + (640 / 32) * (320 / 32) * 16 * 4;
holly.TA_NEXT_OPB_INIT = (offsetof (struct texture_memory_alloc, object_list))
+ opb_total_size; // opb_size is the total size of all OPBs for all passes
holly.TA_LIST_INIT = ta_list_init::list_init;
volatile uint32_t _dummy_read = holly.TA_LIST_INIT;
uint32_t _dummy_read = holly.TA_LIST_INIT;
(void)_dummy_read;
}
// Prepares the TA for a further render pass via TA_LIST_CONT.
//
// `ol_base_offset` is added to the offset of `object_list` to form the
// TA_OL_BASE used for this pass.
void ta_polygon_converter_cont(uint32_t ol_base_offset)
{
  // only the translucent list is given object pointer blocks in this pass
  const uint32_t alloc_ctrl = ta_alloc_ctrl::opb_mode::increasing_addresses
                            | ta_alloc_ctrl::pt_opb::no_list
                            | ta_alloc_ctrl::tm_opb::no_list
                            | ta_alloc_ctrl::t_opb::_16x4byte
                            | ta_alloc_ctrl::om_opb::no_list
                            | ta_alloc_ctrl::o_opb::no_list;

  const uint32_t ol_base = (offsetof (struct texture_memory_alloc, object_list))
                         + ol_base_offset;

  holly.TA_ALLOC_CTRL = alloc_ctrl;
  holly.TA_OL_BASE = ol_base;

  holly.TA_LIST_CONT = ta_list_cont::list_cont;

  // read the register back after writing it; the value itself is unused
  uint32_t _dummy_read = holly.TA_LIST_CONT;
  (void)_dummy_read;
}
extern void serial_string(const char * s);
void ta_polygon_converter_transfer(volatile uint32_t * buf, uint32_t size)
{
@ -97,3 +113,10 @@ void ta_wait_opaque_list()
system.ISTNRM = ISTNRM__END_OF_TRANSFERRING_OPAQUE_LIST;
}
// Busy-waits until the end-of-translucent-list-transfer bit is set in
// ISTNRM, then writes that bit back to ISTNRM (presumably this acknowledges
// the interrupt -- confirm against the register documentation).
void ta_wait_translucent_list()
{
  while (!(system.ISTNRM & ISTNRM__END_OF_TRANSFERRING_TRANSLUCENT_LIST)) {
    // spin
  }

  system.ISTNRM = ISTNRM__END_OF_TRANSFERRING_TRANSLUCENT_LIST;
}

View File

@ -2,6 +2,8 @@
#include <cstdint>
void ta_polygon_converter_init();
void ta_polygon_converter_init(uint32_t opb_total_size); // total OPB size for all render passes
void ta_polygon_converter_cont(uint32_t ol_base_offset);
void ta_polygon_converter_transfer(volatile uint32_t * buf, uint32_t size);
void ta_wait_opaque_list();
void ta_wait_translucent_list();

View File

@ -141,27 +141,30 @@ struct global_polygon_type_0 {
uint32_t data_size_for_sort_dma;
uint32_t next_address_for_sort_dma;
/*
// untextured
global_polygon_type_0()
: parameter_control_word( para_control::para_type::polygon_or_modifier_volume
| para_control::list_type::opaque
| obj_control::col_type::packed_color )
| para_control::list_type::translucent
| obj_control::col_type::packed_color
| obj_control::gouraud )
, isp_tsp_instruction_word( isp_tsp_instruction_word::depth_compare_mode::always
| isp_tsp_instruction_word::culling_mode::no_culling )
| isp_tsp_instruction_word::culling_mode::no_culling )
, tsp_instruction_word( tsp_instruction_word::src_alpha_instr::one
| tsp_instruction_word::dst_alpha_instr::zero
| tsp_instruction_word::fog_control::no_fog )
| tsp_instruction_word::dst_alpha_instr::src_alpha
| tsp_instruction_word::fog_control::no_fog
| tsp_instruction_word::use_alpha )
, texture_control_word( 0 )
, texture_control_word(0)
, _res0(0)
, _res1(0)
, data_size_for_sort_dma(0)
, next_address_for_sort_dma(0)
{ }
*/
// textured
global_polygon_type_0(const uint32_t texture_address)
: parameter_control_word( para_control::para_type::polygon_or_modifier_volume
| para_control::list_type::opaque

View File

@ -2,6 +2,14 @@
#include <cstdint>
/*
object_list[0x00100000 / 4] is enough space for 81 sets of
0x3200-byte lists (16 * 4 * (640 / 32) * (320 / 32))
(that is, it is significantly more space than required for trivial
TA/CORE drawings)
*/
struct texture_memory_alloc {
uint32_t isp_tsp_parameters[0x00100000 / 4]; // TA_ISP_BASE / PARAM_BASE (the actual objects)
uint32_t object_list[0x00100000 / 4]; // TA_OL_BASE (contains object pointer blocks)

12
notes/ta-multipass.txt Normal file
View File

@ -0,0 +1,12 @@
page 179
"If list continuation processing is performed through the TA_LIST_CONT
register, the TA initializes its internal status in the same manner as
before, but leaves the TA_NEXT_OPB register unchanged."
TA_NEXT_OPB is only used in the case where the TA needs to allocate an
object pointer in a tile that has a full OPB (e.g., for TA_ALLOC_CTRL
0x2, to write the 16th opaque object to a tile, the next OPB for that
tile is at the address TA_NEXT_OPB).