diff --git a/example/example.mk b/example/example.mk
index 879914d..9b0b679 100644
--- a/example/example.mk
+++ b/example/example.mk
@@ -21,6 +21,18 @@ MACAW_OBJ = \
 example/macaw.elf: LDSCRIPT = $(LIB)/alt.lds
 example/macaw.elf: $(START_OBJ) $(MACAW_OBJ)
 
+MACAW_MULTIPASS_OBJ = \
+	example/macaw_multipass.o \
+	vga.o \
+	holly/core.o \
+	holly/region_array.o \
+	holly/background.o \
+	holly/ta_fifo_polygon_converter.o \
+	macaw.data.o
+
+example/macaw_multipass.elf: LDSCRIPT = $(LIB)/alt.lds
+example/macaw_multipass.elf: $(START_OBJ) $(MACAW_MULTIPASS_OBJ)
+
 CUBE_OBJ = \
 	example/cube.o \
 	vga.o \
diff --git a/example/macaw.cpp b/example/macaw.cpp
index 1143794..6cbebdf 100644
--- a/example/macaw.cpp
+++ b/example/macaw.cpp
@@ -9,6 +9,9 @@
 #include "holly/core_bits.hpp"
 #include "holly/ta_fifo_polygon_converter.hpp"
 #include "holly/ta_parameter.hpp"
+#include "holly/region_array.hpp"
+#include "holly/background.hpp"
+#include "memorymap.hpp"
 
 #include "macaw.hpp"
 
@@ -25,8 +28,8 @@ const struct vertex strip_vertices[4] = {
   // [ position       ]  [ uv coordinates       ]  [color    ]
   { -0.5f,   0.5f,  0.f, 0.f        , 127.f/128.f, 0x00000000}, // the first two base colors in a
   { -0.5f,  -0.5f,  0.f, 0.f        , 0.f        , 0x00000000}, // non-Gouraud triangle strip are ignored
-  {  0.5f,   0.5f,  0.f, 127.f/128.f, 127.f/128.f, 0xffff00ff},
-  {  0.5f,  -0.5f,  0.f, 127.f/128.f, 0.f        , 0xffffff00},
+  {  0.5f,   0.5f,  0.f, 127.f/128.f, 127.f/128.f, 0x00000000},
+  {  0.5f,  -0.5f,  0.f, 127.f/128.f, 0.f        , 0x00000000},
 };
 constexpr uint32_t strip_length = (sizeof (strip_vertices)) / (sizeof (struct vertex));
 
@@ -73,6 +76,19 @@ uint32_t transform(uint32_t * ta_parameter_buf,
   return parameter.offset;
 }
 
+void init_texture_memory()
+{
+  volatile texture_memory_alloc * mem = reinterpret_cast(texture_memory);
+
+  background_parameter(mem->background);
+
+  region_array(mem->region_array,
+               (offsetof (struct texture_memory_alloc, object_list)),
+               640 / 32, // width
+               480 / 32  // height
+               );
+}
+
 uint32_t _ta_parameter_buf[((32 * (strip_length + 2)) + 32) / 4];
 
 void main()
@@ -96,17 +112,20 @@ void main()
   holly.SOFTRESET = 0;
 
   core_init();
-  core_init_texture_memory();
+  init_texture_memory();
 
   // The address of `ta_parameter_buf` must be a multiple of 32 bytes.
   // This is mandatory for ch2-dma to the ta fifo polygon converter.
   uint32_t * ta_parameter_buf = align_32byte(_ta_parameter_buf);
 
+  constexpr uint32_t tiles = (640 / 32) * (480 / 32); // must match the width * height given to region_array()
+  constexpr uint32_t opb_total_size = tiles * 16 * 4;
+
   while (true) {
     v_sync_out();
     v_sync_in();
 
-    ta_polygon_converter_init();
+    ta_polygon_converter_init(opb_total_size);
     uint32_t ta_parameter_size = transform(ta_parameter_buf, strip_vertices, strip_length);
     ta_polygon_converter_transfer(ta_parameter_buf, ta_parameter_size);
     ta_wait_opaque_list();
diff --git a/example/macaw_multipass.cpp b/example/macaw_multipass.cpp
new file mode 100644
index 0000000..b69b7b6
--- /dev/null
+++ b/example/macaw_multipass.cpp
@@ -0,0 +1,153 @@
+#include
+
+#include "align.hpp"
+#include "vga.hpp"
+
+#include "holly/texture_memory_alloc.hpp"
+#include "holly.hpp"
+#include "holly/core.hpp"
+#include "holly/core_bits.hpp"
+#include "holly/ta_fifo_polygon_converter.hpp"
+#include "holly/ta_parameter.hpp"
+#include "holly/background.hpp"
+#include "holly/region_array.hpp"
+#include "memorymap.hpp"
+
+#include "macaw.hpp"
+
+struct vertex {
+  float x;
+  float y;
+  float z;
+  float u;
+  float v;
+  uint32_t color;
+};
+
+const struct vertex strip_vertices[4] = {
+  // [ position       ]  [ uv coordinates       ]  [color    ]
+  { -0.5f,   0.5f,  0.f, 0.f        , 127.f/128.f, 0x7fff0000}, // the first two base colors in a
+  { -0.5f,  -0.5f,  0.f, 0.f        , 0.f        , 0x7f00ff00}, // non-Gouraud triangle strip are ignored
+  {  0.5f,   0.5f,  0.f, 127.f/128.f, 127.f/128.f, 0x7f0000ff},
+  {  0.5f,  -0.5f,  0.f, 127.f/128.f, 0.f        , 0x7fff00ff},
+};
+constexpr uint32_t strip_length = (sizeof (strip_vertices)) / (sizeof (struct vertex));
+
+static float theta = 0;
+constexpr float half_degree = 0.01745329f / 2.f;
+
+uint32_t transform(uint32_t * ta_parameter_buf,
+                   const vertex * strip_vertices,
+                   const uint32_t strip_length,
+                   const uint32_t render_pass)
+{
+  auto parameter = ta_parameter_writer(ta_parameter_buf);
+  uint32_t texture_address = (offsetof (struct texture_memory_alloc, texture));
+  if (render_pass == 0) {
+    // textured
+    parameter.append() = global_polygon_type_0(texture_address);
+  } else {
+    // untextured
+    parameter.append() = global_polygon_type_0();
+  }
+
+  for (uint32_t i = 0; i < strip_length; i++) {
+    bool end_of_strip = i == strip_length - 1;
+
+    float x = strip_vertices[i].x;
+    float y = strip_vertices[i].y;
+    float z = strip_vertices[i].z;
+    float x1;
+
+    x1 = x * __builtin_cosf(theta) - z * __builtin_sinf(theta);
+    z  = x * __builtin_sinf(theta) + z * __builtin_cosf(theta);
+    x  = x1;
+    x *= 240.f;
+    y *= 240.f;
+    x += 320.f;
+    y += 240.f;
+    z = 1.f / (z + 10.f);
+
+    parameter.append() =
+      vertex_polygon_type_3(x, y, z,
+                            strip_vertices[i].u,
+                            strip_vertices[i].v,
+                            strip_vertices[i].color,
+                            end_of_strip);
+  }
+
+  parameter.append() = global_end_of_list();
+
+  return parameter.offset;
+}
+
+void init_texture_memory()
+{
+  volatile texture_memory_alloc * mem = reinterpret_cast(texture_memory);
+
+  background_parameter(mem->background);
+
+  region_array_multipass(mem->region_array,
+                         (offsetof (struct texture_memory_alloc, object_list)),
+                         640 / 32, // width
+                         480 / 32, // height
+                         2         // num_render_passes
+                         );
+}
+
+uint32_t _ta_parameter_buf[((32 * (strip_length + 2)) + 32) / 4];
+
+void main()
+{
+  vga();
+
+  auto src = reinterpret_cast(&_binary_macaw_data_start);
+  auto size = reinterpret_cast(&_binary_macaw_data_size);
+  auto mem = reinterpret_cast(0xa400'0000);
+  for (uint32_t px = 0; px < size / 3; px++) {
+    uint8_t r = src[px * 3 + 0];
+    uint8_t g = src[px * 3 + 1];
+    uint8_t b = src[px * 3 + 2];
+
+    uint16_t rgb565 = ((r / 8) << 11) | ((g / 4) << 5) | ((b / 8) << 0);
+    mem->texture[px] = rgb565;
+  }
+
+  holly.SOFTRESET = softreset::pipeline_soft_reset
+                  | softreset::ta_soft_reset;
+  holly.SOFTRESET = 0;
+
+  core_init();
+  init_texture_memory();
+
+  // The address of `ta_parameter_buf` must be a multiple of 32 bytes.
+  // This is mandatory for ch2-dma to the ta fifo polygon converter.
+  uint32_t * ta_parameter_buf = align_32byte(_ta_parameter_buf);
+
+  constexpr uint32_t tiles = (640 / 32) * (480 / 32); // must match the width * height given to region_array_multipass()
+  constexpr uint32_t opb_per_pass_size = tiles * 16 * 4; // one 16x4-byte OPB per tile, same stride region_array_multipass() uses per pass
+  constexpr uint32_t opb_total_size = opb_per_pass_size * 2;
+
+  while (true) {
+    v_sync_out();
+    v_sync_in();
+
+    // first render pass
+    ta_polygon_converter_init(opb_total_size);
+    uint32_t ta_parameter_size_pass_1 = transform(ta_parameter_buf, strip_vertices, strip_length, 0);
+    ta_polygon_converter_transfer(ta_parameter_buf, ta_parameter_size_pass_1);
+    ta_wait_opaque_list();
+
+    // second render pass
+    ta_polygon_converter_cont(opb_per_pass_size);
+    uint32_t ta_parameter_size_pass_2 = transform(ta_parameter_buf, strip_vertices, strip_length, 1);
+    ta_polygon_converter_transfer(ta_parameter_buf, ta_parameter_size_pass_2);
+    ta_wait_translucent_list();
+
+    constexpr int frame_ix = 0;
+    constexpr int num_frames = 0;
+    core_start_render(frame_ix, num_frames);
+
+    theta += half_degree;
+  }
+}
diff --git a/holly/core.cpp b/holly/core.cpp
index 435aa34..3398545 100644
--- a/holly/core.cpp
+++ b/holly/core.cpp
@@ -48,18 +48,6 @@ void core_init()
     | fpu_param_cfg::pointer_first_burst_size(7); // half of pointer burst size(?)
 }
 
-void core_init_texture_memory()
-{
-  volatile texture_memory_alloc * mem = reinterpret_cast(texture_memory);
-
-  background_parameter(mem->background);
-  region_array(mem->region_array,
-               (offsetof (struct texture_memory_alloc, object_list)),
-               640 / 32, // width
-               480 / 32  // height
-               );
-}
-
 void core_start_render(int frame_ix, int num_frames)
 {
   holly.REGION_BASE = (offsetof (struct texture_memory_alloc, region_array));
diff --git a/holly/core.hpp b/holly/core.hpp
index b281daf..0333b52 100644
--- a/holly/core.hpp
+++ b/holly/core.hpp
@@ -1,5 +1,4 @@
 #pragma once
 
 void core_init();
-void core_init_texture_memory();
 void core_start_render(int frame_ix, int num_frames);
diff --git a/holly/region_array.cpp b/holly/region_array.cpp
index f09057c..3429462 100644
--- a/holly/region_array.cpp
+++ b/holly/region_array.cpp
@@ -26,7 +26,7 @@ struct region_array_entry {
 // opaque list pointer offset: OPB size * tile index * 4
 
 void region_array(volatile uint32_t * buf,
-                  uint32_t ol_base,
+                  const uint32_t ol_base,
                   const uint32_t width,  // in tile units (1 tile unit = 32 pixels)
                   const uint32_t height) // in tile units (1 tile unit = 32 pixels)
 {
@@ -64,3 +64,57 @@ void region_array(volatile uint32_t * buf,
     }
   }
 }
+
+void region_array_multipass(volatile uint32_t * buf,
+                            const uint32_t ol_base,
+                            const uint32_t width,  // in tile units (1 tile unit = 32 pixels)
+                            const uint32_t height, // in tile units (1 tile unit = 32 pixels)
+                            const uint32_t num_render_passes)
+{
+  volatile region_array_entry * region_array = reinterpret_cast(buf);
+  uint32_t ix = 0;
+
+  // create a "dummy region array [item]" for CORE & TA-related bug #21:
+  //   "Misshapen tiles or missing tiles occur"
+  region_array[ix].tile = REGION_ARRAY__FLUSH_ACCUMULATE;
+  region_array[ix].opaque_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
+  region_array[ix].opaque_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
+  region_array[ix].translucent_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
+  region_array[ix].translucent_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
+  region_array[ix].punch_through_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
+
+  ix += 1;
+
+  constexpr uint32_t opaque_list_opb_size = 16 * 4; // for a single OPB in bytes; this must match O_OPB in TA_ALLOC_CTRL
+  const uint32_t opaque_opb_render_pass_size = width * height * opaque_list_opb_size; // the sum of the size of all OPB for a single pass
+
+  for (uint32_t y = 0; y < height; y++) {
+    for (uint32_t x = 0; x < width; x++) {
+      for (uint32_t render_pass = 0; render_pass < num_render_passes; render_pass++) {
+
+        region_array[ix].tile = REGION_ARRAY__TILE_Y_POSITION(y)
+                              | REGION_ARRAY__TILE_X_POSITION(x);
+
+        if (render_pass != (num_render_passes - 1))
+          region_array[ix].tile |= REGION_ARRAY__FLUSH_ACCUMULATE;
+
+        if (render_pass > 0)
+          region_array[ix].tile |= REGION_ARRAY__Z_CLEAR;
+
+        if (render_pass == (num_render_passes - 1) &&
+            y == (height - 1) && x == (width - 1))
+          region_array[ix].tile |= REGION_ARRAY__LAST_REGION;
+
+        uint32_t tile_index = y * width + x;
+        uint32_t pass_ol_base = ol_base + (opaque_opb_render_pass_size * render_pass);
+        region_array[ix].opaque_list_pointer = pass_ol_base + (opaque_list_opb_size * tile_index);
+        region_array[ix].opaque_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
+        region_array[ix].translucent_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
+        region_array[ix].translucent_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
+        region_array[ix].punch_through_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY;
+
+        ix++;
+      }
+    }
+  }
+}
diff --git a/holly/region_array.hpp b/holly/region_array.hpp
index fd6a93b..c7c24bc 100644
--- a/holly/region_array.hpp
+++ b/holly/region_array.hpp
@@ -3,6 +3,12 @@
 #include
 
 void region_array(volatile uint32_t * buf,
-                  uint32_t ol_base,
+                  const uint32_t ol_base,
                   const uint32_t width,   // in tile units (1 tile unit = 32 pixels)
                   const uint32_t height); // in tile units (1 tile unit = 32 pixels)
+
+void region_array_multipass(volatile uint32_t * buf,
+                            const uint32_t ol_base,
+                            const uint32_t width,  // in tile units (1 tile unit = 32 pixels)
+                            const uint32_t height, // in tile units (1 tile unit = 32 pixels)
+                            const uint32_t num_render_passes);
diff --git a/holly/ta_fifo_polygon_converter.cpp b/holly/ta_fifo_polygon_converter.cpp
index da05fbc..46252e5 100644
--- a/holly/ta_fifo_polygon_converter.cpp
+++ b/holly/ta_fifo_polygon_converter.cpp
@@ -12,7 +12,7 @@
 
 #include "ta_fifo_polygon_converter.hpp"
 
-void ta_polygon_converter_init()
+void ta_polygon_converter_init(uint32_t opb_total_size) // for all render passes
 {
   holly.SOFTRESET = softreset::ta_soft_reset;
   holly.SOFTRESET = 0;
@@ -31,17 +31,33 @@ void ta_polygon_converter_init()
   holly.TA_ISP_LIMIT = (offsetof (struct texture_memory_alloc, object_list)); // the end of isp_tsp_parameters
   holly.TA_OL_BASE = (offsetof (struct texture_memory_alloc, object_list));
   holly.TA_OL_LIMIT = (offsetof (struct texture_memory_alloc, _res0)); // the end of the object_list
-  holly.TA_NEXT_OPB_INIT = (offsetof (struct texture_memory_alloc, object_list));
-  //holly.TA_NEXT_OPB_INIT = (offsetof (struct texture_memory_alloc, object_list))
-  //                       + (640 / 32) * (320 / 32) * 16 * 4;
+  holly.TA_NEXT_OPB_INIT = (offsetof (struct texture_memory_alloc, object_list))
+                         + opb_total_size; // opb_total_size is the total size of all OPBs for all passes
 
   holly.TA_LIST_INIT = ta_list_init::list_init;
 
-  volatile uint32_t _dummy_read = holly.TA_LIST_INIT;
+  uint32_t _dummy_read = holly.TA_LIST_INIT;
+  (void)_dummy_read;
+}
+
+void ta_polygon_converter_cont(uint32_t ol_base_offset)
+{
+  holly.TA_ALLOC_CTRL = ta_alloc_ctrl::opb_mode::increasing_addresses
+                      | ta_alloc_ctrl::pt_opb::no_list
+                      | ta_alloc_ctrl::tm_opb::no_list
+                      | ta_alloc_ctrl::t_opb::_16x4byte
+                      | ta_alloc_ctrl::om_opb::no_list
+                      | ta_alloc_ctrl::o_opb::no_list;
+
+  holly.TA_OL_BASE = (offsetof (struct texture_memory_alloc, object_list))
+                   + ol_base_offset;
+
+  holly.TA_LIST_CONT = ta_list_cont::list_cont;
+
+  uint32_t _dummy_read = holly.TA_LIST_CONT;
   (void)_dummy_read;
 }
 
-extern void serial_string(const char * s);
 
 void ta_polygon_converter_transfer(volatile uint32_t * buf, uint32_t size)
 {
@@ -97,3 +113,10 @@ void ta_wait_opaque_list()
 
   system.ISTNRM = ISTNRM__END_OF_TRANSFERRING_OPAQUE_LIST;
 }
+
+void ta_wait_translucent_list()
+{
+  while ((system.ISTNRM & ISTNRM__END_OF_TRANSFERRING_TRANSLUCENT_LIST) == 0);
+
+  system.ISTNRM = ISTNRM__END_OF_TRANSFERRING_TRANSLUCENT_LIST;
+}
diff --git a/holly/ta_fifo_polygon_converter.hpp b/holly/ta_fifo_polygon_converter.hpp
index f3e9f02..9b92a09 100644
--- a/holly/ta_fifo_polygon_converter.hpp
+++ b/holly/ta_fifo_polygon_converter.hpp
@@ -2,6 +2,8 @@
 
 #include
 
-void ta_polygon_converter_init();
+void ta_polygon_converter_init(uint32_t opb_total_size); // total OPB size for all render passes
+void ta_polygon_converter_cont(uint32_t ol_base_offset);
 void ta_polygon_converter_transfer(volatile uint32_t * buf, uint32_t size);
 void ta_wait_opaque_list();
+void ta_wait_translucent_list();
diff --git a/holly/ta_parameter.hpp b/holly/ta_parameter.hpp
index 42ce4e6..eb0b1a9 100644
--- a/holly/ta_parameter.hpp
+++ b/holly/ta_parameter.hpp
@@ -141,27 +141,30 @@ struct global_polygon_type_0 {
   uint32_t data_size_for_sort_dma;
   uint32_t next_address_for_sort_dma;
 
-  /*
+  // untextured
   global_polygon_type_0()
     : parameter_control_word( para_control::para_type::polygon_or_modifier_volume
-                            | para_control::list_type::opaque
-                            | obj_control::col_type::packed_color )
+                            | para_control::list_type::translucent
+                            | obj_control::col_type::packed_color
+                            | obj_control::gouraud )
 
     , isp_tsp_instruction_word( isp_tsp_instruction_word::depth_compare_mode::always
-                              | isp_tsp_instruction_word::culling_mode::no_culling )
+                              | isp_tsp_instruction_word::culling_mode::no_culling  )
 
     , tsp_instruction_word( tsp_instruction_word::src_alpha_instr::one
-                          | tsp_instruction_word::dst_alpha_instr::zero
-                          | tsp_instruction_word::fog_control::no_fog )
+                          | tsp_instruction_word::dst_alpha_instr::src_alpha
+                          | tsp_instruction_word::fog_control::no_fog
+                          | tsp_instruction_word::use_alpha )
+
+    , texture_control_word( 0 )
 
-    , texture_control_word(0)
     , _res0(0)
     , _res1(0)
     , data_size_for_sort_dma(0)
     , next_address_for_sort_dma(0)
   { }
-  */
 
+  // textured
   global_polygon_type_0(const uint32_t texture_address)
     : parameter_control_word( para_control::para_type::polygon_or_modifier_volume
                             | para_control::list_type::opaque
diff --git a/holly/texture_memory_alloc.hpp b/holly/texture_memory_alloc.hpp
index 5325e9c..4a934cf 100644
--- a/holly/texture_memory_alloc.hpp
+++ b/holly/texture_memory_alloc.hpp
@@ -2,6 +2,14 @@
 
 #include
 
+/*
+  object_list[0x00100000 / 4] is enough space for 54 sets of
+  0x4b00-byte lists (16 * 4 * (640 / 32) * (480 / 32))
+
+  (that is, it is significantly more space than required for trivial
+  TA/CORE drawings)
+*/
+
 struct texture_memory_alloc {
   uint32_t isp_tsp_parameters[0x00100000 / 4]; // TA_ISP_BASE / PARAM_BASE (the actual objects)
   uint32_t object_list[0x00100000 / 4];        // TA_OL_BASE (contains object pointer blocks)
diff --git a/notes/ta-multipass.txt b/notes/ta-multipass.txt
new file mode 100644
index 0000000..d91d904
--- /dev/null
+++ b/notes/ta-multipass.txt
@@ -0,0 +1,12 @@
+page 179
+
+"If list continuation processing is performed through the TA_LIST_CONT
+register, the TA initializes its internal status in the same manner as
+before, but leaves the TA_NEXT_OPB register unchanged."
+
+TA_NEXT_OPB is only used in the case where the TA needs to allocate an
+object pointer in a tile that has a full OPB (e.g., for TA_ALLOC_CTRL
+0x2, to write the 16th opaque object to a tile, the next OPB for that
+tile is at the address TA_NEXT_OPB).
+
+