diff --git a/common.mk b/common.mk index 58f10fd..9bf6328 100644 --- a/common.mk +++ b/common.mk @@ -2,7 +2,7 @@ MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) DIR := $(dir $(MAKEFILE_PATH)) LIB ?= . -OPT ?= -Os +OPT ?= -O0 DEBUG ?= -g -gdwarf-4 GENERATED ?= @@ -101,7 +101,7 @@ audio.pcm: synth 1 sin 440 vol -10dB mv $@.raw $@ -1ST_READ.BIN: main.bin +1ST_READ.BIN: example/macaw_multipass.bin ./scramble $< $@ %.iso: 1ST_READ.BIN ip.bin diff --git a/example/example.mk b/example/example.mk index 9b0b679..d5eaa91 100644 --- a/example/example.mk +++ b/example/example.mk @@ -33,6 +33,18 @@ MACAW_MULTIPASS_OBJ = \ example/macaw_multipass.elf: LDSCRIPT = $(LIB)/alt.lds example/macaw_multipass.elf: $(START_OBJ) $(MACAW_MULTIPASS_OBJ) +TRANSLUCENCY_OBJ = \ + example/translucency.o \ + vga.o \ + holly/core.o \ + holly/region_array.o \ + holly/background.o \ + holly/ta_fifo_polygon_converter.o \ + macaw.data.o + +example/translucency.elf: LDSCRIPT = $(LIB)/alt.lds +example/translucency.elf: $(START_OBJ) $(TRANSLUCENCY_OBJ) + CUBE_OBJ = \ example/cube.o \ vga.o \ diff --git a/example/macaw.cpp b/example/macaw.cpp index 6cbebdf..fceac0b 100644 --- a/example/macaw.cpp +++ b/example/macaw.cpp @@ -9,6 +9,7 @@ #include "holly/core_bits.hpp" #include "holly/ta_fifo_polygon_converter.hpp" #include "holly/ta_parameter.hpp" +#include "holly/ta_bits.hpp" #include "holly/region_array.hpp" #include "holly/background.hpp" #include "memorymap.hpp" @@ -71,22 +72,21 @@ uint32_t transform(uint32_t * ta_parameter_buf, parameter.append() = global_end_of_list(); - theta += half_degree; - return parameter.offset; } -void init_texture_memory() +void init_texture_memory(const struct opb_size& opb_size) { volatile texture_memory_alloc * mem = reinterpret_cast(texture_memory); background_parameter(mem->background); - region_array(mem->region_array, - (offsetof (struct texture_memory_alloc, object_list)), - 640 / 32, // width - 480 / 32 // height - ); + region_array2(mem->region_array, + 
(offsetof (struct texture_memory_alloc, object_list)), + 640 / 32, // width + 480 / 32, // height + opb_size + ); } uint32_t _ta_parameter_buf[((32 * (strip_length + 2)) + 32) / 4]; @@ -107,31 +107,48 @@ void main() mem->texture[px] = rgb565; } + // The address of `ta_parameter_buf` must be a multiple of 32 bytes. + // This is mandatory for ch2-dma to the ta fifo polygon converter. + uint32_t * ta_parameter_buf = align_32byte(_ta_parameter_buf); + + constexpr uint32_t ta_alloc = ta_alloc_ctrl::pt_opb::no_list + | ta_alloc_ctrl::tm_opb::no_list + | ta_alloc_ctrl::t_opb::_16x4byte + | ta_alloc_ctrl::om_opb::no_list + | ta_alloc_ctrl::o_opb::_16x4byte; + + constexpr struct opb_size opb_size = { .opaque = 16 * 4 + , .opaque_modifier = 0 + , .translucent = 0 + , .translucent_modifier = 0 + , .punch_through = 0 + }; + + constexpr uint32_t tiles = (640 / 32) * (320 / 32); + holly.SOFTRESET = softreset::pipeline_soft_reset | softreset::ta_soft_reset; holly.SOFTRESET = 0; core_init(); - init_texture_memory(); + init_texture_memory(opb_size); - // The address of `ta_parameter_buf` must be a multiple of 32 bytes. - // This is mandatory for ch2-dma to the ta fifo polygon converter. 
- uint32_t * ta_parameter_buf = align_32byte(_ta_parameter_buf); - - constexpr uint32_t tiles = (640 / 32) * (320 / 32); - constexpr uint32_t opb_total_size = tiles * 16 * 4; + uint32_t frame_ix = 0; + constexpr uint32_t num_frames = 1; while (true) { - v_sync_out(); - v_sync_in(); - - ta_polygon_converter_init(opb_total_size); + ta_polygon_converter_init(opb_size.total() * tiles, ta_alloc); uint32_t ta_parameter_size = transform(ta_parameter_buf, strip_vertices, strip_length); ta_polygon_converter_transfer(ta_parameter_buf, ta_parameter_size); ta_wait_opaque_list(); - constexpr int frame_ix = 0; - constexpr int num_frames = 0; core_start_render(frame_ix, num_frames); + + v_sync_out(); + v_sync_in(); + core_wait_end_of_render_video(frame_ix, num_frames); + + theta += half_degree; + frame_ix += 1; } } diff --git a/example/macaw_multipass.cpp b/example/macaw_multipass.cpp index b69b7b6..567cd82 100644 --- a/example/macaw_multipass.cpp +++ b/example/macaw_multipass.cpp @@ -9,6 +9,7 @@ #include "holly/core_bits.hpp" #include "holly/ta_fifo_polygon_converter.hpp" #include "holly/ta_parameter.hpp" +#include "holly/ta_bits.hpp" #include "holly/background.hpp" #include "holly/region_array.hpp" #include "memorymap.hpp" @@ -26,10 +27,10 @@ struct vertex { const struct vertex strip_vertices[4] = { // [ position ] [ uv coordinates ] [color ] - { -0.5f, 0.5f, 0.f, 0.f , 127.f/128.f, 0x7fff0000}, // the first two base colors in a - { -0.5f, -0.5f, 0.f, 0.f , 0.f , 0x7f00ff00}, // non-Gouraud triangle strip are ignored - { 0.5f, 0.5f, 0.f, 127.f/128.f, 127.f/128.f, 0x7f0000ff}, - { 0.5f, -0.5f, 0.f, 127.f/128.f, 0.f , 0x7fff00ff}, + { -0.5f, 0.5f, 0.f, 0.f , 127.f/128.f, 0xa0ff0000}, // the first two base colors in a + { -0.5f, -0.5f, 0.f, 0.f , 0.f , 0xa000ff00}, // non-Gouraud triangle strip are ignored + { 0.5f, 0.5f, 0.f, 127.f/128.f, 127.f/128.f, 0xa00000ff}, + { 0.5f, -0.5f, 0.f, 127.f/128.f, 0.f , 0xa0ff00ff}, }; constexpr uint32_t strip_length = (sizeof (strip_vertices)) 
/ (sizeof (struct vertex)); @@ -62,8 +63,8 @@ uint32_t transform(uint32_t * ta_parameter_buf, x1 = x * __builtin_cosf(theta) - z * __builtin_sinf(theta); z = x * __builtin_sinf(theta) + z * __builtin_cosf(theta); x = x1; - x *= 240.f; - y *= 240.f; + x *= 256.f; + y *= 256.f; x += 320.f; y += 240.f; z = 1.f / (z + 10.f); @@ -81,7 +82,7 @@ uint32_t transform(uint32_t * ta_parameter_buf, return parameter.offset; } -void init_texture_memory() +void init_texture_memory(uint32_t render_passes) { volatile texture_memory_alloc * mem = reinterpret_cast(texture_memory); @@ -91,7 +92,7 @@ void init_texture_memory() (offsetof (struct texture_memory_alloc, object_list)), 640 / 32, // width 480 / 32, // height - 2 // num_render_passes + render_passes // num_render_passes ); } @@ -118,36 +119,68 @@ void main() holly.SOFTRESET = 0; core_init(); - init_texture_memory(); + constexpr uint32_t render_passes = 2; + init_texture_memory(render_passes); // The address of `ta_parameter_buf` must be a multiple of 32 bytes. // This is mandatory for ch2-dma to the ta fifo polygon converter. 
uint32_t * ta_parameter_buf = align_32byte(_ta_parameter_buf); + constexpr uint32_t ta_alloc[2] = + { ta_alloc_ctrl::pt_opb::no_list + | ta_alloc_ctrl::tm_opb::no_list + | ta_alloc_ctrl::t_opb::no_list + | ta_alloc_ctrl::om_opb::no_list + | ta_alloc_ctrl::o_opb::_16x4byte + + , ta_alloc_ctrl::pt_opb::no_list + | ta_alloc_ctrl::tm_opb::no_list + | ta_alloc_ctrl::t_opb::_16x4byte + | ta_alloc_ctrl::om_opb::no_list + | ta_alloc_ctrl::o_opb::no_list + }; + + constexpr struct opb_size opb_size[2] = { + { .opaque = 16 * 4 + , .opaque_modifier = 0 + , .translucent = 0 + , .translucent_modifier = 0 + , .punch_through = 0 + }, + { .opaque = 0 + , .opaque_modifier = 0 + , .translucent = 16 * 4 + , .translucent_modifier = 0 + , .punch_through = 0 + } + }; + constexpr uint32_t tiles = (640 / 32) * (320 / 32); - constexpr uint32_t opb_per_pass_size = tiles * 16 * 4; - constexpr uint32_t opb_total_size = opb_per_pass_size * 2; + + uint32_t frame_ix = 0; + constexpr uint32_t num_frames = 1; + uint32_t ta_parameter_size[2]; while (true) { - v_sync_out(); - v_sync_in(); - // first render pass - ta_polygon_converter_init(opb_total_size); - uint32_t ta_parameter_size_pass_1 = transform(ta_parameter_buf, strip_vertices, strip_length, 0); - ta_polygon_converter_transfer(ta_parameter_buf, ta_parameter_size_pass_1); + ta_polygon_converter_init((opb_size[0].total() + opb_size[1].total()) * tiles, ta_alloc[0]); + ta_parameter_size[0] = transform(ta_parameter_buf, strip_vertices, strip_length, 0); + ta_polygon_converter_transfer(ta_parameter_buf, ta_parameter_size[0]); ta_wait_opaque_list(); // second render pass - ta_polygon_converter_cont(opb_per_pass_size); - uint32_t ta_parameter_size_pass_2 = transform(ta_parameter_buf, strip_vertices, strip_length, 1); - ta_polygon_converter_transfer(ta_parameter_buf, ta_parameter_size_pass_2); + ta_polygon_converter_cont(opb_size[0].total() * tiles, ta_alloc[1]); + ta_parameter_size[1] = transform(ta_parameter_buf, strip_vertices, strip_length, 1); + 
ta_polygon_converter_transfer(ta_parameter_buf, ta_parameter_size[1]); ta_wait_translucent_list(); - constexpr int frame_ix = 0; - constexpr int num_frames = 0; core_start_render(frame_ix, num_frames); + v_sync_out(); + v_sync_in(); + core_wait_end_of_render_video(frame_ix, num_frames); + theta += half_degree; + frame_ix += 1; } } diff --git a/example/translucency.cpp b/example/translucency.cpp new file mode 100644 index 0000000..c543bea --- /dev/null +++ b/example/translucency.cpp @@ -0,0 +1,164 @@ +#include + +#include "align.hpp" +#include "vga.hpp" + +#include "holly/texture_memory_alloc.hpp" +#include "holly.hpp" +#include "holly/core.hpp" +#include "holly/core_bits.hpp" +#include "holly/ta_fifo_polygon_converter.hpp" +#include "holly/ta_parameter.hpp" +#include "holly/ta_bits.hpp" +#include "holly/region_array.hpp" +#include "holly/background.hpp" +#include "memorymap.hpp" + +#include "macaw.hpp" + +struct vertex { + float x; + float y; + float z; + float u; + float v; + uint32_t color; +}; + +const struct vertex strip_vertices[4] = { + // [ position ] [ uv coordinates ] [color ] + { -0.5f, 0.5f, 0.f, 0.f , 127.f/128.f, 0xa0ff0000}, // the first two base colors in a + { -0.5f, -0.5f, 0.f, 0.f , 0.f , 0xa000ff00}, // non-Gouraud triangle strip are ignored + { 0.5f, 0.5f, 0.f, 127.f/128.f, 127.f/128.f, 0xa00000ff}, + { 0.5f, -0.5f, 0.f, 127.f/128.f, 0.f , 0xa0ff00ff}, +}; +constexpr uint32_t strip_length = (sizeof (strip_vertices)) / (sizeof (struct vertex)); + +static float theta = 0; +constexpr float half_degree = 0.01745329f / 2.f; + +uint32_t transform(uint32_t * ta_parameter_buf, + const vertex * strip_vertices, + const uint32_t strip_length, + bool translucent) +{ + auto parameter = ta_parameter_writer(ta_parameter_buf); + uint32_t texture_address = (offsetof (struct texture_memory_alloc, texture)); + if (translucent) { + // translucent untextured + parameter.append() = global_polygon_type_0(); + } else { + // opaque textured + parameter.append() = 
global_polygon_type_0(texture_address); + } + + for (uint32_t i = 0; i < strip_length; i++) { + bool end_of_strip = i == strip_length - 1; + + float x = strip_vertices[i].x; + float y = strip_vertices[i].y; + float z = strip_vertices[i].z; + float x1; + + x1 = x * __builtin_cosf(theta) - z * __builtin_sinf(theta); + z = x * __builtin_sinf(theta) + z * __builtin_cosf(theta); + x = x1; + x *= 240.f; + y *= 240.f; + x += 320.f; + y += 240.f; + z = 1.f / (z + 10.f); + + parameter.append() = + vertex_polygon_type_3(x, y, z, + strip_vertices[i].u, + strip_vertices[i].v, + strip_vertices[i].color, + end_of_strip); + } + + parameter.append() = global_end_of_list(); + + return parameter.offset; +} + +void init_texture_memory(const struct opb_size& opb_size) +{ + volatile texture_memory_alloc * mem = reinterpret_cast(texture_memory); + + background_parameter(mem->background); + + region_array2(mem->region_array, + (offsetof (struct texture_memory_alloc, object_list)), + 640 / 32, // width + 480 / 32, // height + opb_size + ); +} + +uint32_t _ta_parameter_buf[((32 * (strip_length + 2)) * 2 + 32) / 4]; + +void main() +{ + + vga(); + + auto src = reinterpret_cast(&_binary_macaw_data_start); + auto size = reinterpret_cast(&_binary_macaw_data_size); + auto mem = reinterpret_cast(0xa400'0000); + for (uint32_t px = 0; px < size / 3; px++) { + uint8_t r = src[px * 3 + 0]; + uint8_t g = src[px * 3 + 1]; + uint8_t b = src[px * 3 + 2]; + + uint16_t rgb565 = ((r / 8) << 11) | ((g / 4) << 5) | ((b / 8) << 0); + mem->texture[px] = rgb565; + } + + // The address of `ta_parameter_buf` must be a multiple of 32 bytes. + // This is mandatory for ch2-dma to the ta fifo polygon converter. 
+ uint32_t * ta_parameter_buf = align_32byte(_ta_parameter_buf); + + constexpr uint32_t ta_alloc = ta_alloc_ctrl::pt_opb::no_list + | ta_alloc_ctrl::tm_opb::no_list + | ta_alloc_ctrl::t_opb::_16x4byte + | ta_alloc_ctrl::om_opb::no_list + | ta_alloc_ctrl::o_opb::_16x4byte; + + constexpr struct opb_size opb_size = { .opaque = 16 * 4 + , .opaque_modifier = 0 + , .translucent = 16 * 4 + , .translucent_modifier = 0 + , .punch_through = 0 + }; + + constexpr uint32_t tiles = (640 / 32) * (320 / 32); + + holly.SOFTRESET = softreset::pipeline_soft_reset + | softreset::ta_soft_reset; + holly.SOFTRESET = 0; + + core_init(); + init_texture_memory(opb_size); + + uint32_t frame_ix = 0; + constexpr uint32_t num_frames = 1; + + while (true) { + ta_polygon_converter_init(opb_size.total() * tiles, ta_alloc); + uint32_t ta_parameter_size = 0; + ta_parameter_size += transform(&ta_parameter_buf[ta_parameter_size / 4], strip_vertices, strip_length, 0); + ta_parameter_size += transform(&ta_parameter_buf[ta_parameter_size / 4], strip_vertices, strip_length, 1); + ta_polygon_converter_transfer(ta_parameter_buf, ta_parameter_size); + ta_wait_translucent_list(); + + core_start_render(frame_ix, num_frames); + + v_sync_out(); + v_sync_in(); + core_wait_end_of_render_video(frame_ix, num_frames); + + theta += half_degree; + frame_ix += 1; + } +} diff --git a/holly/core.cpp b/holly/core.cpp index 3398545..4853f82 100644 --- a/holly/core.cpp +++ b/holly/core.cpp @@ -2,6 +2,8 @@ #include "core_bits.hpp" #include "../holly.hpp" #include "../memorymap.hpp" +#include "../systembus.hpp" +#include "../systembus_bits.hpp" #include "texture_memory_alloc.hpp" @@ -11,7 +13,8 @@ void core_init() { - holly.ISP_FEED_CFG = isp_feed_cfg::cache_size_for_translucency(0x200); + holly.ISP_FEED_CFG = isp_feed_cfg::cache_size_for_translucency(0x200) + | isp_feed_cfg::pre_sort_mode; holly.FPU_SHAD_SCALE = fpu_shad_scale::scale_factor_for_shadows(1); holly.FPU_CULL_VAL = _i(1.f); @@ -44,11 +47,11 @@ void core_init() 
holly.FPU_PARAM_CFG = fpu_param_cfg::region_header_type::type_2 | fpu_param_cfg::tsp_parameter_burst_threshold(31) | fpu_param_cfg::isp_parameter_burst_threshold(31) - | fpu_param_cfg::pointer_burst_size(7) // must be less than opb size + | fpu_param_cfg::pointer_burst_size(0x15) // must be less than opb size | fpu_param_cfg::pointer_first_burst_size(7); // half of pointer burst size(?) } -void core_start_render(int frame_ix, int num_frames) +void core_start_render(uint32_t frame_ix, uint32_t num_frames) { holly.REGION_BASE = (offsetof (struct texture_memory_alloc, region_array)); holly.PARAM_BASE = (offsetof (struct texture_memory_alloc, isp_tsp_parameters)); @@ -56,15 +59,28 @@ void core_start_render(int frame_ix, int num_frames) holly.ISP_BACKGND_T = isp_backgnd_t::tag_address((offsetof (struct texture_memory_alloc, background)) / 4) | isp_backgnd_t::tag_offset(0) | isp_backgnd_t::skip(1); - holly.ISP_BACKGND_D = _i(1.f/100000); + holly.ISP_BACKGND_D = _i(1.f/100000.f); holly.FB_W_CTRL = 1 << 3 | fb_w_ctrl::fb_packmode::_565_rgb_16bit; holly.FB_W_LINESTRIDE = (640 * 2) / 8; - int w_fb = ((frame_ix + 0) & num_frames) * 0x00096000; - int r_fb = ((frame_ix + 1) & num_frames) * 0x00096000; + uint32_t w_fb = ((frame_ix + 0) & num_frames) * 0x00096000; holly.FB_W_SOF1 = (offsetof (struct texture_memory_alloc, framebuffer)) + w_fb; - holly.FB_R_SOF1 = (offsetof (struct texture_memory_alloc, framebuffer)) + r_fb; holly.STARTRENDER = 1; } + +static bool flycast_is_dumb = 0; + +void core_wait_end_of_render_video(uint32_t frame_ix, uint32_t num_frames) +{ + uint32_t r_fb = ((frame_ix + 1) & num_frames) * 0x00096000; + holly.FB_R_SOF1 = (offsetof (struct texture_memory_alloc, framebuffer)) + r_fb; + + if (!flycast_is_dumb) { + flycast_is_dumb = 1; + } else { + while ((system.ISTNRM & ISTNRM__END_OF_RENDER_VIDEO) == 0); + system.ISTNRM = ISTNRM__END_OF_RENDER_VIDEO; + } +} diff --git a/holly/core.hpp b/holly/core.hpp index 0333b52..5714740 100644 --- a/holly/core.hpp +++ 
b/holly/core.hpp @@ -1,4 +1,5 @@ #pragma once void core_init(); -void core_start_render(int frame_ix, int num_frames); +void core_start_render(uint32_t frame_ix, uint32_t num_frames); +void core_wait_end_of_render_video(uint32_t frame_ix, uint32_t num_frames); diff --git a/holly/region_array.cpp b/holly/region_array.cpp index 3429462..71806f5 100644 --- a/holly/region_array.cpp +++ b/holly/region_array.cpp @@ -26,23 +26,24 @@ struct region_array_entry { // opaque list pointer offset: OPB size * tile index * 4 void region_array(volatile uint32_t * buf, - const uint32_t ol_base, - const uint32_t width, // in tile units (1 tile unit = 32 pixels) - const uint32_t height) // in tile units (1 tile unit = 32 pixels) + const uint32_t ol_base, + const uint32_t width, // in tile units (1 tile unit = 32 pixels) + const uint32_t height) // in tile units (1 tile unit = 32 pixels) { volatile region_array_entry * region_array = reinterpret_cast(buf); uint32_t ix = 0; // create a "dummy region array [item]" for CORE & TA-related bug #21: // "Misshapen tiles or missing tiles occur" + /* region_array[ix].tile = REGION_ARRAY__FLUSH_ACCUMULATE; region_array[ix].opaque_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; region_array[ix].opaque_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; region_array[ix].translucent_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; region_array[ix].translucent_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; region_array[ix].punch_through_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; - ix += 1; + */ for (uint32_t y = 0; y < height; y++) { for (uint32_t x = 0; x < width; x++) { @@ -50,7 +51,7 @@ void region_array(volatile uint32_t * buf, | REGION_ARRAY__TILE_X_POSITION(x); if (y == (height - 1) && x == (width - 1)) - region_array[ix].tile |= REGION_ARRAY__LAST_REGION; + region_array[ix].tile |= REGION_ARRAY__LAST_REGION; uint32_t tile_index = y * width + x; constexpr uint32_t opaque_list_opb_size = 16 * 4; // in 
bytes; this must match O_OPB in TA_ALLOC_CTRL @@ -65,15 +66,84 @@ void region_array(volatile uint32_t * buf, } } +void region_array2(volatile uint32_t * buf, + const uint32_t ol_base, + const uint32_t width, // in tile units (1 tile unit = 32 pixels) + const uint32_t height, // in tile units (1 tile unit = 32 pixels) + const struct opb_size& opb_size) +{ + volatile region_array_entry * region_array = reinterpret_cast(buf); + const uint32_t num_tiles = width * height; + uint32_t ix = 0; + + /* + region_array[ix].tile = REGION_ARRAY__FLUSH_ACCUMULATE; + region_array[ix].opaque_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; + region_array[ix].opaque_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; + region_array[ix].translucent_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; + region_array[ix].translucent_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; + region_array[ix].punch_through_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; + ix += 1; + */ + + for (uint32_t y = 0; y < height; y++) { + for (uint32_t x = 0; x < width; x++) { + region_array[ix].tile = REGION_ARRAY__TILE_Y_POSITION(y) + | REGION_ARRAY__TILE_X_POSITION(x); + + if (y == (height - 1) && x == (width - 1)) + region_array[ix].tile |= REGION_ARRAY__LAST_REGION; + + uint32_t tile_index = y * width + x; + constexpr uint32_t opaque_list_opb_size = 16 * 4; // in bytes; this must match O_OPB in TA_ALLOC_CTRL + constexpr uint32_t translucent_list_opb_size = 16 * 4; // in bytes; this must match T_OPB in TA_ALLOC_CTRL + region_array[ix].opaque_list_pointer = (opb_size.opaque == 0) ? REGION_ARRAY__LIST_POINTER__EMPTY : + (ol_base + (opb_size.opaque * tile_index) + ); + + region_array[ix].opaque_modifier_volume_list_pointer = (opb_size.opaque_modifier == 0) ? REGION_ARRAY__LIST_POINTER__EMPTY : + (ol_base + num_tiles * ( opb_size.opaque + ) + + (opb_size.opaque_modifier * tile_index) + ); + + region_array[ix].translucent_list_pointer = (opb_size.translucent == 0) ?
REGION_ARRAY__LIST_POINTER__EMPTY : + (ol_base + num_tiles * ( opb_size.opaque + + opb_size.opaque_modifier + ) + + (opb_size.translucent * tile_index) + ); + region_array[ix].translucent_modifier_volume_list_pointer = (opb_size.translucent_modifier == 0) ? REGION_ARRAY__LIST_POINTER__EMPTY : + (ol_base + num_tiles * ( opb_size.opaque + + opb_size.opaque_modifier + + opb_size.translucent + ) + + (opb_size.translucent_modifier * tile_index) + ); + region_array[ix].punch_through_list_pointer = (opb_size.punch_through == 0) ? REGION_ARRAY__LIST_POINTER__EMPTY : + (ol_base + num_tiles * ( opb_size.opaque + + opb_size.opaque_modifier + + opb_size.translucent + + opb_size.translucent_modifier + ) + + (opb_size.punch_through * tile_index) + ); + + ix += 1; + } + } +} + void region_array_multipass(volatile uint32_t * buf, - const uint32_t ol_base, - const uint32_t width, // in tile units (1 tile unit = 32 pixels) - const uint32_t height, // in tile units (1 tile unit = 32 pixels) - const uint32_t num_render_passes) + const uint32_t ol_base, + const uint32_t width, // in tile units (1 tile unit = 32 pixels) + const uint32_t height, // in tile units (1 tile unit = 32 pixels) + const uint32_t num_render_passes) { volatile region_array_entry * region_array = reinterpret_cast(buf); uint32_t ix = 0; + /* // create a "dummy region array [item]" for CORE & TA-related bug #21: // "Misshapen tiles or missing tiles occur" region_array[ix].tile = REGION_ARRAY__FLUSH_ACCUMULATE; @@ -82,38 +152,51 @@ void region_array_multipass(volatile uint32_t * buf, region_array[ix].translucent_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; region_array[ix].translucent_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; region_array[ix].punch_through_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; - ix += 1; + */ - constexpr uint32_t opaque_list_opb_size = 16 * 4; // for a single OPB in bytes; this must match O_OPB in TA_ALLOC_CTRL - const uint32_t opaque_opb_render_pass_size = 
width * height * opaque_list_opb_size; // the sum of the size of all OPB for a single pass + constexpr uint32_t list_opb_size = 16 * 4; // for a single OPB in bytes; this must match O_OPB (first pass) and T_OPB (later passes) in TA_ALLOC_CTRL + const uint32_t opb_render_pass_size = width * height * list_opb_size; // the sum of the size of all OPB for a single pass for (uint32_t y = 0; y < height; y++) { for (uint32_t x = 0; x < width; x++) { + + region_array[ix].tile = REGION_ARRAY__TILE_Y_POSITION(y) + | REGION_ARRAY__TILE_X_POSITION(x); + + /* 0x10 = FLUSH_ACCUMULATE + 0x50 = FLUSH_ACCUMULATE | Z_CLEAR + 0x40 = Z_CLEAR + */ + for (uint32_t render_pass = 0; render_pass < num_render_passes; render_pass++) { + if (render_pass != (num_render_passes - 1)) + region_array[ix].tile |= REGION_ARRAY__FLUSH_ACCUMULATE; - region_array[ix].tile = REGION_ARRAY__TILE_Y_POSITION(y) - | REGION_ARRAY__TILE_X_POSITION(x); + if (render_pass > 0) + region_array[ix].tile |= REGION_ARRAY__Z_CLEAR; - if (render_pass != (num_render_passes - 1)) - region_array[ix].tile |= REGION_ARRAY__FLUSH_ACCUMULATE; + uint32_t tile_index = y * width + x; + uint32_t pass_ol_base = ol_base + (opb_render_pass_size * render_pass); + if (render_pass == 0) { + region_array[ix].opaque_list_pointer = pass_ol_base + (list_opb_size * tile_index); + region_array[ix].opaque_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; + region_array[ix].translucent_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; + region_array[ix].translucent_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; + region_array[ix].punch_through_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; + } else { + // (list_opb_size * width * height) + + region_array[ix].opaque_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; + region_array[ix].opaque_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; + region_array[ix].translucent_list_pointer = pass_ol_base + (list_opb_size * tile_index); +
region_array[ix].translucent_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; + region_array[ix].punch_through_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; + } - if (render_pass > 0) - region_array[ix].tile |= REGION_ARRAY__Z_CLEAR; + if (y == (height - 1) && x == (width - 1)) + region_array[ix].tile |= REGION_ARRAY__LAST_REGION; - if (render_pass == (num_render_passes - 1) && - y == (height - 1) && x == (width - 1)) - region_array[ix].tile |= REGION_ARRAY__LAST_REGION; - - uint32_t tile_index = y * width + x; - uint32_t pass_ol_base = ol_base + (opaque_opb_render_pass_size * render_pass); - region_array[ix].opaque_list_pointer = pass_ol_base + (opaque_list_opb_size * tile_index); - region_array[ix].opaque_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; - region_array[ix].translucent_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; - region_array[ix].translucent_modifier_volume_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; - region_array[ix].punch_through_list_pointer = REGION_ARRAY__LIST_POINTER__EMPTY; - - ix++; + ix += 1; } } } diff --git a/holly/region_array.hpp b/holly/region_array.hpp index c7c24bc..0f4a82d 100644 --- a/holly/region_array.hpp +++ b/holly/region_array.hpp @@ -2,11 +2,34 @@ #include +struct opb_size { + uint32_t opaque; + uint32_t opaque_modifier; + uint32_t translucent; + uint32_t translucent_modifier; + uint32_t punch_through; + + uint32_t total() const + { + return opaque + + opaque_modifier + + translucent + + translucent_modifier + + punch_through; + } +}; + void region_array(volatile uint32_t * buf, const uint32_t ol_base, const uint32_t width, // in tile units (1 tile unit = 32 pixels) const uint32_t height); // in tile units (1 tile unit = 32 pixels) +void region_array2(volatile uint32_t * buf, + const uint32_t ol_base, + const uint32_t width, // in tile units (1 tile unit = 32 pixels) + const uint32_t height, // in tile units (1 tile unit = 32 pixels) + const struct opb_size& opb_size); + 
void region_array_multipass(volatile uint32_t * buf, const uint32_t ol_base, const uint32_t width, // in tile units (1 tile unit = 32 pixels) diff --git a/holly/ta_fifo_polygon_converter.cpp b/holly/ta_fifo_polygon_converter.cpp index 46252e5..a005072 100644 --- a/holly/ta_fifo_polygon_converter.cpp +++ b/holly/ta_fifo_polygon_converter.cpp @@ -12,7 +12,8 @@ #include "ta_fifo_polygon_converter.hpp" -void ta_polygon_converter_init(uint32_t opb_total_size) // for all render passes +void ta_polygon_converter_init(uint32_t opb_total_size, // for all render passes + uint32_t ta_alloc) { holly.SOFTRESET = softreset::ta_soft_reset; holly.SOFTRESET = 0; @@ -21,11 +22,7 @@ void ta_polygon_converter_init(uint32_t opb_total_size) // for all render passes | ta_glob_tile_clip::tile_x_num((640 / 32) - 1); holly.TA_ALLOC_CTRL = ta_alloc_ctrl::opb_mode::increasing_addresses - | ta_alloc_ctrl::pt_opb::no_list - | ta_alloc_ctrl::tm_opb::no_list - | ta_alloc_ctrl::t_opb::no_list - | ta_alloc_ctrl::om_opb::no_list - | ta_alloc_ctrl::o_opb::_16x4byte; + | ta_alloc; holly.TA_ISP_BASE = (offsetof (struct texture_memory_alloc, isp_tsp_parameters)); holly.TA_ISP_LIMIT = (offsetof (struct texture_memory_alloc, object_list)); // the end of isp_tsp_parameters @@ -40,15 +37,11 @@ void ta_polygon_converter_init(uint32_t opb_total_size) // for all render passes (void)_dummy_read; } -void ta_polygon_converter_cont(uint32_t ol_base_offset) +void ta_polygon_converter_cont(uint32_t ol_base_offset, + uint32_t ta_alloc) { holly.TA_ALLOC_CTRL = ta_alloc_ctrl::opb_mode::increasing_addresses - | ta_alloc_ctrl::pt_opb::no_list - | ta_alloc_ctrl::tm_opb::no_list - | ta_alloc_ctrl::t_opb::_16x4byte - | ta_alloc_ctrl::om_opb::no_list - | ta_alloc_ctrl::o_opb::no_list; - + | ta_alloc; holly.TA_OL_BASE = (offsetof (struct texture_memory_alloc, object_list)) + ol_base_offset; @@ -58,7 +51,6 @@ void ta_polygon_converter_cont(uint32_t ol_base_offset) (void)_dummy_read; } - void 
ta_polygon_converter_transfer(volatile uint32_t * buf, uint32_t size) { /* wait for previous transfer to complete (if any) */ diff --git a/holly/ta_fifo_polygon_converter.hpp b/holly/ta_fifo_polygon_converter.hpp index 9b92a09..b401192 100644 --- a/holly/ta_fifo_polygon_converter.hpp +++ b/holly/ta_fifo_polygon_converter.hpp @@ -2,8 +2,10 @@ #include -void ta_polygon_converter_init(uint32_t opb_total_size); // total OPB size for all render passes -void ta_polygon_converter_cont(uint32_t ol_base_offset); +void ta_polygon_converter_init(uint32_t opb_total_size, // total OPB size for all render passes + uint32_t ta_alloc); +void ta_polygon_converter_cont(uint32_t ol_base_offset, + uint32_t ta_alloc); void ta_polygon_converter_transfer(volatile uint32_t * buf, uint32_t size); void ta_wait_opaque_list(); void ta_wait_translucent_list(); diff --git a/holly/ta_parameter.hpp b/holly/ta_parameter.hpp index eb0b1a9..a30b255 100644 --- a/holly/ta_parameter.hpp +++ b/holly/ta_parameter.hpp @@ -151,8 +151,8 @@ struct global_polygon_type_0 { , isp_tsp_instruction_word( isp_tsp_instruction_word::depth_compare_mode::always | isp_tsp_instruction_word::culling_mode::no_culling ) - , tsp_instruction_word( tsp_instruction_word::src_alpha_instr::one - | tsp_instruction_word::dst_alpha_instr::src_alpha + , tsp_instruction_word( tsp_instruction_word::src_alpha_instr::src_alpha + | tsp_instruction_word::dst_alpha_instr::inverse_src_alpha | tsp_instruction_word::fog_control::no_fog | tsp_instruction_word::use_alpha ) diff --git a/holly/texture_memory_alloc.hpp b/holly/texture_memory_alloc.hpp index 4a934cf..bc280c2 100644 --- a/holly/texture_memory_alloc.hpp +++ b/holly/texture_memory_alloc.hpp @@ -8,15 +8,19 @@ (that is, it is significantly more space than required for trivial TA/CORE drawings) + + region-array[0x00004000 / 4] is enough space for 2 render passes. 
*/ struct texture_memory_alloc { uint32_t isp_tsp_parameters[0x00100000 / 4]; // TA_ISP_BASE / PARAM_BASE (the actual objects) uint32_t object_list[0x00100000 / 4]; // TA_OL_BASE (contains object pointer blocks) uint32_t _res0[ 0x20 / 4]; // (the TA may clobber 4 bytes starting at TA_OL_LIMIT) - uint32_t region_array[0x00002000 / 4]; // REGION_BASE + uint32_t region_array[0x00004000 / 4]; // REGION_BASE uint32_t background[0x00000040 / 4]; // ISP_BACKGND_T uint32_t framebuffer[2][0x00096000 / 4]; // FB_R_SOF1 / FB_W_SOF1 uint32_t _res1[ 0x20 / 4]; // (re-align texture to a 64-byte boundary) uint16_t texture[128 * 128 * 2 / 2]; // texture_control_word::texture_address }; + +static_assert((sizeof (texture_memory_alloc)) < 0x1000000);