suzanne_profile: remove tearing

Though I did spend much time thinking about this, my idea was not correct.

The "tearing" and the "previous frame is being shown while it is being drawn"
effect both happened simply because that is exactly what the logic in
holly/core.cpp did.

This is no longer the case--by the time the newly-created core_flip function is
called, the core render is complete, and we should switch FB_R_SOF1 to the
current framebuffer, not the one that is going to be written on the next frame.

This also modifies alt.lds so that (non-startup) code now runs in the P1 area,
with operand/instruction/copyback caches enabled. This caused a 10x speed
increase in my testing.
This commit is contained in:
Zack Buhman 2024-02-02 13:11:32 +08:00
parent b17e075138
commit a71ac1c4b1
9 changed files with 123 additions and 52 deletions

17
alt.lds
View File

@ -1,16 +1,23 @@
OUTPUT_FORMAT("elf32-shl", "elf32-shl", "elf32-shl") OUTPUT_FORMAT("elf32-shl", "elf32-shl", "elf32-shl")
MEMORY MEMORY
{ {
p1ram : ORIGIN = 0xac020000, LENGTH = 0xff0000 p1ram : ORIGIN = 0x8c020000, LENGTH = 0xff0000
p2ram : ORIGIN = 0xac020000, LENGTH = 0xff0000
} }
SECTIONS SECTIONS
{ {
. = ORIGIN(p1ram); . = ORIGIN(p2ram);
.text ALIGN(4) : SUBALIGN(4) .text ALIGN(4) : SUBALIGN(4)
{ {
KEEP(*(.text.start)) KEEP(*(.text.start))
*(.text.startup.*) *(.text.startup.*)
} > p2ram AT>p1ram
. = ORIGIN(p1ram) + (. - ORIGIN(p2ram));
.text ALIGN(4) : SUBALIGN(4)
{
*(.text.*) *(.text.*)
*(.text) *(.text)
} > p1ram } > p1ram
@ -33,12 +40,6 @@ SECTIONS
KEEP(*(.ctors.*)) KEEP(*(.ctors.*))
} > p1ram } > p1ram
.text.p2ram ALIGN(4) : SUBALIGN(4)
{
*(.p2ram)
*(.p2ram.*)
} > p1ram
.bss ALIGN(4) (NOLOAD) : SUBALIGN(4) .bss ALIGN(4) (NOLOAD) : SUBALIGN(4)
{ {
*(.bss) *(.bss)

View File

@ -16,12 +16,17 @@
#include "holly/background.hpp" #include "holly/background.hpp"
#include "holly/texture_memory_alloc.hpp" #include "holly/texture_memory_alloc.hpp"
#include "memorymap.hpp" #include "memorymap.hpp"
#include "sh7091/sh7091.hpp"
#include "sh7091/sh7091_bits.hpp"
#include "sh7091/serial.hpp"
#include "geometry/suzanne.hpp" #include "geometry/suzanne.hpp"
#include "geometry/circle.hpp"
#include "math/vec4.hpp" #include "math/vec4.hpp"
#include "font/font_bitmap.hpp" #include "font/font_bitmap.hpp"
#include "verite_8x16.hpp" #include "verite_8x16.hpp"
#include "string.hpp"
constexpr float half_degree = 0.01745329f / 2; constexpr float half_degree = 0.01745329f / 2;
@ -235,8 +240,39 @@ void init_texture_memory(const struct opb_size& opb_size)
uint32_t _ta_parameter_buf[((32 * 8192) + 32) / 4]; uint32_t _ta_parameter_buf[((32 * 8192) + 32) / 4];
// Emits one row of the on-screen readout: `label` (`len` characters) followed
// by `number` as formatted by string::hex into an 8-character buffer.
//
// The label's x position is 16 + 8 * (10 - len), so shorter labels are pushed
// right and every label ends at the same x; the number is placed at
// 16 + 8 * (10 + 1), i.e. one 8-wide cell after that. `row` is multiplied by
// 16 to obtain the y position.
static inline void label_number(ta_parameter_writer& parameter,
                                const char * label,
                                const uint32_t len,
                                const uint32_t number,
                                const uint32_t row)
{
  constexpr uint32_t label_columns = 10;
  constexpr int hex_digits = 8;

  // NOTE(review): presumably string::hex fills exactly `hex_digits` characters
  // and writes no terminator -- confirm against string.hpp.
  char digits[hex_digits];
  string::hex(digits, hex_digits, number);

  const uint32_t label_x = 16 + (8 * (label_columns - len));
  const uint32_t number_x = 16 + (8 * (label_columns + 1));
  const uint32_t y = 16 * row;

  font_bitmap::transform_string(parameter,
                                8, 16,       // texture
                                8, 16,       // glyph
                                label_x, y,  // position
                                label, len);
  font_bitmap::transform_string(parameter,
                                8, 16,       // texture
                                8, 16,       // glyph
                                number_x, y, // position
                                digits, hex_digits);
}
void main() void main()
{ {
sh7091.TMU.TSTR = 0; // stop all timers
sh7091.TMU.TOCR = tmu::tocr::tcoe::tclk_is_external_clock_or_input_capture;
sh7091.TMU.TCR0 = tmu::tcr0::tpsc::p_phi_256; // 256 / 200MHz = 1.28 μs ; underflows in ~1 hour
sh7091.TMU.TCOR0 = 0xffff'ffff;
sh7091.TMU.TCNT0 = 0xffff'ffff;
sh7091.TMU.TSTR = tmu::tstr::str0::counter_start;
vga(); vga();
auto src = reinterpret_cast<const uint8_t *>(&_binary_verite_8x16_data_start); auto src = reinterpret_cast<const uint8_t *>(&_binary_verite_8x16_data_start);
@ -282,45 +318,86 @@ void main()
{0.f, 0.f, 0.f}, {0.f, 0.f, 0.f},
}; };
uint32_t t_transform_start = 0;
uint32_t t_transform_end = 0;
uint32_t t_text_start = 0;
uint32_t t_text_end = 0;
uint32_t t_transfer_start = 0;
uint32_t t_transfer_end = 0;
uint32_t t_render_start = 0;
uint32_t t_render_end = 0;
while (1) { while (1) {
ta_polygon_converter_init(opb_size.total(), ta_polygon_converter_init(opb_size.total(),
ta_alloc, ta_alloc,
640 / 32, 640 / 32,
480 / 32); 480 / 32);
float theta2 = 3.14 * 2 * sin(theta / 7);
lights[0].x = cos(theta) * 15;
lights[0].z = sin(theta) * 15;
lights[1].x = cos(theta2 + half_degree * 180.f) * 15;
lights[1].z = sin(theta2 + half_degree * 180.f) * 15;
lights[2].x = cos(theta + half_degree * 360.f) * 15;
lights[2].z = sin(theta + half_degree * 360.f) * 15;
auto parameter = ta_parameter_writer(ta_parameter_buf); auto parameter = ta_parameter_writer(ta_parameter_buf);
for (uint32_t i = 0; i < MODEL::num_faces; i++) {
transform(parameter, i, theta, lights);
}
transform2(parameter, lights[0], {1.f, 0.f, 0.f, 1.f});
transform2(parameter, lights[1], {0.f, 1.f, 0.f, 1.f});
transform2(parameter, lights[2], {0.f, 0.f, 1.f, 1.f});
font_bitmap::transform_string(parameter, // transform start
8, 16, // texture t_transform_start = sh7091.TMU.TCNT0;
8, 16, // glyph {
40, 40, // position const float theta2 = 3.14 * 2 * sin(theta / 7);
"test", 4);
lights[0].x = cos(theta) * 15;
lights[0].z = sin(theta) * 15;
lights[1].x = cos(theta2 + half_degree * 180.f) * 15;
lights[1].z = sin(theta2 + half_degree * 180.f) * 15;
lights[2].x = cos(theta + half_degree * 360.f) * 15;
lights[2].z = sin(theta + half_degree * 360.f) * 15;
for (uint32_t i = 0; i < MODEL::num_faces; i++) {
transform(parameter, i, theta, lights);
}
transform2(parameter, lights[0], {1.f, 0.f, 0.f, 1.f});
transform2(parameter, lights[1], {0.f, 1.f, 0.f, 1.f});
transform2(parameter, lights[2], {0.f, 0.f, 1.f, 1.f});
}
t_transform_end = sh7091.TMU.TCNT0;
// transform end
uint32_t _t_text_start = sh7091.TMU.TCNT0;
{
const uint32_t transform = t_transform_start - t_transform_end;
label_number(parameter, "transform:", 10, transform, 1);
const uint32_t text = t_text_start - t_text_end;
label_number(parameter, "text:", 5, text, 2);
const uint32_t transfer = t_transfer_start - t_transfer_end;
label_number(parameter, "transfer:", 9, transfer, 3);
const uint32_t render = t_render_start - t_render_end;
label_number(parameter, "render:", 7, render, 4);
}
t_text_start = _t_text_start;
t_text_end = sh7091.TMU.TCNT0;
parameter.append<ta_global_parameter::end_of_list>() = ta_global_parameter::end_of_list(para_control::para_type::end_of_list); parameter.append<ta_global_parameter::end_of_list>() = ta_global_parameter::end_of_list(para_control::para_type::end_of_list);
ta_polygon_converter_transfer(ta_parameter_buf, parameter.offset);
ta_wait_opaque_list();
core_start_render(frame_ix, num_frames);
v_sync_out(); // transfer start
core_wait_end_of_render_video(frame_ix, num_frames); t_transfer_start = sh7091.TMU.TCNT0;
theta += half_degree; {
ta_polygon_converter_transfer(ta_parameter_buf, parameter.offset);
ta_wait_opaque_list();
}
t_transfer_end = sh7091.TMU.TCNT0;
t_render_start = sh7091.TMU.TCNT0;
core_start_render(frame_ix, num_frames);
core_wait_end_of_render_video();
t_render_end = sh7091.TMU.TCNT0;
while (!spg_status::vsync(holly.SPG_STATUS)) {
}
core_flip(frame_ix, num_frames);
while (spg_status::vsync(holly.SPG_STATUS)) {
}
theta += half_degree * 0.5;
frame_ix += 1; frame_ix += 1;
} }
} }

View File

@ -152,7 +152,7 @@ void transform_string(ta_parameter_writer& parameter,
x *= static_cast<float>(glyph_width * 1); x *= static_cast<float>(glyph_width * 1);
y *= static_cast<float>(glyph_height * 1); y *= static_cast<float>(glyph_height * 1);
x += static_cast<float>(position_x + glyph_width * 4 * string_ix); x += static_cast<float>(position_x + glyph_width * string_ix);
y += static_cast<float>(position_y); y += static_cast<float>(position_y);
z = 1.f / (z + 10.f); z = 1.f / (z + 10.f);

View File

@ -67,7 +67,7 @@ void core_start_render(uint32_t frame_address,
holly.FB_W_CTRL = fb_w_ctrl::fb_dither | fb_w_ctrl::fb_packmode::_565_rgb_16bit; holly.FB_W_CTRL = fb_w_ctrl::fb_dither | fb_w_ctrl::fb_packmode::_565_rgb_16bit;
holly.FB_W_LINESTRIDE = (frame_linestride * 2) / 8; holly.FB_W_LINESTRIDE = (frame_linestride * 2) / 8;
uint32_t w_fb = ((frame_ix + 0) & num_frames) * frame_size; uint32_t w_fb = (frame_ix & num_frames) * frame_size;
holly.FB_W_SOF1 = frame_address + w_fb; holly.FB_W_SOF1 = frame_address + w_fb;
holly.STARTRENDER = 1; holly.STARTRENDER = 1;
@ -95,11 +95,8 @@ void core_wait_end_of_render_video()
| ISTNRM__END_OF_RENDER_VIDEO; | ISTNRM__END_OF_RENDER_VIDEO;
} }
void core_wait_end_of_render_video(uint32_t frame_ix, uint32_t num_frames) void core_flip(uint32_t frame_ix, uint32_t num_frames)
{ {
core_wait_end_of_render_video(); uint32_t r_fb = (frame_ix & num_frames) * 0x00096000;
// hmm hacky...
uint32_t r_fb = ((frame_ix + 1) & num_frames) * 0x00096000;
holly.FB_R_SOF1 = (offsetof (struct texture_memory_alloc, framebuffer)) + r_fb; holly.FB_R_SOF1 = (offsetof (struct texture_memory_alloc, framebuffer)) + r_fb;
} }

View File

@ -9,4 +9,4 @@ void core_start_render(uint32_t frame_address,
void core_start_render(uint32_t frame_ix, uint32_t num_frames); void core_start_render(uint32_t frame_ix, uint32_t num_frames);
void core_wait_end_of_render_video(); void core_wait_end_of_render_video();
void core_wait_end_of_render_video(uint32_t frame_ix, uint32_t num_frames); void core_flip(uint32_t frame_ix, uint32_t num_frames);

View File

@ -25,7 +25,7 @@ void init()
| ice::ic_used // instruction cache enable | ice::ic_used // instruction cache enable
| oci::clear_v_and_u_bits_of_all_oc_entries // operand cache invalidate | oci::clear_v_and_u_bits_of_all_oc_entries // operand cache invalidate
| oce::oc_used // operand cache enable | oce::oc_used // operand cache enable
// | cb::copy_back_mode // enable copy-back mode for the P1 area | cb::copy_back_mode // enable copy-back mode for the P1 area
; ;
sh7091.CCN.MMUCR = ccn::mmucr::at::mmu_disabled; sh7091.CCN.MMUCR = ccn::mmucr::at::mmu_disabled;

View File

@ -2,6 +2,6 @@
namespace cache { namespace cache {
void init() __attribute__ ((section (".p2ram.cache_init"))); void init() __attribute__ ((section (".text.startup.cache_init")));
} }

View File

@ -33,7 +33,7 @@ void character(const char c)
// wait for transmit fifo to become empty // wait for transmit fifo to become empty
while ((sh7091.SCIF.SCFSR2 & scfsr2::tdfe::bit_mask) == 0); while ((sh7091.SCIF.SCFSR2 & scfsr2::tdfe::bit_mask) == 0);
for (int i = 0; i < 100000; i++) { for (int i = 0; i < 10000; i++) {
asm volatile ("nop;"); asm volatile ("nop;");
} }

View File

@ -100,20 +100,16 @@ void vga2()
void v_sync_in() void v_sync_in()
{ {
while (!spg_status::vsync(holly.SPG_STATUS)) { while (!spg_status::vsync(holly.SPG_STATUS)) {
asm volatile ("nop");
} }
while (spg_status::vsync(holly.SPG_STATUS)) { while (spg_status::vsync(holly.SPG_STATUS)) {
asm volatile ("nop");
} }
} }
void v_sync_out() void v_sync_out()
{ {
while (spg_status::vsync(holly.SPG_STATUS)) { while (spg_status::vsync(holly.SPG_STATUS)) {
asm volatile ("nop");
} }
while (!spg_status::vsync(holly.SPG_STATUS)) { while (!spg_status::vsync(holly.SPG_STATUS)) {
asm volatile ("nop");
} }
} }