suzanne_profile: remove tearing
Although I spent a lot of time thinking about this, my original idea was not correct. The "tearing" and the "previous frame is being shown while it is being drawn" effect occurred simply because that is exactly what the logic in holly/core.cpp did. This is no longer the case: by the time the newly-created core_flip function is called, the core render is complete, and we should switch FB_R_SOF1 to the current framebuffer, not the one that is going to be written on the next frame. This also modifies alt.lds so that (non-startup) code now runs in the P1 area, with the operand and instruction caches and copy-back mode enabled. This caused a 10x speed increase in my testing.
This commit is contained in:
parent
b17e075138
commit
a71ac1c4b1
17
alt.lds
17
alt.lds
@ -1,16 +1,23 @@
|
||||
OUTPUT_FORMAT("elf32-shl", "elf32-shl", "elf32-shl")
|
||||
MEMORY
|
||||
{
|
||||
p1ram : ORIGIN = 0xac020000, LENGTH = 0xff0000
|
||||
p1ram : ORIGIN = 0x8c020000, LENGTH = 0xff0000
|
||||
p2ram : ORIGIN = 0xac020000, LENGTH = 0xff0000
|
||||
}
|
||||
SECTIONS
|
||||
{
|
||||
. = ORIGIN(p1ram);
|
||||
. = ORIGIN(p2ram);
|
||||
|
||||
.text ALIGN(4) : SUBALIGN(4)
|
||||
{
|
||||
KEEP(*(.text.start))
|
||||
*(.text.startup.*)
|
||||
} > p2ram AT>p1ram
|
||||
|
||||
. = ORIGIN(p1ram) + (. - ORIGIN(p2ram));
|
||||
|
||||
.text ALIGN(4) : SUBALIGN(4)
|
||||
{
|
||||
*(.text.*)
|
||||
*(.text)
|
||||
} > p1ram
|
||||
@ -33,12 +40,6 @@ SECTIONS
|
||||
KEEP(*(.ctors.*))
|
||||
} > p1ram
|
||||
|
||||
.text.p2ram ALIGN(4) : SUBALIGN(4)
|
||||
{
|
||||
*(.p2ram)
|
||||
*(.p2ram.*)
|
||||
} > p1ram
|
||||
|
||||
.bss ALIGN(4) (NOLOAD) : SUBALIGN(4)
|
||||
{
|
||||
*(.bss)
|
||||
|
@ -16,12 +16,17 @@
|
||||
#include "holly/background.hpp"
|
||||
#include "holly/texture_memory_alloc.hpp"
|
||||
#include "memorymap.hpp"
|
||||
#include "sh7091/sh7091.hpp"
|
||||
#include "sh7091/sh7091_bits.hpp"
|
||||
#include "sh7091/serial.hpp"
|
||||
|
||||
#include "geometry/suzanne.hpp"
|
||||
#include "geometry/circle.hpp"
|
||||
#include "math/vec4.hpp"
|
||||
|
||||
#include "font/font_bitmap.hpp"
|
||||
#include "verite_8x16.hpp"
|
||||
#include "string.hpp"
|
||||
|
||||
constexpr float half_degree = 0.01745329f / 2;
|
||||
|
||||
@ -235,8 +240,39 @@ void init_texture_memory(const struct opb_size& opb_size)
|
||||
|
||||
uint32_t _ta_parameter_buf[((32 * 8192) + 32) / 4];
|
||||
|
||||
// Emits two text strings into `parameter` on the same text row: `label`,
// right-aligned within a field of max_label_len glyph cells, followed (after
// a one-cell gap) by the 8 characters that string::hex() writes for `number`.
//
//   parameter  TA parameter writer that both strings are appended to
//   label      label characters; `len` is passed along as the character count
//   len        number of characters in `label`; must not exceed max_label_len,
//              otherwise the unsigned subtraction below wraps around
//   number     value handed to string::hex()
//   row        text row; the y position is 16 * row (16 is the glyph height
//              passed to transform_string)
static inline void label_number(ta_parameter_writer& parameter,
|
||||
const char * label,
|
||||
const uint32_t len,
|
||||
const uint32_t number,
|
||||
const uint32_t row)
|
||||
{
|
||||
// width, in glyph cells, of the field the label is right-aligned in
constexpr uint32_t max_label_len = 10;
|
||||
// exactly 8 characters; the length is always passed explicitly alongside buf
char buf[8];
|
||||
|
||||
// presumably fills buf with the 8-digit hexadecimal form of `number`
// (string::hex is defined elsewhere -- confirm)
string::hex(buf, 8, number);
|
||||
// label: shifted right by one 8-pixel cell for every unused character of
// the field, so the label's last glyph ends where the field ends
font_bitmap::transform_string(parameter,
|
||||
8, 16, // texture
|
||||
8, 16, // glyph
|
||||
16 + (8 * (max_label_len - len)), // position x
|
||||
16 * row, // position y
|
||||
label, len);
|
||||
// number: starts one glyph cell after the end of the label field
font_bitmap::transform_string(parameter,
|
||||
8, 16, // texture
|
||||
8, 16, // glyph
|
||||
16 + (8 * (max_label_len + 1)), // position x
|
||||
16 * row, // position y
|
||||
buf, 8);
|
||||
}
|
||||
|
||||
void main()
|
||||
{
|
||||
sh7091.TMU.TSTR = 0; // stop all timers
|
||||
sh7091.TMU.TOCR = tmu::tocr::tcoe::tclk_is_external_clock_or_input_capture;
|
||||
sh7091.TMU.TCR0 = tmu::tcr0::tpsc::p_phi_256; // 256 / 200MHz = 1.28 μs ; underflows in ~1 hour
|
||||
sh7091.TMU.TCOR0 = 0xffff'ffff;
|
||||
sh7091.TMU.TCNT0 = 0xffff'ffff;
|
||||
sh7091.TMU.TSTR = tmu::tstr::str0::counter_start;
|
||||
|
||||
vga();
|
||||
|
||||
auto src = reinterpret_cast<const uint8_t *>(&_binary_verite_8x16_data_start);
|
||||
@ -282,45 +318,86 @@ void main()
|
||||
{0.f, 0.f, 0.f},
|
||||
};
|
||||
|
||||
uint32_t t_transform_start = 0;
|
||||
uint32_t t_transform_end = 0;
|
||||
uint32_t t_text_start = 0;
|
||||
uint32_t t_text_end = 0;
|
||||
uint32_t t_transfer_start = 0;
|
||||
uint32_t t_transfer_end = 0;
|
||||
uint32_t t_render_start = 0;
|
||||
uint32_t t_render_end = 0;
|
||||
|
||||
while (1) {
|
||||
ta_polygon_converter_init(opb_size.total(),
|
||||
ta_alloc,
|
||||
640 / 32,
|
||||
480 / 32);
|
||||
|
||||
float theta2 = 3.14 * 2 * sin(theta / 7);
|
||||
|
||||
lights[0].x = cos(theta) * 15;
|
||||
lights[0].z = sin(theta) * 15;
|
||||
|
||||
lights[1].x = cos(theta2 + half_degree * 180.f) * 15;
|
||||
lights[1].z = sin(theta2 + half_degree * 180.f) * 15;
|
||||
|
||||
lights[2].x = cos(theta + half_degree * 360.f) * 15;
|
||||
lights[2].z = sin(theta + half_degree * 360.f) * 15;
|
||||
|
||||
auto parameter = ta_parameter_writer(ta_parameter_buf);
|
||||
for (uint32_t i = 0; i < MODEL::num_faces; i++) {
|
||||
transform(parameter, i, theta, lights);
|
||||
}
|
||||
transform2(parameter, lights[0], {1.f, 0.f, 0.f, 1.f});
|
||||
transform2(parameter, lights[1], {0.f, 1.f, 0.f, 1.f});
|
||||
transform2(parameter, lights[2], {0.f, 0.f, 1.f, 1.f});
|
||||
|
||||
font_bitmap::transform_string(parameter,
|
||||
8, 16, // texture
|
||||
8, 16, // glyph
|
||||
40, 40, // position
|
||||
"test", 4);
|
||||
// transform start
|
||||
t_transform_start = sh7091.TMU.TCNT0;
|
||||
{
|
||||
const float theta2 = 3.14 * 2 * sin(theta / 7);
|
||||
|
||||
lights[0].x = cos(theta) * 15;
|
||||
lights[0].z = sin(theta) * 15;
|
||||
|
||||
lights[1].x = cos(theta2 + half_degree * 180.f) * 15;
|
||||
lights[1].z = sin(theta2 + half_degree * 180.f) * 15;
|
||||
|
||||
lights[2].x = cos(theta + half_degree * 360.f) * 15;
|
||||
lights[2].z = sin(theta + half_degree * 360.f) * 15;
|
||||
|
||||
for (uint32_t i = 0; i < MODEL::num_faces; i++) {
|
||||
transform(parameter, i, theta, lights);
|
||||
}
|
||||
transform2(parameter, lights[0], {1.f, 0.f, 0.f, 1.f});
|
||||
transform2(parameter, lights[1], {0.f, 1.f, 0.f, 1.f});
|
||||
transform2(parameter, lights[2], {0.f, 0.f, 1.f, 1.f});
|
||||
}
|
||||
t_transform_end = sh7091.TMU.TCNT0;
|
||||
// transform end
|
||||
|
||||
uint32_t _t_text_start = sh7091.TMU.TCNT0;
|
||||
{
|
||||
|
||||
const uint32_t transform = t_transform_start - t_transform_end;
|
||||
label_number(parameter, "transform:", 10, transform, 1);
|
||||
|
||||
const uint32_t text = t_text_start - t_text_end;
|
||||
label_number(parameter, "text:", 5, text, 2);
|
||||
|
||||
const uint32_t transfer = t_transfer_start - t_transfer_end;
|
||||
label_number(parameter, "transfer:", 9, transfer, 3);
|
||||
|
||||
const uint32_t render = t_render_start - t_render_end;
|
||||
label_number(parameter, "render:", 7, render, 4);
|
||||
}
|
||||
t_text_start = _t_text_start;
|
||||
t_text_end = sh7091.TMU.TCNT0;
|
||||
|
||||
parameter.append<ta_global_parameter::end_of_list>() = ta_global_parameter::end_of_list(para_control::para_type::end_of_list);
|
||||
ta_polygon_converter_transfer(ta_parameter_buf, parameter.offset);
|
||||
ta_wait_opaque_list();
|
||||
core_start_render(frame_ix, num_frames);
|
||||
|
||||
v_sync_out();
|
||||
core_wait_end_of_render_video(frame_ix, num_frames);
|
||||
theta += half_degree;
|
||||
// transfer start
|
||||
t_transfer_start = sh7091.TMU.TCNT0;
|
||||
{
|
||||
ta_polygon_converter_transfer(ta_parameter_buf, parameter.offset);
|
||||
ta_wait_opaque_list();
|
||||
}
|
||||
t_transfer_end = sh7091.TMU.TCNT0;
|
||||
|
||||
t_render_start = sh7091.TMU.TCNT0;
|
||||
core_start_render(frame_ix, num_frames);
|
||||
core_wait_end_of_render_video();
|
||||
t_render_end = sh7091.TMU.TCNT0;
|
||||
|
||||
while (!spg_status::vsync(holly.SPG_STATUS)) {
|
||||
}
|
||||
core_flip(frame_ix, num_frames);
|
||||
while (spg_status::vsync(holly.SPG_STATUS)) {
|
||||
}
|
||||
|
||||
theta += half_degree * 0.5;
|
||||
frame_ix += 1;
|
||||
}
|
||||
}
|
||||
|
@ -152,7 +152,7 @@ void transform_string(ta_parameter_writer& parameter,
|
||||
|
||||
x *= static_cast<float>(glyph_width * 1);
|
||||
y *= static_cast<float>(glyph_height * 1);
|
||||
x += static_cast<float>(position_x + glyph_width * 4 * string_ix);
|
||||
x += static_cast<float>(position_x + glyph_width * string_ix);
|
||||
y += static_cast<float>(position_y);
|
||||
z = 1.f / (z + 10.f);
|
||||
|
||||
|
@ -67,7 +67,7 @@ void core_start_render(uint32_t frame_address,
|
||||
holly.FB_W_CTRL = fb_w_ctrl::fb_dither | fb_w_ctrl::fb_packmode::_565_rgb_16bit;
|
||||
holly.FB_W_LINESTRIDE = (frame_linestride * 2) / 8;
|
||||
|
||||
uint32_t w_fb = ((frame_ix + 0) & num_frames) * frame_size;
|
||||
uint32_t w_fb = (frame_ix & num_frames) * frame_size;
|
||||
holly.FB_W_SOF1 = frame_address + w_fb;
|
||||
|
||||
holly.STARTRENDER = 1;
|
||||
@ -95,11 +95,8 @@ void core_wait_end_of_render_video()
|
||||
| ISTNRM__END_OF_RENDER_VIDEO;
|
||||
}
|
||||
|
||||
void core_wait_end_of_render_video(uint32_t frame_ix, uint32_t num_frames)
|
||||
void core_flip(uint32_t frame_ix, uint32_t num_frames)
|
||||
{
|
||||
core_wait_end_of_render_video();
|
||||
|
||||
// hmm hacky...
|
||||
uint32_t r_fb = ((frame_ix + 1) & num_frames) * 0x00096000;
|
||||
uint32_t r_fb = (frame_ix & num_frames) * 0x00096000;
|
||||
holly.FB_R_SOF1 = (offsetof (struct texture_memory_alloc, framebuffer)) + r_fb;
|
||||
}
|
||||
|
@ -9,4 +9,4 @@ void core_start_render(uint32_t frame_address,
|
||||
void core_start_render(uint32_t frame_ix, uint32_t num_frames);
|
||||
|
||||
void core_wait_end_of_render_video();
|
||||
void core_wait_end_of_render_video(uint32_t frame_ix, uint32_t num_frames);
|
||||
void core_flip(uint32_t frame_ix, uint32_t num_frames);
|
||||
|
@ -25,7 +25,7 @@ void init()
|
||||
| ice::ic_used // instruction cache enable
|
||||
| oci::clear_v_and_u_bits_of_all_oc_entries // operand cache invalidate
|
||||
| oce::oc_used // operand cache enable
|
||||
// | cb::copy_back_mode // enable copy-back mode for the P1 area
|
||||
| cb::copy_back_mode // enable copy-back mode for the P1 area
|
||||
;
|
||||
|
||||
sh7091.CCN.MMUCR = ccn::mmucr::at::mmu_disabled;
|
||||
|
@ -2,6 +2,6 @@
|
||||
|
||||
namespace cache {
|
||||
|
||||
void init() __attribute__ ((section (".p2ram.cache_init")));
|
||||
void init() __attribute__ ((section (".text.startup.cache_init")));
|
||||
|
||||
}
|
||||
|
@ -33,7 +33,7 @@ void character(const char c)
|
||||
// wait for transmit fifo to become empty
|
||||
while ((sh7091.SCIF.SCFSR2 & scfsr2::tdfe::bit_mask) == 0);
|
||||
|
||||
for (int i = 0; i < 100000; i++) {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
asm volatile ("nop;");
|
||||
}
|
||||
|
||||
|
4
vga.cpp
4
vga.cpp
@ -100,20 +100,16 @@ void vga2()
|
||||
void v_sync_in()
|
||||
{
|
||||
while (!spg_status::vsync(holly.SPG_STATUS)) {
|
||||
asm volatile ("nop");
|
||||
}
|
||||
while (spg_status::vsync(holly.SPG_STATUS)) {
|
||||
asm volatile ("nop");
|
||||
}
|
||||
}
|
||||
|
||||
void v_sync_out()
|
||||
{
|
||||
while (spg_status::vsync(holly.SPG_STATUS)) {
|
||||
asm volatile ("nop");
|
||||
}
|
||||
while (!spg_status::vsync(holly.SPG_STATUS)) {
|
||||
asm volatile ("nop");
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user