diff --git a/alt.lds b/alt.lds index 77d5d52..89d4c0b 100644 --- a/alt.lds +++ b/alt.lds @@ -1,16 +1,23 @@ OUTPUT_FORMAT("elf32-shl", "elf32-shl", "elf32-shl") MEMORY { - p1ram : ORIGIN = 0xac020000, LENGTH = 0xff0000 + p1ram : ORIGIN = 0x8c020000, LENGTH = 0xff0000 + p2ram : ORIGIN = 0xac020000, LENGTH = 0xff0000 } SECTIONS { - . = ORIGIN(p1ram); + . = ORIGIN(p2ram); .text ALIGN(4) : SUBALIGN(4) { KEEP(*(.text.start)) *(.text.startup.*) + } > p2ram AT>p1ram + + . = ORIGIN(p1ram) + (. - ORIGIN(p2ram)); + + .text ALIGN(4) : SUBALIGN(4) + { *(.text.*) *(.text) } > p1ram @@ -33,12 +40,6 @@ SECTIONS KEEP(*(.ctors.*)) } > p1ram - .text.p2ram ALIGN(4) : SUBALIGN(4) - { - *(.p2ram) - *(.p2ram.*) - } > p1ram - .bss ALIGN(4) (NOLOAD) : SUBALIGN(4) { *(.bss) diff --git a/example/suzanne_profile.cpp b/example/suzanne_profile.cpp index f427313..5d3c4f9 100644 --- a/example/suzanne_profile.cpp +++ b/example/suzanne_profile.cpp @@ -16,12 +16,17 @@ #include "holly/background.hpp" #include "holly/texture_memory_alloc.hpp" #include "memorymap.hpp" +#include "sh7091/sh7091.hpp" +#include "sh7091/sh7091_bits.hpp" +#include "sh7091/serial.hpp" #include "geometry/suzanne.hpp" +#include "geometry/circle.hpp" #include "math/vec4.hpp" #include "font/font_bitmap.hpp" #include "verite_8x16.hpp" +#include "string.hpp" constexpr float half_degree = 0.01745329f / 2; @@ -235,8 +240,39 @@ void init_texture_memory(const struct opb_size& opb_size) uint32_t _ta_parameter_buf[((32 * 8192) + 32) / 4]; +static inline void label_number(ta_parameter_writer& parameter, + const char * label, + const uint32_t len, + const uint32_t number, + const uint32_t row) +{ + constexpr uint32_t max_label_len = 10; + char buf[8]; + + string::hex(buf, 8, number); + font_bitmap::transform_string(parameter, + 8, 16, // texture + 8, 16, // glyph + 16 + (8 * (max_label_len - len)), // position x + 16 * row, // position y + label, len); + font_bitmap::transform_string(parameter, + 8, 16, // texture + 8, 16, // glyph + 16 + (8 * (max_label_len + 1)), // position x + 16 * row, // position y + buf, 8); +} + void main() { + sh7091.TMU.TSTR = 0; // stop all timers + sh7091.TMU.TOCR = tmu::tocr::tcoe::tclk_is_external_clock_or_input_capture; + sh7091.TMU.TCR0 = tmu::tcr0::tpsc::p_phi_256; // 256 / 200MHz = 1.28 μs ; underflows in ~1 hour + sh7091.TMU.TCOR0 = 0xffff'ffff; + sh7091.TMU.TCNT0 = 0xffff'ffff; + sh7091.TMU.TSTR = tmu::tstr::str0::counter_start; + vga(); auto src = reinterpret_cast(&_binary_verite_8x16_data_start); @@ -282,45 +318,86 @@ void main() {0.f, 0.f, 0.f}, }; + uint32_t t_transform_start = 0; + uint32_t t_transform_end = 0; + uint32_t t_text_start = 0; + uint32_t t_text_end = 0; + uint32_t t_transfer_start = 0; + uint32_t t_transfer_end = 0; + uint32_t t_render_start = 0; + uint32_t t_render_end = 0; + while (1) { ta_polygon_converter_init(opb_size.total(), ta_alloc, 640 / 32, 480 / 32); - - float theta2 = 3.14 * 2 * sin(theta / 7); - - lights[0].x = cos(theta) * 15; - lights[0].z = sin(theta) * 15; - - lights[1].x = cos(theta2 + half_degree * 180.f) * 15; - lights[1].z = sin(theta2 + half_degree * 180.f) * 15; - - lights[2].x = cos(theta + half_degree * 360.f) * 15; - lights[2].z = sin(theta + half_degree * 360.f) * 15; - auto parameter = ta_parameter_writer(ta_parameter_buf); - for (uint32_t i = 0; i < MODEL::num_faces; i++) { - transform(parameter, i, theta, lights); - } - transform2(parameter, lights[0], {1.f, 0.f, 0.f, 1.f}); - transform2(parameter, lights[1], {0.f, 1.f, 0.f, 1.f}); - transform2(parameter, lights[2], {0.f, 0.f, 1.f, 1.f}); - font_bitmap::transform_string(parameter, - 8, 16, // texture - 8, 16, // glyph - 40, 40, // position - "test", 4); + // transform start + t_transform_start = sh7091.TMU.TCNT0; + { + const float theta2 = 3.14 * 2 * sin(theta / 7); + + lights[0].x = cos(theta) * 15; + lights[0].z = sin(theta) * 15; + + lights[1].x = cos(theta2 + half_degree * 180.f) * 15; + lights[1].z = sin(theta2 + half_degree * 180.f) * 15; + + lights[2].x = cos(theta + half_degree * 360.f) * 15; + lights[2].z = sin(theta + half_degree * 360.f) * 15; + + for (uint32_t i = 0; i < MODEL::num_faces; i++) { + transform(parameter, i, theta, lights); + } + transform2(parameter, lights[0], {1.f, 0.f, 0.f, 1.f}); + transform2(parameter, lights[1], {0.f, 1.f, 0.f, 1.f}); + transform2(parameter, lights[2], {0.f, 0.f, 1.f, 1.f}); + } + t_transform_end = sh7091.TMU.TCNT0; + // transform end + + uint32_t _t_text_start = sh7091.TMU.TCNT0; + { + + const uint32_t transform = t_transform_start - t_transform_end; + label_number(parameter, "transform:", 10, transform, 1); + + const uint32_t text = t_text_start - t_text_end; + label_number(parameter, "text:", 5, text, 2); + + const uint32_t transfer = t_transfer_start - t_transfer_end; + label_number(parameter, "transfer:", 9, transfer, 3); + + const uint32_t render = t_render_start - t_render_end; + label_number(parameter, "render:", 7, render, 4); + } + t_text_start = _t_text_start; + t_text_end = sh7091.TMU.TCNT0; parameter.append() = ta_global_parameter::end_of_list(para_control::para_type::end_of_list); - ta_polygon_converter_transfer(ta_parameter_buf, parameter.offset); - ta_wait_opaque_list(); - core_start_render(frame_ix, num_frames); - v_sync_out(); - core_wait_end_of_render_video(frame_ix, num_frames); - theta += half_degree; + // transfer start + t_transfer_start = sh7091.TMU.TCNT0; + { + ta_polygon_converter_transfer(ta_parameter_buf, parameter.offset); + ta_wait_opaque_list(); + } + t_transfer_end = sh7091.TMU.TCNT0; + + t_render_start = sh7091.TMU.TCNT0; + core_start_render(frame_ix, num_frames); + core_wait_end_of_render_video(); + t_render_end = sh7091.TMU.TCNT0; + + while (!spg_status::vsync(holly.SPG_STATUS)) { + } + core_flip(frame_ix, num_frames); + while (spg_status::vsync(holly.SPG_STATUS)) { + } + + theta += half_degree * 0.5; frame_ix += 1; } } diff --git a/font/font_bitmap.cpp b/font/font_bitmap.cpp index 813ceba..f609380 100644 --- a/font/font_bitmap.cpp +++ b/font/font_bitmap.cpp @@ -152,7 +152,7 @@ void transform_string(ta_parameter_writer& parameter, x *= static_cast(glyph_width * 1); y *= static_cast(glyph_height * 1); - x += static_cast(position_x + glyph_width * 4 * string_ix); + x += static_cast(position_x + glyph_width * string_ix); y += static_cast(position_y); z = 1.f / (z + 10.f); diff --git a/holly/core.cpp b/holly/core.cpp index 0ea30c0..90a73c1 100644 --- a/holly/core.cpp +++ b/holly/core.cpp @@ -67,7 +67,7 @@ void core_start_render(uint32_t frame_address, holly.FB_W_CTRL = fb_w_ctrl::fb_dither | fb_w_ctrl::fb_packmode::_565_rgb_16bit; holly.FB_W_LINESTRIDE = (frame_linestride * 2) / 8; - uint32_t w_fb = ((frame_ix + 0) & num_frames) * frame_size; + uint32_t w_fb = (frame_ix & num_frames) * frame_size; holly.FB_W_SOF1 = frame_address + w_fb; holly.STARTRENDER = 1; @@ -95,11 +95,8 @@ void core_wait_end_of_render_video() | ISTNRM__END_OF_RENDER_VIDEO; } -void core_wait_end_of_render_video(uint32_t frame_ix, uint32_t num_frames) +void core_flip(uint32_t frame_ix, uint32_t num_frames) { - core_wait_end_of_render_video(); - - // hmm hacky... - uint32_t r_fb = ((frame_ix + 1) & num_frames) * 0x00096000; + uint32_t r_fb = (frame_ix & num_frames) * 0x00096000; holly.FB_R_SOF1 = (offsetof (struct texture_memory_alloc, framebuffer)) + r_fb; } diff --git a/holly/core.hpp b/holly/core.hpp index b9761dc..95c25e3 100644 --- a/holly/core.hpp +++ b/holly/core.hpp @@ -9,4 +9,4 @@ void core_start_render(uint32_t frame_address, void core_start_render(uint32_t frame_ix, uint32_t num_frames); void core_wait_end_of_render_video(); -void core_wait_end_of_render_video(uint32_t frame_ix, uint32_t num_frames); +void core_flip(uint32_t frame_ix, uint32_t num_frames); diff --git a/sh7091/cache.cpp b/sh7091/cache.cpp index f223b83..ee12709 100644 --- a/sh7091/cache.cpp +++ b/sh7091/cache.cpp @@ -25,7 +25,7 @@ void init() | ice::ic_used // instruction cache enable | oci::clear_v_and_u_bits_of_all_oc_entries // operand cache invalidate | oce::oc_used // operand cache enable - // | cb::copy_back_mode // enable copy-back mode for the P1 area + | cb::copy_back_mode // enable copy-back mode for the P1 area ; sh7091.CCN.MMUCR = ccn::mmucr::at::mmu_disabled; diff --git a/sh7091/cache.hpp b/sh7091/cache.hpp index 192c228..cf2e602 100644 --- a/sh7091/cache.hpp +++ b/sh7091/cache.hpp @@ -2,6 +2,6 @@ namespace cache { -void init() __attribute__ ((section (".p2ram.cache_init"))); +void init() __attribute__ ((section (".text.startup.cache_init"))); } diff --git a/sh7091/serial.cpp b/sh7091/serial.cpp index b3475ed..de42b71 100644 --- a/sh7091/serial.cpp +++ b/sh7091/serial.cpp @@ -33,7 +33,7 @@ void character(const char c) // wait for transmit fifo to become empty while ((sh7091.SCIF.SCFSR2 & scfsr2::tdfe::bit_mask) == 0); - for (int i = 0; i < 100000; i++) { + for (int i = 0; i < 10000; i++) { asm volatile ("nop;"); } diff --git a/vga.cpp b/vga.cpp index 7f4123c..a5af9ad 100644 --- a/vga.cpp +++ b/vga.cpp @@ -100,20 +100,16 @@ void vga2() void v_sync_in() { while (!spg_status::vsync(holly.SPG_STATUS)) { - asm volatile ("nop"); } while (spg_status::vsync(holly.SPG_STATUS)) { - asm volatile ("nop"); } } void v_sync_out() { while (spg_status::vsync(holly.SPG_STATUS)) { - asm volatile ("nop"); } while (!spg_status::vsync(holly.SPG_STATUS)) { - asm volatile ("nop"); } }