From a73327c76db62e3dcf321c1d0cdddb6c4ceacd1c Mon Sep 17 00:00:00 2001 From: Zack Buhman Date: Tue, 5 Dec 2023 20:42:37 +0800 Subject: [PATCH] fix multiple issues Successfully tested on real hardware on multiple optimization levels. I knew in the previous commit that __attribute__((aligned(32))) did not actually align to 32-bytes. However, at -Os specifically, and only with that exact code, GCC was coincidentally generating a 32-byte alignment. When the code or optimization level changed, this changed the alignment of the "scene" buffer, which caused CH2-DMA to perform incomplete copies of the TA parameters, which in turn variously caused the TA to generate incomplete/nonsensical/nonexistent object lists. This also fixes an unrelated issue with the background ISP/TSP parameters. This "worked" in flycast but not on real hardware by complete accident (a coincidence of the specific timing that the ISP/TSP parameters are read in each Dreamcast implementation). The issue is that the TSP parameters are 60 bytes long, which is greater than the 32 bytes that were previously being allocated. After changing the allocation to 64 bytes, the background color is now drawn on real hardware as expected. In addition, though this did not cause issues yet, I corrected the length of p1ram/p2ram in the linker script, to prevent future issues where GCC's memory allocations wrap around past the end of the system memory address space. 
--- alt.lds | 2 +- holly/background.cpp | 6 +++--- holly/texture_memory_alloc.h | 2 +- main.cpp | 12 ++++++++++-- main.lds | 4 ++-- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/alt.lds b/alt.lds index 18b36ba..bce99eb 100644 --- a/alt.lds +++ b/alt.lds @@ -1,7 +1,7 @@ OUTPUT_FORMAT("elf32-shl", "elf32-shl", "elf32-shl") MEMORY { - p1ram : ORIGIN = 0xac020000, LENGTH = 16M + p1ram : ORIGIN = 0xac020000, LENGTH = 0xff0000 } SECTIONS { diff --git a/holly/background.cpp b/holly/background.cpp index 8d9dc25..a4da067 100644 --- a/holly/background.cpp +++ b/holly/background.cpp @@ -37,15 +37,15 @@ void background_parameter(volatile uint32_t * buf) parameter->vertex[0].x = 0.f; parameter->vertex[0].y = 0.f; parameter->vertex[0].z = 1.f/100000; - parameter->vertex[0].base_color = 0x00000000; + parameter->vertex[0].base_color = 0xff0000ff; parameter->vertex[1].x = 639.f; parameter->vertex[1].y = 0.f; parameter->vertex[1].z = 1.f/100000; - parameter->vertex[1].base_color = 0x00000000; + parameter->vertex[1].base_color = 0xff0000ff; parameter->vertex[2].x = 639.f; parameter->vertex[2].y = 479.f; parameter->vertex[2].z = 1.f/100000; - parameter->vertex[2].base_color = 0x00000000; + parameter->vertex[2].base_color = 0xff0000ff; } diff --git a/holly/texture_memory_alloc.h b/holly/texture_memory_alloc.h index c831b9b..c6cc208 100644 --- a/holly/texture_memory_alloc.h +++ b/holly/texture_memory_alloc.h @@ -7,6 +7,6 @@ struct texture_memory_alloc { uint32_t object_list[0x00100000 / 4]; // TA_OL_BASE (contains object pointer blocks) uint32_t _res0[ 0x20 / 4]; // (the TA may clobber 4 bytes starting at TA_OL_LIMIT) uint32_t region_array[0x00002000 / 4]; // REGION_BASE - uint32_t background[0x00000020 / 4]; // ISP_BACKGND_T + uint32_t background[0x00000040 / 4]; // ISP_BACKGND_T uint32_t framebuffer[2][0x00096000 / 4]; // FB_R_SOF1 / FB_W_SOF1 }; diff --git a/main.cpp b/main.cpp index e257e38..43cd040 100644 --- a/main.cpp +++ b/main.cpp @@ -51,8 +51,14 @@ void 
serial_string(const char * s) } /* must be aligned to 32-bytes for DMA transfer */ -// the aligned(32) attribute does not actually align to 32 bytes. -volatile uint32_t __attribute__((aligned(32))) scene[(32 * 5) / 4]; +// the aligned(32) attribute does not actually align to 32 bytes; gcc is the best compiler. +// `+ 32` to allow for repositioning _scene to an actual 32-byte alignment. +uint32_t __attribute__((aligned(32))) _scene[((32 * 5) + 32) / 4]; + +uint32_t * align_32byte(uint32_t * mem) +{ + return reinterpret_cast<uint32_t *>(((reinterpret_cast<uint32_t>(_scene) + 31) & ~31)); +} extern "C" void main() @@ -95,8 +101,10 @@ void main() core_init_texture_memory(); int frame = 0; + // the address of `scene` must be a multiple of 32 bytes // this is mandatory for ch2-dma to the ta fifo polygon converter + uint32_t * scene = align_32byte(_scene); if ((reinterpret_cast<uint32_t>(scene) & 31) != 0) { serial_string("unaligned\n"); while(1); diff --git a/main.lds b/main.lds index b64b5c5..c744c77 100644 --- a/main.lds +++ b/main.lds @@ -1,8 +1,8 @@ OUTPUT_FORMAT("elf32-shl", "elf32-shl", "elf32-shl") MEMORY { - p1ram : ORIGIN = 0x8c010000, LENGTH = 16M - p2ram : ORIGIN = 0xac010000, LENGTH = 16M + p1ram : ORIGIN = 0x8c010000, LENGTH = 0xff0000 + p2ram : ORIGIN = 0xac010000, LENGTH = 0xff0000 } SECTIONS {