diff --git a/example/example.mk b/example/example.mk index 5f92511..62fc5b8 100644 --- a/example/example.mk +++ b/example/example.mk @@ -21,6 +21,18 @@ MACAW_OBJ = \ example/macaw.elf: LDSCRIPT = $(LIB)/alt.lds example/macaw.elf: $(START_OBJ) $(MACAW_OBJ) +MACAW_TWIDDLE_OBJ = \ + example/macaw_twiddle.o \ + vga.o \ + holly/core.o \ + holly/region_array.o \ + holly/background.o \ + holly/ta_fifo_polygon_converter.o \ + macaw.data.o + +example/macaw_twiddle.elf: LDSCRIPT = $(LIB)/alt.lds +example/macaw_twiddle.elf: $(START_OBJ) $(MACAW_TWIDDLE_OBJ) + MACAW_MULTIPASS_OBJ = \ example/macaw_multipass.o \ vga.o \ diff --git a/example/macaw.cpp b/example/macaw.cpp index fceac0b..71a957a 100644 --- a/example/macaw.cpp +++ b/example/macaw.cpp @@ -113,7 +113,7 @@ void main() constexpr uint32_t ta_alloc = ta_alloc_ctrl::pt_opb::no_list | ta_alloc_ctrl::tm_opb::no_list - | ta_alloc_ctrl::t_opb::_16x4byte + | ta_alloc_ctrl::t_opb::no_list | ta_alloc_ctrl::om_opb::no_list | ta_alloc_ctrl::o_opb::_16x4byte; diff --git a/example/macaw_twiddle.cpp b/example/macaw_twiddle.cpp new file mode 100644 index 0000000..c5d46cb --- /dev/null +++ b/example/macaw_twiddle.cpp @@ -0,0 +1,162 @@ +#include + +#include "align.hpp" +#include "vga.hpp" + +#include "holly/texture_memory_alloc.hpp" +#include "holly.hpp" +#include "holly/core.hpp" +#include "holly/core_bits.hpp" +#include "holly/ta_fifo_polygon_converter.hpp" +#include "holly/ta_parameter.hpp" +#include "holly/ta_bits.hpp" +#include "holly/region_array.hpp" +#include "holly/background.hpp" +#include "memorymap.hpp" +#include "twiddle.hpp" + +#include "macaw.hpp" + +struct vertex { + float x; + float y; + float z; + float u; + float v; + uint32_t color; +}; + +const struct vertex strip_vertices[4] = { + // [ position ] [ uv coordinates ] [color ] + { -0.5f, 0.5f, 0.f, 0.f , 127.f/128.f, 0x00000000}, // the first two base colors in a + { -0.5f, -0.5f, 0.f, 0.f , 0.f , 0x00000000}, // non-Gouraud triangle strip are ignored + { 0.5f, 0.5f, 
0.f, 127.f/128.f, 127.f/128.f, 0x00000000}, + { 0.5f, -0.5f, 0.f, 127.f/128.f, 0.f , 0x00000000}, +}; +constexpr uint32_t strip_length = (sizeof (strip_vertices)) / (sizeof (struct vertex)); + +static float theta = 0; +constexpr float half_degree = 0.01745329f / 2.f; + +uint32_t transform(uint32_t * ta_parameter_buf, + const vertex * strip_vertices, + const uint32_t strip_length) +{ + auto parameter = ta_parameter_writer(ta_parameter_buf); + uint32_t texture_address = (offsetof (struct texture_memory_alloc, texture)); + auto polygon = global_polygon_type_0(texture_address); + polygon.texture_control_word = texture_control_word::pixel_format::_565 + | texture_control_word::scan_order::twiddled + | texture_control_word::texture_address(texture_address / 8); + parameter.append() = polygon; + + for (uint32_t i = 0; i < strip_length; i++) { + bool end_of_strip = i == strip_length - 1; + + float x = strip_vertices[i].x; + float y = strip_vertices[i].y; + float z = strip_vertices[i].z; + float x1; + + x1 = x * __builtin_cosf(theta) - z * __builtin_sinf(theta); + z = x * __builtin_sinf(theta) + z * __builtin_cosf(theta); + x = x1; + x *= 240.f; + y *= 240.f; + x += 320.f; + y += 240.f; + z = 1.f / (z + 10.f); + + parameter.append() = + vertex_polygon_type_3(x, y, z, + strip_vertices[i].u, + strip_vertices[i].v, + strip_vertices[i].color, + end_of_strip); + } + + parameter.append() = global_end_of_list(); + + return parameter.offset; +} + +void init_texture_memory(const struct opb_size& opb_size) +{ + volatile texture_memory_alloc * mem = reinterpret_cast(texture_memory); + + background_parameter(mem->background); + + region_array2(mem->region_array, + (offsetof (struct texture_memory_alloc, object_list)), + 640 / 32, // width + 480 / 32, // height + opb_size + ); +} + +uint32_t _ta_parameter_buf[((32 * (strip_length + 2)) + 32) / 4]; + +void main() +{ + vga(); + + auto src = reinterpret_cast(&_binary_macaw_data_start); + auto size = 
reinterpret_cast(&_binary_macaw_data_size); + auto mem = reinterpret_cast(0xa400'0000); + + uint16_t temp[size / 3]; + for (uint32_t px = 0; px < size / 3; px++) { + uint8_t r = src[px * 3 + 0]; + uint8_t g = src[px * 3 + 1]; + uint8_t b = src[px * 3 + 2]; + + uint16_t rgb565 = ((r / 8) << 11) | ((g / 4) << 5) | ((b / 8) << 0); + temp[px] = rgb565; + } + twiddle::texture(mem->texture, temp, 128, 128); + + // The address of `ta_parameter_buf` must be a multiple of 32 bytes. + // This is mandatory for ch2-dma to the ta fifo polygon converter. + uint32_t * ta_parameter_buf = align_32byte(_ta_parameter_buf); + + constexpr uint32_t ta_alloc = ta_alloc_ctrl::pt_opb::no_list + | ta_alloc_ctrl::tm_opb::no_list + | ta_alloc_ctrl::t_opb::no_list + | ta_alloc_ctrl::om_opb::no_list + | ta_alloc_ctrl::o_opb::_16x4byte; + + constexpr struct opb_size opb_size = { .opaque = 16 * 4 + , .opaque_modifier = 0 + , .translucent = 0 + , .translucent_modifier = 0 + , .punch_through = 0 + }; + + constexpr uint32_t tiles = (640 / 32) * (480 / 32); + + holly.SOFTRESET = softreset::pipeline_soft_reset + | softreset::ta_soft_reset; + holly.SOFTRESET = 0; + + core_init(); + init_texture_memory(opb_size); + + uint32_t frame_ix = 0; + constexpr uint32_t num_frames = 1; + + while (true) { + ta_polygon_converter_init(opb_size.total() * tiles, ta_alloc); + uint32_t ta_parameter_size = transform(ta_parameter_buf, strip_vertices, strip_length); + ta_polygon_converter_transfer(ta_parameter_buf, ta_parameter_size); + ta_wait_opaque_list(); + + core_start_render(frame_ix, num_frames); + + v_sync_out(); + v_sync_in(); + core_wait_end_of_render_video(frame_ix, num_frames); + + theta += half_degree; + frame_ix += 1; + } +} diff --git a/holly.hpp b/holly.hpp index 6458a44..24952c6 100644 --- a/holly.hpp +++ b/holly.hpp @@ -4,92 +4,92 @@ #include "type.hpp" struct holly_reg { - reg32 ID; /* Device ID */ - reg32 REVISION; /* Revision Number */ - reg32 SOFTRESET; /* CORE & TA software reset */ + reg32 ID; /* 
Device ID */ + reg32 REVISION; /* Revision Number */ + reg32 SOFTRESET; /* CORE & TA software reset */ reg8 _pad0[8]; - reg32 STARTRENDER; /* Drawing start */ - reg32 TEST_SELECT; /* Test (writing this register is prohibited) */ + reg32 STARTRENDER; /* Drawing start */ + reg32 TEST_SELECT; /* Test (writing this register is prohibited) */ reg8 _pad1[4]; - reg32 PARAM_BASE; /* Base address for ISP parameters */ + reg32 PARAM_BASE; /* Base address for ISP parameters */ reg8 _pad2[8]; - reg32 REGION_BASE; /* Base address for Region Array */ - reg32 SPAN_SORT_CFG; /* Span Sorter control */ + reg32 REGION_BASE; /* Base address for Region Array */ + reg32 SPAN_SORT_CFG; /* Span Sorter control */ reg8 _pad3[12]; - reg32 VO_BORDER_COL; /* Border area color */ - reg32 FB_R_CTRL; /* Frame buffer read control */ - reg32 FB_W_CTRL; /* Frame buffer write control */ - reg32 FB_W_LINESTRIDE; /* Frame buffer line stride */ - reg32 FB_R_SOF1; /* Read start address for field - 1/strip - 1 */ - reg32 FB_R_SOF2; /* Read start address for field - 2/strip - 2 */ + reg32 VO_BORDER_COL; /* Border area color */ + reg32 FB_R_CTRL; /* Frame buffer read control */ + reg32 FB_W_CTRL; /* Frame buffer write control */ + reg32 FB_W_LINESTRIDE; /* Frame buffer line stride */ + reg32 FB_R_SOF1; /* Read start address for field - 1/strip - 1 */ + reg32 FB_R_SOF2; /* Read start address for field - 2/strip - 2 */ reg8 _pad4[4]; - reg32 FB_R_SIZE; /* Frame buffer XY size */ - reg32 FB_W_SOF1; /* Write start address for field - 1/strip - 1 */ - reg32 FB_W_SOF2; /* Write start address for field - 2/strip - 2 */ - reg32 FB_X_CLIP; /* Pixel clip X coordinate */ - reg32 FB_Y_CLIP; /* Pixel clip Y coordinate */ + reg32 FB_R_SIZE; /* Frame buffer XY size */ + reg32 FB_W_SOF1; /* Write start address for field - 1/strip - 1 */ + reg32 FB_W_SOF2; /* Write start address for field - 2/strip - 2 */ + reg32 FB_X_CLIP; /* Pixel clip X coordinate */ + reg32 FB_Y_CLIP; /* Pixel clip Y coordinate */ reg8 _pad5[4]; - reg32 
FPU_SHAD_SCALE; /* Intensity Volume mode */ - reg32 FPU_CULL_VAL; /* Comparison value for culling */ - reg32 FPU_PARAM_CFG; /* Parameter read control */ - reg32 HALF_OFFSET; /* Pixel sampling control */ - reg32 FPU_PERP_VAL; /* Comparison value for perpendicular polygons */ - reg32 ISP_BACKGND_D; /* Background surface depth */ - reg32 ISP_BACKGND_T; /* Background surface tag */ + reg32 FPU_SHAD_SCALE; /* Intensity Volume mode */ + reg32 FPU_CULL_VAL; /* Comparison value for culling */ + reg32 FPU_PARAM_CFG; /* Parameter read control */ + reg32 HALF_OFFSET; /* Pixel sampling control */ + reg32 FPU_PERP_VAL; /* Comparison value for perpendicular polygons */ + reg32 ISP_BACKGND_D; /* Background surface depth */ + reg32 ISP_BACKGND_T; /* Background surface tag */ reg8 _pad6[8]; - reg32 ISP_FEED_CFG; /* Translucent polygon sort mode */ + reg32 ISP_FEED_CFG; /* Translucent polygon sort mode */ reg8 _pad7[4]; - reg32 SDRAM_REFRESH; /* Texture memory refresh counter */ - reg32 SDRAM_ARB_CFG; /* Texture memory arbiter control */ - reg32 SDRAM_CFG; /* Texture memory control */ + reg32 SDRAM_REFRESH; /* Texture memory refresh counter */ + reg32 SDRAM_ARB_CFG; /* Texture memory arbiter control */ + reg32 SDRAM_CFG; /* Texture memory control */ reg8 _pad8[4]; - reg32 FOG_COL_RAM; /* Color for Look Up table Fog */ - reg32 FOG_COL_VERT; /* Color for vertex Fog */ - reg32 FOG_DENSITY; /* Fog scale value */ - reg32 FOG_CLAMP_MAX; /* Color clamping maximum value */ - reg32 FOG_CLAMP_MIN; /* Color clamping minimum value */ - reg32 SPG_TRIGGER_POS; /* External trigger signal HV counter value */ - reg32 SPG_HBLANK_INT; /* H-blank interrupt control */ - reg32 SPG_VBLANK_INT; /* V-blank interrupt control */ - reg32 SPG_CONTROL; /* Sync pulse generator control */ - reg32 SPG_HBLANK; /* H-blank control */ - reg32 SPG_LOAD; /* HV counter load value */ - reg32 SPG_VBLANK; /* V-blank control */ - reg32 SPG_WIDTH; /* Sync width control */ - reg32 TEXT_CONTROL; /* Texturing control */ - reg32 
VO_CONTROL; /* Video output control */ - reg32 VO_STARTX; /* Video output start X position */ - reg32 VO_STARTY; /* Video output start Y position */ - reg32 SCALER_CTL; /* X & Y scaler control */ + reg32 FOG_COL_RAM; /* Color for Look Up table Fog */ + reg32 FOG_COL_VERT; /* Color for vertex Fog */ + reg32 FOG_DENSITY; /* Fog scale value */ + reg32 FOG_CLAMP_MAX; /* Color clamping maximum value */ + reg32 FOG_CLAMP_MIN; /* Color clamping minimum value */ + reg32 SPG_TRIGGER_POS; /* External trigger signal HV counter value */ + reg32 SPG_HBLANK_INT; /* H-blank interrupt control */ + reg32 SPG_VBLANK_INT; /* V-blank interrupt control */ + reg32 SPG_CONTROL; /* Sync pulse generator control */ + reg32 SPG_HBLANK; /* H-blank control */ + reg32 SPG_LOAD; /* HV counter load value */ + reg32 SPG_VBLANK; /* V-blank control */ + reg32 SPG_WIDTH; /* Sync width control */ + reg32 TEXT_CONTROL; /* Texturing control */ + reg32 VO_CONTROL; /* Video output control */ + reg32 VO_STARTX; /* Video output start X position */ + reg32 VO_STARTY; /* Video output start Y position */ + reg32 SCALER_CTL; /* X & Y scaler control */ reg8 _pad9[16]; - reg32 PAL_RAM_CTRL; /* Palette RAM control */ - reg32 SPG_STATUS; /* Sync pulse generator status */ - reg32 FB_BURSTCTRL; /* Frame buffer burst control */ - reg32 FB_C_SOF; /* Current frame buffer start address */ - reg32 Y_COEFF; /* Y scaling coefficent */ - reg32 PT_ALPHA_REF; /* Alpha value for Punch Through polygon comparison */ + reg32 PAL_RAM_CTRL; /* Palette RAM control */ + reg32 SPG_STATUS; /* Sync pulse generator status */ + reg32 FB_BURSTCTRL; /* Frame buffer burst control */ + reg32 FB_C_SOF; /* Current frame buffer start address */ + reg32 Y_COEFF; /* Y scaling coefficient */ + reg32 PT_ALPHA_REF; /* Alpha value for Punch Through polygon comparison */ reg8 _pad10[4]; - reg32 TA_OL_BASE; /* Object List write start address */ - reg32 TA_ISP_BASE; /* ISP/TSP Parameter write start address */ - reg32 TA_OL_LIMIT; /* Object List write limit 
address */ - reg32 TA_ISP_LIMIT; /* ISP/TSP Parameter limit address */ - reg32 TA_NEXT_OPB; /* Start address for the Object Pointer Block */ - reg32 TA_ITP_CURRENT; /* Starting address where the next ISP/TSP Parameters are stored */ - reg32 TA_GLOB_TILE_CLIP; /* Global Tile Clip control */ - reg32 TA_ALLOC_CTRL; /* Object list control */ - reg32 TA_LIST_INIT; /* TA initialization */ - reg32 TA_YUV_TEX_BASE; /* YUV422 texture write start address */ - reg32 TA_YUV_TEX_CTRL; /* YUV converter control */ - reg32 TA_YUV_TEX_CNT; /* YUV converter macro block counter value */ + reg32 TA_OL_BASE; /* Object List write start address */ + reg32 TA_ISP_BASE; /* ISP/TSP Parameter write start address */ + reg32 TA_OL_LIMIT; /* Object List write limit address */ + reg32 TA_ISP_LIMIT; /* ISP/TSP Parameter limit address */ + reg32 TA_NEXT_OPB; /* Start address for the Object Pointer Block */ + reg32 TA_ITP_CURRENT; /* Starting address where the next ISP/TSP Parameters are stored */ + reg32 TA_GLOB_TILE_CLIP; /* Global Tile Clip control */ + reg32 TA_ALLOC_CTRL; /* Object list control */ + reg32 TA_LIST_INIT; /* TA initialization */ + reg32 TA_YUV_TEX_BASE; /* YUV422 texture write start address */ + reg32 TA_YUV_TEX_CTRL; /* YUV converter control */ + reg32 TA_YUV_TEX_CNT; /* YUV converter macro block counter value */ reg8 _pad11[12]; - reg32 TA_LIST_CONT; /* TA continuation processing */ - reg32 TA_NEXT_OPB_INIT; /* Additional OPB starting address */ + reg32 TA_LIST_CONT; /* TA continuation processing */ + reg32 TA_NEXT_OPB_INIT; /* Additional OPB starting address */ reg8 _pad12[152]; - reg8 FOG_TABLE[512]; /* Look-up table fog data */ + reg32 FOG_TABLE[128]; /* Look-up table fog data */ reg8 _pad13[512]; - reg8 TA_OL_POINTERS[2400];/* TA Object List Pointer data */ + reg32 TA_OL_POINTERS[600]; /* TA Object List Pointer data */ reg8 _pad14[160]; - reg8 PALETTE_RAM[4096]; /* Palette RAM */ + reg32 PALETTE_RAM[1024]; /* Palette RAM */ }; static_assert((offsetof (struct holly_reg, ID)) 
== 0x0); diff --git a/holly/isp_tsp.hpp b/holly/isp_tsp.hpp index dc6339d..4a468f3 100644 --- a/holly/isp_tsp.hpp +++ b/holly/isp_tsp.hpp @@ -137,7 +137,10 @@ namespace texture_control_word { constexpr uint32_t _8bpp_palette = 6 << 27; } - constexpr uint32_t scan_order = 1 << 26; + namespace scan_order { + constexpr uint32_t twiddled = 0 << 26; + constexpr uint32_t non_twiddled = 1 << 26; + } constexpr uint32_t stride_select = 1 << 25; // in 8-byte units diff --git a/holly/ta_parameter.hpp b/holly/ta_parameter.hpp index a30b255..b0d2daf 100644 --- a/holly/ta_parameter.hpp +++ b/holly/ta_parameter.hpp @@ -186,7 +186,7 @@ struct global_polygon_type_0 { | tsp_instruction_word::texture_v_size::_128 ) // 128px , texture_control_word( texture_control_word::pixel_format::_565 - | texture_control_word::scan_order // non-twiddled + | texture_control_word::scan_order::non_twiddled | texture_control_word::texture_address(texture_address / 8) ) , _res0(0) diff --git a/regs/gen/sh7091.py b/regs/gen/sh7091.py index 63f88a6..a3cf7c0 100644 --- a/regs/gen/sh7091.py +++ b/regs/gen/sh7091.py @@ -95,10 +95,10 @@ def new_writer(): type = size_to_type(size) return f"{type} {name};" else: - type = size_to_type(1) - return f"{type} {name}[{size}];" + type = size_to_type(4) + return f"{type} {name}[{size // 4}];" - yield field().ljust(25) + f"/* {description} */" + yield field().ljust(27) + f"/* {description} */" stack.append((address, name)) last_address = address + size diff --git a/regs/maple_bus_bits.ods b/regs/maple_bus_bits.ods index 3c5318c..7c93f53 100644 Binary files a/regs/maple_bus_bits.ods and b/regs/maple_bus_bits.ods differ diff --git a/twiddle.hpp b/twiddle.hpp new file mode 100644 index 0000000..0c0075d --- /dev/null +++ b/twiddle.hpp @@ -0,0 +1,72 @@ +#include + +namespace twiddle { + +/* +This reproduces the twiddle index table shown in +"3.6.2.1 Twiddled Format". 
+ + x → + 000 001 010 011 + -------------------------------- + | xyxyxy xyxyxy xyxyxy xyxyxy + |=============================== + y 000 | 000000 000010 001000 001010 + ↓ 001 | 000001 000011 001001 001011 + 010 | 000100 000110 001100 001110 + 011 | 000101 000111 001101 001111 + +alternately, in verilog syntax: + + + input [2:0] x; // x coordinate + input [2:0] y; // y coordinate + output [5:0] t; // twiddled index + assign t = {x[2], y[2], x[1], y[1], x[0], y[0]}; +*/ + +constexpr inline uint32_t from_xy(uint32_t x, uint32_t y) +{ + // maximum texture size : 1024x1024 + // maximum 1-dimensional index: 0xfffff + // bits : 19-0 + + uint32_t twiddle_ix = 0; + for (int i = 0; i <= (19 / 2); i++) { + twiddle_ix |= ((y >> i) & 1) << (i * 2 + 0); + twiddle_ix |= ((x >> i) & 1) << (i * 2 + 1); + } + + return twiddle_ix; +} + +static_assert(from_xy(0b000, 0b000) == 0); +static_assert(from_xy(0b001, 0b000) == 2); +static_assert(from_xy(0b010, 0b000) == 8); +static_assert(from_xy(0b011, 0b000) == 10); +static_assert(from_xy(0b100, 0b000) == 32); +static_assert(from_xy(0b101, 0b000) == 34); +static_assert(from_xy(0b110, 0b000) == 40); +static_assert(from_xy(0b111, 0b000) == 42); + +static_assert(from_xy(0b000, 0b001) == 1); +static_assert(from_xy(0b000, 0b010) == 4); +static_assert(from_xy(0b000, 0b011) == 5); +static_assert(from_xy(0b000, 0b100) == 16); +static_assert(from_xy(0b000, 0b101) == 17); +static_assert(from_xy(0b000, 0b110) == 20); +static_assert(from_xy(0b000, 0b111) == 21); + +template +void texture(T * dst, const T * src, const uint32_t width, const uint32_t height) +{ + for (uint32_t y = 0; y < height; y++) { + for (uint32_t x = 0; x < width; x++) { + uint32_t twiddle_ix = from_xy(x, y); + T value = src[y * width + x]; + dst[twiddle_ix] = value; + } + } +} + +}