draw a textured triangle strip

This draws a nice macaw texture in a square-shaped triangle
strip. The square is then rotated around the y-axis.

I dealt with myriad bugs while experimenting with this, all of them
entirely my fault:

- macaw texture colors were incorrect because GIMP was exporting raw
  RGB data in gamma-corrected sRGB space, whereas the Dreamcast is in
  linear color space.

- macaw texture colors were incorrect because I truncated color values
  to the least significant rather than most significant bits.

- macaw rotation around the Y axis caused the macaw texture to
  distort, stretch and recurse in interesting and unexpected ways. This
  was caused by sending Z values in the wrong coordinate space (Z), in
  contrast to what is expected by the Dreamcast (1/z). Reordering
  z-coordinate operations so that the reciprocal is computed last
  resolved this.

- macaw rotation around the Y axis caused the macaw texture to warp
  unexpectedly, but only on real hardware. This was caused by
  unnecessarily negating Z coordinate values.

Behavior for each of the Z-coordinate issues differed between Flycast
and real Dreamcast hardware.

I also did several tests related to SH4 cache behavior, particularly
related to the "copy-back" mode. I verified copy-back behavior on a
real Dreamcast, and experimented with the operand cache write-back
instruction, "ocbwb".

In particular, when the `scene` buffer is accessed from cacheable
memory, e.g. the P1 area, and CCR__CB is enabled, DMA from physical
memory to the TA FIFO polygon converter will fail because the scene
data has not yet been written to physical memory. `ocbwb` can be
used to "write back" scene from the SH4 operand cache to physical
memory--only the latter is visible from the CH2-DMA perspective.
This commit is contained in:
Zack Buhman 2023-12-06 20:57:52 +08:00
parent bac7618df2
commit 9610c428bd
13 changed files with 205 additions and 42 deletions

View File

@ -17,7 +17,12 @@ void cache_init()
sh7091_oc_a[i][0] = 0; sh7091_oc_a[i][0] = 0;
} }
sh7091.CCN.CCR = CCR__ICI | CCR__ICE | CCR__OCI | CCR__OCE; sh7091.CCN.CCR = CCR__ICI // instruction cache invalidate
| CCR__ICE // instruction cache enable
| CCR__OCI // operand cache invalidate
| CCR__OCE // operand cache enable
// | CCR__CB // enable copy-back mode for the P1 area
;
sh7091.CCN.MMUCR = 0; sh7091.CCN.MMUCR = 0;

View File

@ -57,7 +57,8 @@ MAIN_OBJ = \
holly/ta_parameter.o \ holly/ta_parameter.o \
holly/ta_fifo_polygon_converter.o \ holly/ta_fifo_polygon_converter.o \
holly/core.o \ holly/core.o \
scene.o scene.o \
macaw.data.o
all: main.cdi all: main.cdi

View File

@ -76,9 +76,8 @@ void core_start_render(int fb)
int w_fb = (!(!fb)) * 0x00096000; int w_fb = (!(!fb)) * 0x00096000;
int r_fb = (!fb) * 0x00096000; int r_fb = (!fb) * 0x00096000;
holly.FB_W_SOF1 = (offsetof (struct texture_memory_alloc, framebuffer)) + w_fb; holly.FB_W_SOF1 = (offsetof (struct texture_memory_alloc, framebuffer)) + w_fb;
holly.FB_W_SOF2 = (offsetof (struct texture_memory_alloc, framebuffer)) + w_fb; //holly.FB_R_SOF1 = (offsetof (struct texture_memory_alloc, framebuffer)) + r_fb;
holly.FB_R_SOF1 = (offsetof (struct texture_memory_alloc, framebuffer)) + r_fb; holly.FB_R_SOF1 = (offsetof (struct texture_memory_alloc, framebuffer)) + w_fb;
holly.FB_R_SOF2 = (offsetof (struct texture_memory_alloc, framebuffer)) + r_fb;
holly.STARTRENDER = 1; holly.STARTRENDER = 1;
} }

View File

@ -65,8 +65,19 @@ namespace tsp_instruction_word {
constexpr uint32_t use_alpha = 1 << 20; constexpr uint32_t use_alpha = 1 << 20;
constexpr uint32_t ignore_tex_alpha = 1 << 19; constexpr uint32_t ignore_tex_alpha = 1 << 19;
// flip_uv namespace flip_uv {
// clamp_uv constexpr uint32_t none = 0 << 17;
constexpr uint32_t v = 1 << 17;
constexpr uint32_t u = 2 << 17;
constexpr uint32_t uv = 3 << 17;
}
namespace clamp_uv {
constexpr uint32_t none = 0 << 15;
constexpr uint32_t v = 1 << 15;
constexpr uint32_t u = 2 << 15;
constexpr uint32_t uv = 3 << 15;
}
namespace filter_mode { namespace filter_mode {
constexpr uint32_t point_sampled = 0b00 << 13; constexpr uint32_t point_sampled = 0b00 << 13;
@ -111,3 +122,26 @@ namespace tsp_instruction_word {
constexpr uint32_t _1024 = 7 << 0; constexpr uint32_t _1024 = 7 << 0;
} }
} }
// Bit fields of the 32-bit texture control word stored in a textured global
// parameter. The constants are meant to be OR-ed together into a single word.
//
// All shifts use unsigned literals: `1 << 31` on a signed int shifts into the
// sign bit, which is not a valid constant expression in every C++ dialect.
namespace texture_control_word {
  constexpr uint32_t mip_mapped = 1u << 31;
  constexpr uint32_t vq_compressed = 1u << 30;

  // texel format selector, occupying bits 29-27
  namespace pixel_format {
    constexpr uint32_t _1555 = 0u << 27;
    constexpr uint32_t _565 = 1u << 27;
    constexpr uint32_t _4444 = 2u << 27;
    constexpr uint32_t yuv422 = 3u << 27;
    constexpr uint32_t bump_map = 4u << 27;
    constexpr uint32_t _4bpp_palette = 5u << 27;
    constexpr uint32_t _8bpp_palette = 6u << 27;
  }

  // set for non-twiddled texture data
  constexpr uint32_t scan_order = 1u << 26;
  constexpr uint32_t stride_select = 1u << 25;

  // Address field, in 8-byte units: the caller divides the byte address by 8
  // before calling. Only the low 21 bits are kept; higher bits are discarded.
  constexpr uint32_t texture_address(uint32_t a) {
    return a & 0x1fffffu;
  }
}

View File

@ -51,10 +51,22 @@ void ta_polygon_converter_transfer(volatile uint32_t * buf, uint32_t size)
DCDBSysArc990907E's claim, it does not appear to be useful to check TE. */ DCDBSysArc990907E's claim, it does not appear to be useful to check TE. */
//while ((sh7091.DMAC.CHCR2 & CHCR2__TE) == 0); /* 1 == all transfers are completed */ //while ((sh7091.DMAC.CHCR2 & CHCR2__TE) == 0); /* 1 == all transfers are completed */
/* start a new CH2-DMA transfer from "system memory" to "TA FIFO polygon converter" */ /* "Write back" the entire buffer to physical memory.
// this dummy read is required on real hardware.
This is required on real hardware if CCR__CB is enabled, and `buf` is in a
cacheable area (e.g: system memory access via 0x8c00_0000).*/
for (uint32_t i = 0; i < size / 32; i++) {
asm volatile ("ocbwb @%0"
: // output
: "r" (&buf[(i * 32) / 4]) // input
);
}
// this dummy read appears to be required on real hardware.
volatile uint32_t _dummy = sh7091.DMAC.CHCR2; volatile uint32_t _dummy = sh7091.DMAC.CHCR2;
(void)_dummy; (void)_dummy;
/* start a new CH2-DMA transfer from "system memory" to "TA FIFO polygon converter" */
sh7091.DMAC.CHCR2 = 0; /* disable DMA channel */ sh7091.DMAC.CHCR2 = 0; /* disable DMA channel */
sh7091.DMAC.SAR2 = reinterpret_cast<uint32_t>(&buf[0]); /* start address, must be aligned to a CHCHR__TS-sized (32-byte) boundary */ sh7091.DMAC.SAR2 = reinterpret_cast<uint32_t>(&buf[0]); /* start address, must be aligned to a CHCHR__TS-sized (32-byte) boundary */
sh7091.DMAC.DMATCR2 = DMATCR2__TRANSFER_COUNT(size / 32); /* transfer count, in CHCHR__TS-sized (32-byte) units */ sh7091.DMAC.DMATCR2 = DMATCR2__TRANSFER_COUNT(size / 32); /* transfer count, in CHCHR__TS-sized (32-byte) units */

View File

@ -18,6 +18,17 @@ struct vertex_polygon_type_0 {
uint32_t _res2; uint32_t _res2;
}; };
// Vertex parameter for a textured polygon with packed colors.
// Filled in by textured_vertex(); eight 32-bit fields, 32 bytes in total.
struct vertex_polygon_type_3 {
  uint32_t parameter_control_word; // parameter type, plus the end-of-strip flag
  float x;
  float y;
  float z;                         // stored exactly as supplied by the caller
  float u;                         // texture coordinates
  float v;
  uint32_t base_color;
  uint32_t offset_color;
};

// Layout checks, mirroring the ones kept for vertex_polygon_type_0: this struct
// is written straight into the parameter buffer, so padding or a reordered
// field would silently corrupt the parameters.
static_assert((sizeof (vertex_polygon_type_3)) == 32);
static_assert((offsetof (struct vertex_polygon_type_3, parameter_control_word)) == 0x00);
static_assert((offsetof (struct vertex_polygon_type_3, x)) == 0x04);
static_assert((offsetof (struct vertex_polygon_type_3, y)) == 0x08);
static_assert((offsetof (struct vertex_polygon_type_3, z)) == 0x0c);
static_assert((offsetof (struct vertex_polygon_type_3, u)) == 0x10);
static_assert((offsetof (struct vertex_polygon_type_3, v)) == 0x14);
static_assert((offsetof (struct vertex_polygon_type_3, base_color)) == 0x18);
static_assert((offsetof (struct vertex_polygon_type_3, offset_color)) == 0x1c);
static_assert((sizeof (vertex_polygon_type_0)) == 32); static_assert((sizeof (vertex_polygon_type_0)) == 32);
static_assert((offsetof (struct vertex_polygon_type_0, parameter_control_word)) == 0x00); static_assert((offsetof (struct vertex_polygon_type_0, parameter_control_word)) == 0x00);
static_assert((offsetof (struct vertex_polygon_type_0, x)) == 0x04); static_assert((offsetof (struct vertex_polygon_type_0, x)) == 0x04);
@ -155,7 +166,7 @@ void triangle(volatile uint32_t * buf)
parameter->parameter_control_word = para_control::para_type::polygon_or_modifier_volume parameter->parameter_control_word = para_control::para_type::polygon_or_modifier_volume
| para_control::list_type::opaque | para_control::list_type::opaque
| obj_control::col_type::packed_color; | obj_control::col_type::packed_color;
parameter->isp_tsp_instruction_word = isp_tsp_instruction_word::depth_compare_mode::always parameter->isp_tsp_instruction_word = isp_tsp_instruction_word::depth_compare_mode::always
| isp_tsp_instruction_word::culling_mode::no_culling; | isp_tsp_instruction_word::culling_mode::no_culling;
@ -170,6 +181,68 @@ void triangle(volatile uint32_t * buf)
parameter->next_address_for_sort_dma = 0; parameter->next_address_for_sort_dma = 0;
} }
// Write one textured vertex parameter (a vertex_polygon_type_3) at `buf`.
//
//   buf           destination; receives one vertex_polygon_type_3 record
//   x, y, z       position, stored exactly as given
//   u, v          texture coordinates
//   base_color    packed base color
//   offset_color  packed offset color
//   end_of_strip  true for the final vertex of a triangle strip
void textured_vertex(volatile uint32_t * buf,
                     const float x,
                     const float y,
                     const float z,
                     const float u,
                     const float v,
                     const uint32_t base_color,
                     const uint32_t offset_color,
                     bool end_of_strip
                     )
{
  volatile vertex_polygon_type_3 * parameter = reinterpret_cast<volatile vertex_polygon_type_3 *>(buf);

  // Assemble the control word in a local and store it once. OR-ing the flag
  // into the volatile field would force a read-back of the destination buffer
  // followed by a second store.
  uint32_t parameter_control_word = para_control::para_type::vertex_parameter;
  if (end_of_strip) {
    parameter_control_word |= para_control::end_of_strip;
  }

  parameter->parameter_control_word = parameter_control_word;
  parameter->x = x;
  parameter->y = y;
  parameter->z = z;
  parameter->u = u;
  parameter->v = v;
  parameter->base_color = base_color;
  parameter->offset_color = offset_color;
}
void textured_triangle(volatile uint32_t * buf,
uint32_t texture_address)
{
volatile global_polygon_type_0 * parameter = reinterpret_cast<volatile global_polygon_type_0 *>(buf);
parameter->parameter_control_word = para_control::para_type::polygon_or_modifier_volume
| para_control::list_type::opaque
| obj_control::col_type::packed_color
| obj_control::texture;
parameter->isp_tsp_instruction_word = isp_tsp_instruction_word::depth_compare_mode::always
| isp_tsp_instruction_word::culling_mode::no_culling;
// <Note> Because a value of "0.0" is invalid for [MIP-Map] D [adjust], it must not be specified.
parameter->tsp_instruction_word = tsp_instruction_word::src_alpha_instr::one
| tsp_instruction_word::dst_alpha_instr::zero
| tsp_instruction_word::fog_control::no_fog
//| tsp_instruction_word::mip_map_d_adjust(0b0100) // 1.0 (2.2 fixed-point)
//| tsp_instruction_word::filter_mode::bilinear_filter
//| tsp_instruction_word::clamp_uv::uv
//| tsp_instruction_word::flip_uv::uv
| tsp_instruction_word::texture_u_size::_128 // 128px
| tsp_instruction_word::texture_v_size::_128; // 128px
if ((texture_address & 63) != 0) while (1);
parameter->texture_control_word = texture_control_word::pixel_format::_565
| texture_control_word::scan_order // non-twiddled
| texture_control_word::texture_address(texture_address / 8);
parameter->_res0 = 0;
parameter->_res1 = 0;
parameter->data_size_for_sort_dma = 0;
parameter->next_address_for_sort_dma = 0;
}
void end_of_list(volatile uint32_t * buf) void end_of_list(volatile uint32_t * buf)
{ {
volatile global_end_of_list * parameter = reinterpret_cast<volatile global_end_of_list *>(buf); volatile global_end_of_list * parameter = reinterpret_cast<volatile global_end_of_list *>(buf);

View File

@ -12,4 +12,19 @@ void vertex(volatile uint32_t * buf,
void triangle(volatile uint32_t * buf); void triangle(volatile uint32_t * buf);
void textured_vertex(volatile uint32_t * buf,
const float x,
const float y,
const float z,
const float u,
const float v,
const uint32_t base_color,
const uint32_t offset_color,
bool end_of_strip
);
void textured_triangle(volatile uint32_t * buf,
uint32_t texture_address
);
void end_of_list(volatile uint32_t * buf); void end_of_list(volatile uint32_t * buf);

View File

@ -9,4 +9,6 @@ struct texture_memory_alloc {
uint32_t region_array[0x00002000 / 4]; // REGION_BASE uint32_t region_array[0x00002000 / 4]; // REGION_BASE
uint32_t background[0x00000040 / 4]; // ISP_BACKGND_T uint32_t background[0x00000040 / 4]; // ISP_BACKGND_T
uint32_t framebuffer[2][0x00096000 / 4]; // FB_R_SOF1 / FB_W_SOF1 uint32_t framebuffer[2][0x00096000 / 4]; // FB_R_SOF1 / FB_W_SOF1
uint32_t _res1[ 0x20 / 4]; // (re-align texture to a 64-byte boundary)
uint16_t texture[128 * 128 * 2 / 2]; // texture_control_word::texture_address
}; };

BIN
macaw.data Normal file

Binary file not shown.

5
macaw.h Normal file
View File

@ -0,0 +1,5 @@
#include <cstdint>
// Symbols describing the embedded macaw.data image. Only their addresses are
// meaningful: main() takes &_binary_macaw_data_start as the pointer to the raw
// bytes and &_binary_macaw_data_size as the byte count.
// NOTE(review): presumably emitted by the linker/objcopy step that produces
// macaw.data.o — confirm against the build rules.
extern uint32_t _binary_macaw_data_start __asm("_binary_macaw_data_start");
extern uint32_t _binary_macaw_data_end __asm("_binary_macaw_data_end");
extern uint32_t _binary_macaw_data_size __asm("_binary_macaw_data_size");

View File

@ -10,12 +10,16 @@
#include "holly/ta_fifo_polygon_converter.h" #include "holly/ta_fifo_polygon_converter.h"
#include "systembus.h" #include "systembus.h"
#include "holly/texture_memory_alloc.h"
#include "cache.h" #include "cache.h"
#include "load.h" #include "load.h"
#include "vga.h" #include "vga.h"
#include "rgb.h" #include "rgb.h"
#include "scene.h" #include "scene.h"
#include "macaw.h"
extern uint32_t __bss_link_start __asm("__bss_link_start"); extern uint32_t __bss_link_start __asm("__bss_link_start");
extern uint32_t __bss_link_end __asm("__bss_link_end"); extern uint32_t __bss_link_end __asm("__bss_link_end");
@ -87,12 +91,25 @@ void main()
} }
} }
volatile texture_memory_alloc * mem = reinterpret_cast<volatile texture_memory_alloc *>(0xa400'0000);
volatile uint8_t * macaw = reinterpret_cast<volatile uint8_t *>(&_binary_macaw_data_start);
uint32_t macaw_size = reinterpret_cast<uint32_t>(&_binary_macaw_data_size);
for (uint32_t px = 0; px < macaw_size / 3; px++) {
uint8_t r = macaw[px * 3 + 0];
uint8_t g = macaw[px * 3 + 1];
uint8_t b = macaw[px * 3 + 2];
uint16_t rgb565 = ((r / 8) << 11) | ((g / 4) << 5) | ((b / 8) << 0);
mem->texture[px] = rgb565;
}
holly.SOFTRESET = softreset::pipeline_soft_reset holly.SOFTRESET = softreset::pipeline_soft_reset
| softreset::ta_soft_reset; | softreset::ta_soft_reset;
holly.SOFTRESET = 0; holly.SOFTRESET = 0;
system.LMMODE0 = 1; //system.LMMODE0 = 1; // texture memory through TA FIFO
system.LMMODE1 = 1; //system.LMMODE1 = 1; // texture memory through TA FIFO (mirror)
v_sync_out(); v_sync_out();
v_sync_in(); v_sync_in();
@ -100,8 +117,6 @@ void main()
core_init(); core_init();
core_init_texture_memory(); core_init_texture_memory();
int frame = 0;
// the address of `scene` must be a multiple of 32 bytes // the address of `scene` must be a multiple of 32 bytes
// this is mandatory for ch2-dma to the ta fifo polygon converter // this is mandatory for ch2-dma to the ta fifo polygon converter
uint32_t * scene = align_32byte(_scene); uint32_t * scene = align_32byte(_scene);
@ -110,6 +125,9 @@ void main()
while(1); while(1);
} }
int frame = 0;
int ix = 0;
while (true) { while (true) {
v_sync_out(); v_sync_out();
v_sync_in(); v_sync_in();

View File

@ -1,9 +1,12 @@
#include <cstdint> #include <cstdint>
#include <cstddef>
#include "holly/ta_parameter.h" #include "holly/ta_parameter.h"
#include "holly/texture_memory_alloc.h"
/* /*
0,-.5 -0.5,-0.5 0.5,-0.5
| |
--- ---
-0.5,0.5 | 0.5,0.5 -0.5,0.5 | 0.5,0.5
@ -13,24 +16,28 @@ struct triangle {
float x; float x;
float y; float y;
float z; float z;
float u;
float v;
uint32_t color; uint32_t color;
}; };
const struct triangle scene_triangle[4] = { const struct triangle scene_triangle[4] = {
{ -0.5f, 0.5f, 1/10.f, 0x00000000}, // the first two base colors in a { -0.5f, 0.5f, 0.f, 0.f , 128.f/128.f, 0x00000000}, // the first two base colors in a
{ -0.5f, -0.5f, 1/10.f, 0x00000000}, // triangle strip are ignored { -0.5f, -0.5f, 0.f, 0.f , 0.f , 0x00000000}, // triangle strip are ignored
{ 0.5f, 0.5f, 1/10.f, 0xffff00ff}, { 0.5f, 0.5f, 0.f, 128.f/128.f, 128.f/128.f, 0xffff00ff},
{ 0.5f, -0.5f, 1/10.f, 0xffffff00}, { 0.5f, -0.5f, 0.f, 128.f/128.f, 0.f , 0xffffff00},
}; };
static float theta = 0; static float theta = 0;
constexpr float one_degree = 0.01745329f; constexpr float one_degree = 0.01745329f / 2.f;
uint32_t scene_transform(volatile uint32_t * scene) uint32_t scene_transform(volatile uint32_t * scene)
{ {
uint32_t ix = 0; uint32_t ix = 0;
triangle(&scene[(32 * ix) / 4]); uint32_t address = (offsetof (struct texture_memory_alloc, texture));
textured_triangle(&scene[(32 * ix) / 4],
address);
ix++; ix++;
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
@ -38,22 +45,26 @@ uint32_t scene_transform(volatile uint32_t * scene)
float x = scene_triangle[i].x; float x = scene_triangle[i].x;
float y = scene_triangle[i].y; float y = scene_triangle[i].y;
float z = scene_triangle[i].z;
float x1; float x1;
x1 = x * __builtin_cosf(theta) - y * __builtin_sinf(theta); x1 = x * __builtin_cosf(theta) - z * __builtin_sinf(theta);
y = x * __builtin_sinf(theta) + y * __builtin_cosf(theta); z = x * __builtin_sinf(theta) + z * __builtin_cosf(theta);
x = x1; x = x1;
x *= 240.f; x *= 240.f;
y *= 240.f; y *= 240.f;
x += 320.f; x += 320.f;
y += 240.f; y += 240.f;
vertex(&scene[(32 * ix) / 4], textured_vertex(&scene[(32 * ix) / 4],
x, // x x, // x
y, // y y, // y
scene_triangle[i].z, // z 1.f / (z + 10.f), // z
scene_triangle[i].color, // base_color scene_triangle[i].u, // u
end_of_strip); scene_triangle[i].v, // v
scene_triangle[i].color, // base_color
0, // offset_color
end_of_strip);
ix++; ix++;
} }

View File

@ -1,12 +0,0 @@
#pragma once
#include <cstdint>
// Layout of the regions this project places in texture memory. Each member's
// size is given in bytes and divided by 4 because the elements are uint32_t;
// the trailing comment on each member names the Holly register(s) associated
// with that region.
struct __attribute__((__packed__)) texture_memory_alloc {
uint32_t isp_tsp_parameters[0x00100000 / 4]; // TA_ISP_BASE / PARAM_BASE (the actual objects)
uint32_t object_list[0x00100000 / 4]; // TA_OL_BASE (contains object pointer blocks)
uint32_t _res0[ 0x20 / 4]; // (the TA may clobber 4 bytes starting at TA_OL_LIMIT)
uint32_t region_array[0x00002000 / 4]; // REGION_BASE
uint32_t background[0x00000020 / 4]; // ISP_BACKGND_T
uint32_t framebuffer[2][0x00096000 / 4]; // FB_R_SOF1 / FB_W_SOF1
};