wiffle_screen_space: use DMA for transfers to/from texture memory
This commit is contained in:
parent
65d768bdaf
commit
05e0a36ca7
@ -745,3 +745,10 @@ TEXTURE_MEMORY_OBJ = \
|
||||
|
||||
example/texture_memory.elf: LDSCRIPT = $(LIB)/main.lds
|
||||
example/texture_memory.elf: $(START_OBJ) $(TEXTURE_MEMORY_OBJ)
|
||||
|
||||
HOLLY_RECV_DMA_OBJ = \
|
||||
example/holly_recv_dma.o \
|
||||
sh7091/serial.o
|
||||
|
||||
example/holly_recv_dma.elf: LDSCRIPT = $(LIB)/main.lds
|
||||
example/holly_recv_dma.elf: $(START_OBJ) $(HOLLY_RECV_DMA_OBJ)
|
||||
|
80
example/holly_recv_dma.cpp
Normal file
80
example/holly_recv_dma.cpp
Normal file
@ -0,0 +1,80 @@
|
||||
#include "sh7091/sh7091.hpp"
|
||||
#include "sh7091/sh7091_bits.hpp"
|
||||
#include "sh7091/serial.hpp"
|
||||
#include "memorymap.hpp"
|
||||
|
||||
static void dma(uint32_t source, uint32_t destination, uint32_t length)
|
||||
{
|
||||
using namespace dmac;
|
||||
|
||||
sh7091.DMAC.CHCR1 = 0;
|
||||
|
||||
sh7091.DMAC.SAR1 = source;
|
||||
sh7091.DMAC.DAR1 = destination;
|
||||
sh7091.DMAC.DMATCR1 = length & 0x00ff'ffff;
|
||||
|
||||
sh7091.DMAC.CHCR1 = chcr::dm::destination_address_incremented
|
||||
| chcr::sm::source_address_incremented
|
||||
| chcr::rs::resource_select(0b0100) /* external address space → external address space */
|
||||
| chcr::tm::cycle_burst_mode /* transmit mode */
|
||||
//| chcr::tm::cycle_steal_mode /* transmit mode */
|
||||
| chcr::ts::_32_byte /* transfer size */
|
||||
//| chcr::ie::interrupt_request_generated
|
||||
| chcr::de::channel_operation_enabled;
|
||||
}
|
||||
|
||||
// DMA destination buffer used by main(): 256 words (1024 bytes), aligned to
// 32 bytes to match the 32-byte DMA transfer size and the 32-byte stride of
// the operand cache purge/invalidate loops.
static uint32_t buf[256] __attribute__((aligned(32)));
|
||||
|
||||
void main()
|
||||
{
|
||||
for (int i = 0; i < 256; i++) {
|
||||
buf[i] = 0;
|
||||
texture_memory32[i] = (1 << 31) | i;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < (sizeof (buf)) / 32; i++) {
|
||||
uint32_t address = (uint32_t)&buf[0];
|
||||
asm volatile ("ocbp @%0"
|
||||
: // output
|
||||
: "r" (address + (i * 32)) // input
|
||||
);
|
||||
}
|
||||
|
||||
sh7091.DMAC.DMAOR = 0;
|
||||
|
||||
serial::integer<uint32_t>(sh7091.DMAC.DMAOR);
|
||||
|
||||
serial::integer<uint32_t>((uint32_t)&buf[0]);
|
||||
|
||||
dma((uint32_t)&texture_memory32[0], (uint32_t)&buf[0], (sizeof (buf)));
|
||||
|
||||
uint32_t last_dar = sh7091.DMAC.DAR1;
|
||||
uint32_t count = 0;
|
||||
while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0) {
|
||||
uint32_t dar = sh7091.DMAC.DAR1;
|
||||
if (dar == last_dar)
|
||||
count += 1;
|
||||
else
|
||||
count = 0;
|
||||
if (count > 100)
|
||||
return;
|
||||
};
|
||||
serial::integer<uint32_t>(sh7091.DMAC.DMAOR);
|
||||
|
||||
for (uint32_t i = 0; i < (sizeof (buf)) / 32; i++) {
|
||||
uint32_t address = (uint32_t)&buf[i * 32];
|
||||
asm volatile ("ocbi @%0"
|
||||
: // output
|
||||
: "r" (address) // input
|
||||
);
|
||||
}
|
||||
|
||||
serial::string("buf:\n");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
serial::integer<uint32_t>(buf[i]);
|
||||
}
|
||||
serial::string("return\n");
|
||||
serial::string("return\n");
|
||||
serial::string("return\n");
|
||||
serial::string("return\n");
|
||||
}
|
@ -1,24 +1,29 @@
|
||||
#include <cstdint>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "align.hpp"
|
||||
#include "holly/video_output.hpp"
|
||||
|
||||
#include "holly/holly.hpp"
|
||||
#include "holly/background.hpp"
|
||||
#include "holly/core.hpp"
|
||||
#include "holly/core_bits.hpp"
|
||||
#include "holly/ta_fifo_polygon_converter.hpp"
|
||||
#include "holly/ta_parameter.hpp"
|
||||
#include "holly/ta_global_parameter.hpp"
|
||||
#include "holly/ta_vertex_parameter.hpp"
|
||||
#include "holly/holly.hpp"
|
||||
#include "holly/isp_tsp.hpp"
|
||||
#include "holly/ta_bits.hpp"
|
||||
#include "holly/region_array.hpp"
|
||||
#include "holly/background.hpp"
|
||||
#include "holly/ta_bits.hpp"
|
||||
#include "holly/ta_fifo_polygon_converter.hpp"
|
||||
#include "holly/ta_global_parameter.hpp"
|
||||
#include "holly/ta_parameter.hpp"
|
||||
#include "holly/ta_vertex_parameter.hpp"
|
||||
#include "holly/texture_memory_alloc3.hpp"
|
||||
#include "holly/video_output.hpp"
|
||||
|
||||
#include "sh7091/sh7091.hpp"
|
||||
#include "sh7091/sh7091_bits.hpp"
|
||||
#include "sh7091/store_queue.hpp"
|
||||
#include "sh7091/serial.hpp"
|
||||
|
||||
#include "memorymap.hpp"
|
||||
#include "systembus.hpp"
|
||||
#include "systembus_bits.hpp"
|
||||
|
||||
#include "geometry/wiffle.hpp"
|
||||
#include "math/vec4.hpp"
|
||||
|
||||
void convolve(uint32_t * in, uint32_t * out);
|
||||
|
||||
@ -46,8 +51,7 @@ vec3 rotate(const vec3& vertex, float theta)
|
||||
return vec3(x, y, z);
|
||||
}
|
||||
|
||||
void transform(ta_parameter_writer& parameter,
|
||||
const uint32_t face_ix,
|
||||
void transform(const uint32_t face_ix,
|
||||
const float theta,
|
||||
const vec3 lights[3])
|
||||
{
|
||||
@ -65,7 +69,7 @@ void transform(ta_parameter_writer& parameter,
|
||||
| tsp_instruction_word::fog_control::no_fog
|
||||
| tsp_instruction_word::use_alpha;
|
||||
|
||||
parameter.append<ta_global_parameter::polygon_type_0>() =
|
||||
*reinterpret_cast<ta_global_parameter::polygon_type_0 *>(store_queue) =
|
||||
ta_global_parameter::polygon_type_0(parameter_control_word,
|
||||
isp_tsp_instruction_word,
|
||||
tsp_instruction_word,
|
||||
@ -73,6 +77,7 @@ void transform(ta_parameter_writer& parameter,
|
||||
0, // data_size_for_sort_dma
|
||||
0 // next_address_for_sort_dma
|
||||
);
|
||||
sq_transfer_32byte(ta_fifo_polygon_converter);
|
||||
|
||||
auto& face = MODEL::faces[face_ix];
|
||||
|
||||
@ -158,7 +163,7 @@ void transform(ta_parameter_writer& parameter,
|
||||
|
||||
bool end_of_strip = i == strip_length - 1;
|
||||
|
||||
parameter.append<ta_vertex_parameter::polygon_type_1>() =
|
||||
*reinterpret_cast<ta_vertex_parameter::polygon_type_1 *>(store_queue) =
|
||||
ta_vertex_parameter::polygon_type_1(polygon_vertex_parameter_control_word(end_of_strip),
|
||||
x, y, z,
|
||||
scale_z, // alpha
|
||||
@ -166,22 +171,92 @@ void transform(ta_parameter_writer& parameter,
|
||||
scale_ny, // g
|
||||
scale_nz // b
|
||||
);
|
||||
sq_transfer_32byte(ta_fifo_polygon_converter);
|
||||
}
|
||||
}
|
||||
|
||||
// Parameter buffer handed to ta_parameter_writer and
// ta_polygon_converter_transfer in main(). Its address must be a multiple of
// 32 bytes, which is mandatory for ch2-dma to the ta fifo polygon converter.
uint32_t ta_parameter_buf[((32 * 8192) + 32) / 4]
|
||||
__attribute__((aligned(32)));
|
||||
void dma_transfer(uint32_t source, uint32_t destination, uint32_t transfers)
|
||||
{
|
||||
using namespace dmac;
|
||||
|
||||
uint32_t inbuf[640 * 480];
|
||||
uint32_t outbuf[640 * 480];
|
||||
volatile uint32_t _dummy = sh7091.DMAC.CHCR1;
|
||||
(void)_dummy;
|
||||
|
||||
sh7091.DMAC.CHCR1 = 0;
|
||||
|
||||
sh7091.DMAC.SAR1 = source;
|
||||
sh7091.DMAC.DAR1 = destination;
|
||||
sh7091.DMAC.DMATCR1 = transfers & 0x00ff'ffff;
|
||||
|
||||
sh7091.DMAC.CHCR1 = chcr::dm::destination_address_incremented
|
||||
| chcr::sm::source_address_incremented
|
||||
| chcr::rs::resource_select(0b0100) /* auto request; external address space → external address space */
|
||||
| chcr::tm::cycle_burst_mode /* transmit mode */
|
||||
//| chcr::tm::cycle_steal_mode /* transmit mode */
|
||||
| chcr::ts::_32_byte /* transfer size */
|
||||
//| chcr::ie::interrupt_request_generated
|
||||
| chcr::de::channel_operation_enabled;
|
||||
}
|
||||
|
||||
void ch2_dma_transfer(uint32_t source, uint32_t destination, uint32_t transfers)
|
||||
{
|
||||
using namespace dmac;
|
||||
|
||||
for (uint32_t i = 0; i < transfers; i++) {
|
||||
asm volatile ("ocbwb @%0"
|
||||
: // output
|
||||
: "r" (source + (32 * i)) // input
|
||||
);
|
||||
}
|
||||
|
||||
// this dummy read appears to be required on real hardware.
|
||||
volatile uint32_t _dummy = sh7091.DMAC.CHCR2;
|
||||
(void)_dummy;
|
||||
|
||||
/* start a new CH2-DMA transfer from "system memory" to "TA FIFO polygon converter" */
|
||||
sh7091.DMAC.CHCR2 = 0; /* disable DMA channel */
|
||||
sh7091.DMAC.SAR2 = reinterpret_cast<uint32_t>(source); /* start address, must be aligned to a CHCHR__TS-sized (32-byte) boundary */
|
||||
sh7091.DMAC.DMATCR2 = dmatcr::transfer_count(transfers); /* transfer count, in CHCHR__TS-sized (32-byte) units */
|
||||
sh7091.DMAC.CHCR2 = chcr::dm::destination_address_incremented
|
||||
| chcr::sm::source_address_incremented
|
||||
| chcr::rs::resource_select(0b0010) /* external request, single address mode;
|
||||
external address space → external device */
|
||||
| chcr::tm::cycle_burst_mode /* transmit mode */
|
||||
| chcr::ts::_32_byte /* transfer size */
|
||||
| chcr::de::channel_operation_enabled;
|
||||
|
||||
system.C2DSTAT = c2dstat::texture_memory_start_address(destination); /* CH2-DMA destination address */
|
||||
system.C2DLEN = c2dlen::transfer_length(transfers * 32); /* CH2-DMA length (must be a multiple of 32) */
|
||||
system.C2DST = 1; /* CH2-DMA start (an 'external' request from SH7091's perspective) */
|
||||
|
||||
// wait for ch2-dma completion
|
||||
while ((system.ISTNRM & istnrm::end_of_dma_ch2_dma) == 0);
|
||||
// reset ch2-dma interrupt status
|
||||
system.ISTNRM = istnrm::end_of_dma_ch2_dma;
|
||||
}
|
||||
|
||||
void dma_init()
|
||||
{
|
||||
using namespace dmac;
|
||||
|
||||
sh7091.DMAC.CHCR0 = 0;
|
||||
sh7091.DMAC.CHCR1 = 0;
|
||||
sh7091.DMAC.CHCR2 = 0;
|
||||
sh7091.DMAC.CHCR3 = 0;
|
||||
sh7091.DMAC.DMAOR = dmaor::ddt::on_demand_data_transfer_mode /* on-demand data transfer mode */
|
||||
| dmaor::pr::ch2_ch0_ch1_ch3 /* priority mode; CH2 > CH0 > CH1 > CH3 */
|
||||
| dmaor::dme::operation_enabled_on_all_channels; /* DMAC master enable */
|
||||
|
||||
}
|
||||
|
||||
// Destination of the channel 1 DMA from texture memory and input to
// convolve(); one 32-bit word per pixel of a 640x480 frame, 32-byte aligned.
static uint32_t inbuf[640 * 480] __attribute__((aligned(32)));
|
||||
// Output of convolve() and source of the ch2-dma transfer to the framebuffer;
// ch2-dma requires its start address to be aligned to a 32-byte boundary.
static uint32_t outbuf[640 * 480] __attribute__((aligned(32)));
|
||||
|
||||
void main()
|
||||
{
|
||||
dma_init();
|
||||
video_output::set_mode_vga();
|
||||
|
||||
// The address of `ta_parameter_buf` must be a multiple of 32 bytes.
|
||||
// This is mandatory for ch2-dma to the ta fifo polygon converter.
|
||||
|
||||
constexpr uint32_t ta_alloc = ta_alloc_ctrl::pt_opb::no_list
|
||||
| ta_alloc_ctrl::tm_opb::no_list
|
||||
| ta_alloc_ctrl::t_opb::_16x4byte
|
||||
@ -250,9 +325,8 @@ void main()
|
||||
lights[2].x = cos(theta + half_degree * 360.f) * 20;
|
||||
lights[2].z = sin(theta + half_degree * 360.f) * 20;
|
||||
|
||||
auto parameter = ta_parameter_writer(ta_parameter_buf);
|
||||
for (uint32_t i = 0; i < MODEL::num_faces; i++) {
|
||||
transform(parameter, i, theta, lights);
|
||||
transform(i, theta, lights);
|
||||
}
|
||||
/*
|
||||
transform2(parameter, lights[0], {1.f, 0.f, 0.f, 1.f});
|
||||
@ -260,8 +334,10 @@ void main()
|
||||
transform2(parameter, lights[2], {0.f, 0.f, 1.f, 1.f});
|
||||
*/
|
||||
|
||||
parameter.append<ta_global_parameter::end_of_list>() = ta_global_parameter::end_of_list(para_control::para_type::end_of_list);
|
||||
ta_polygon_converter_transfer(ta_parameter_buf, parameter.offset);
|
||||
*reinterpret_cast<ta_global_parameter::end_of_list *>(store_queue) =
|
||||
ta_global_parameter::end_of_list(para_control::para_type::end_of_list);
|
||||
sq_transfer_32byte(ta_fifo_polygon_converter);
|
||||
|
||||
ta_wait_translucent_list();
|
||||
|
||||
holly.FB_W_CTRL = fb_w_ctrl::fb_packmode::_8888_argb_32bit;
|
||||
@ -276,11 +352,31 @@ void main()
|
||||
core_wait_end_of_render_video();
|
||||
|
||||
uint32_t * in = (uint32_t *)&texture_memory64[texture_memory_alloc.texture.start / 4];
|
||||
//uint32_t * out = (uint32_t *)&texture_memory32[texture_memory_alloc.framebuffer[0].start / 4];
|
||||
|
||||
serial::string("ch1 dma start\n");
|
||||
dma_transfer((uint32_t)in, (uint32_t)inbuf, 640 * 480 * 4 / 32);
|
||||
while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0);
|
||||
serial::string("ch1 dma end\n");
|
||||
|
||||
for (uint32_t i = 0; i < (sizeof (640 * 480 * 4)) / 32; i++) {
|
||||
uint32_t address = (uint32_t)&inbuf[0];
|
||||
asm volatile ("ocbp @%0"
|
||||
: // output
|
||||
: "r" (address + (i * 32)) // input
|
||||
);
|
||||
}
|
||||
|
||||
uint32_t * out = (uint32_t *)&texture_memory32[texture_memory_alloc.framebuffer[0].start / 4];
|
||||
convolve(in, out);
|
||||
serial::string("convolve start\n");
|
||||
convolve(inbuf, outbuf);
|
||||
serial::string("convolve end\n");
|
||||
|
||||
uint32_t framebuffer = 0x11000000 + texture_memory_alloc.framebuffer[0].start; // TA FIFO - Direct Texture Path
|
||||
system.LMMODE0 = 1;
|
||||
system.LMMODE1 = 1; // 32-bit
|
||||
serial::string("ch2 dma start\n");
|
||||
ch2_dma_transfer((uint32_t)outbuf, framebuffer, (640 * 480 * 4) / 32);
|
||||
serial::string("ch2 dma end\n");
|
||||
|
||||
while (!spg_status::vsync(holly.SPG_STATUS));
|
||||
holly.FB_R_SOF1 = texture_memory_alloc.framebuffer[0].start;
|
||||
|
@ -176,9 +176,9 @@ void dma_start(uint8_t const * const send_buf,
|
||||
// start maple DMA
|
||||
_dma_start(send_buf);
|
||||
|
||||
// purge operand cache block for recv buffer, prior to returning to the caller
|
||||
// invalidate operand cache block for recv buffer, prior to returning to the caller
|
||||
for (uint32_t i = 0; i < align_32byte(recv_size) / 32; i++) {
|
||||
asm volatile ("ocbp @%0"
|
||||
asm volatile ("ocbi @%0"
|
||||
: // output
|
||||
: "r" (reinterpret_cast<uint32_t>(&recv_buf[32 * i])) // input
|
||||
);
|
||||
|
@ -192,9 +192,9 @@ void tick(struct maple_poll_state& poll_state)
|
||||
const uint8_t * buf = reinterpret_cast<const uint8_t *>(state.reply_crc.offset);
|
||||
const uint8_t * buf32 = reinterpret_cast<const uint8_t *>((state.reply_crc.offset / 32) * 32); // round down
|
||||
|
||||
// purge operand cache blocks for the data written by DMA, rounding up twice
|
||||
// invalidate operand cache blocks for the data written by DMA, rounding up twice
|
||||
for (uint32_t i = 0; i < align_32byte(len) + 32; i += 32) {
|
||||
asm volatile ("ocbp @%0"
|
||||
asm volatile ("ocbi @%0"
|
||||
: // output
|
||||
: "r" (reinterpret_cast<uint32_t>(&buf32[i])) // input
|
||||
);
|
||||
|
Loading…
x
Reference in New Issue
Block a user