From dfc14d8658440b86f352c4c5c1797f54961b99b5 Mon Sep 17 00:00:00 2001 From: Zack Buhman Date: Sat, 3 Feb 2024 09:52:21 +0800 Subject: [PATCH] maple: move ocbp inline assembly to dma_start Necessarily, this means that dma_start must now know what the size of the response is, so that it can issue the appropriate number of ocbp instructions. This also cleans up the inconsistent _command_buf and _receive_buf declarations. --- example/clipping.cpp | 4 +- example/clipping2.cpp | 4 +- example/clipping_textured.cpp | 4 +- example/maple_analog.cpp | 4 +- example/maple_controller.cpp | 4 +- example/maple_device_request.cpp | 4 +- example/maple_vibrator.cpp | 65 +++++++------------- example/maple_wink.cpp | 29 +++++---- example/modifier_volume_with_two_volumes.cpp | 4 +- maple/maple.cpp | 63 +++++++++++++++---- maple/maple.hpp | 16 ++++- 11 files changed, 122 insertions(+), 79 deletions(-) diff --git a/example/clipping.cpp b/example/clipping.cpp index ea4e3fa..7005ed5 100644 --- a/example/clipping.cpp +++ b/example/clipping.cpp @@ -29,8 +29,8 @@ #include "maple/maple_bus_commands.hpp" #include "maple/maple_bus_ft0.hpp" -uint32_t _command_buf[1024 / 4 + 32]; -uint32_t _receive_buf[1024 / 4 + 32]; +uint32_t _command_buf[(1024 + 32) / 4]; +uint32_t _receive_buf[(1024 + 32) / 4]; static ft0::data_transfer::data_format data[4]; diff --git a/example/clipping2.cpp b/example/clipping2.cpp index 0ea6ad6..e648e38 100644 --- a/example/clipping2.cpp +++ b/example/clipping2.cpp @@ -30,8 +30,8 @@ #include "maple/maple_bus_commands.hpp" #include "maple/maple_bus_ft0.hpp" -uint32_t _command_buf[1024 / 4 + 32]; -uint32_t _receive_buf[1024 / 4 + 32]; +uint32_t _command_buf[(1024 + 32) / 4]; +uint32_t _receive_buf[(1024 + 32) / 4]; static ft0::data_transfer::data_format data[4]; diff --git a/example/clipping_textured.cpp b/example/clipping_textured.cpp index 4a123b4..25b44a4 100644 --- a/example/clipping_textured.cpp +++ b/example/clipping_textured.cpp @@ -34,8 +34,8 @@ #include "twiddle.hpp" 
#include "macaw.hpp" -uint32_t _command_buf[1024 / 4 + 32]; -uint32_t _receive_buf[1024 / 4 + 32]; +uint32_t _command_buf[(1024 + 32) / 4]; +uint32_t _receive_buf[(1024 + 32) / 4]; static ft0::data_transfer::data_format data[4]; diff --git a/example/maple_analog.cpp b/example/maple_analog.cpp index b9ed9e0..fac7ce2 100644 --- a/example/maple_analog.cpp +++ b/example/maple_analog.cpp @@ -29,8 +29,8 @@ #include "maple/maple_bus_commands.hpp" #include "maple/maple_bus_ft0.hpp" -uint32_t _command_buf[1024 / 4 + 32]; -uint32_t _receive_buf[1024 / 4 + 32]; +uint32_t _command_buf[(1024 + 32) / 4]; +uint32_t _receive_buf[(1024 + 32) / 4]; static ft0::data_transfer::data_format data[4]; diff --git a/example/maple_controller.cpp b/example/maple_controller.cpp index c9e7183..774f605 100644 --- a/example/maple_controller.cpp +++ b/example/maple_controller.cpp @@ -10,8 +10,8 @@ #include "maple/maple_bus_ft0.hpp" #include "sh7091/serial.hpp" -uint32_t _command_buf[1024 / 4 + 32] = {0}; -uint32_t _receive_buf[1024 / 4 + 32] = {0}; +uint32_t _command_buf[(1024 + 32) / 4]; +uint32_t _receive_buf[(1024 + 32) / 4]; static uint32_t * command_buf; static uint32_t * receive_buf; diff --git a/example/maple_device_request.cpp b/example/maple_device_request.cpp index 1ee10d4..c5394eb 100644 --- a/example/maple_device_request.cpp +++ b/example/maple_device_request.cpp @@ -5,8 +5,8 @@ #include "maple/maple_bus_commands.hpp" #include "sh7091/serial.hpp" -uint32_t _command_buf[1024 / 4 + 32] = {0}; -uint32_t _receive_buf[1024 / 4 + 32] = {0}; +uint32_t _command_buf[(1024 + 32) / 4]; +uint32_t _receive_buf[(1024 + 32) / 4]; void main() { diff --git a/example/maple_vibrator.cpp b/example/maple_vibrator.cpp index cce6a41..b15d348 100644 --- a/example/maple_vibrator.cpp +++ b/example/maple_vibrator.cpp @@ -48,28 +48,21 @@ void do_lm_request(uint8_t port, uint8_t lm) destination_ap, get_media_info::command_code, (sizeof (struct get_media_info::data_fields)), true); - using command_type = struct 
maple::host_command; - auto host_command = reinterpret_cast(command_buf); + using host_command_type = struct maple::host_command; + auto host_command = reinterpret_cast(command_buf); auto& fields = host_command->bus_data.data_fields; fields.function_type = std::byteswap(function_type::vibration); fields.pt = std::byteswap(1 << 24); - serial::string("dma start\n"); - const uint32_t size = (reinterpret_cast(&host_command[1]) - reinterpret_cast(&host_command[0])); - maple::dma_start(command_buf, size * 2); - using response_type = data_transfer; - using command_response_type = struct maple::command_response; - for (uint32_t i = 0; i < (sizeof (command_response_type)) / 32; i++) { - asm volatile ("ocbp @%0" - : // output - : "r" (reinterpret_cast(&receive_buf[(32 * i) / 4])) // input - ); - } + using host_response_type = struct maple::command_response; + auto host_response = reinterpret_cast(receive_buf); - auto response = reinterpret_cast(receive_buf); + serial::string("dma start\n"); + maple::dma_start(command_buf, maple::sizeof_command(host_command), + receive_buf, maple::sizeof_command(host_response)); - auto& bus_data = response->bus_data; + auto& bus_data = host_response->bus_data; if (bus_data.command_code != response_type::command_code) { serial::string("lm did not reply to vibration get_media_info: "); serial::integer(lm); @@ -124,18 +117,12 @@ void do_lm_request(uint8_t port, uint8_t lm) fields.write_in_data.freq = 0x27; fields.write_in_data.inc = 0x00; - const uint32_t size = (reinterpret_cast(&host_command[1]) - reinterpret_cast(&host_command[0])); - maple::dma_start(command_buf, size); + using host_response_type = struct maple::command_response; + auto host_response = reinterpret_cast(receive_buf); + maple::dma_start(command_buf, maple::sizeof_command(host_command), + receive_buf, maple::sizeof_command(host_response)); - using command_response_type = struct maple::command_response; - for (uint32_t i = 0; i < (sizeof (command_response_type)) / 32; i++) { - 
asm volatile ("ocbp @%0" - : // output - : "r" (reinterpret_cast(&receive_buf[(32 * i) / 4])) // input - ); - } - auto command_response = reinterpret_cast(receive_buf); - auto& bus_data = command_response->bus_data; + auto& bus_data = host_response->bus_data; if (bus_data.command_code != device_reply::command_code) { serial::string("lm did not reply to vibration set_condition: "); @@ -166,21 +153,15 @@ void do_device_request() { using command_type = device_request; using response_type = device_status; + using host_response_type = struct maple::command_response; + auto host_response = reinterpret_cast(receive_buf); + const uint32_t command_size = maple::init_host_command_all_ports(command_buf, receive_buf); + maple::dma_start(command_buf, command_size, + receive_buf, maple::sizeof_command(host_response)); - const uint32_t size = maple::init_host_command_all_ports(command_buf, receive_buf); - maple::dma_start(command_buf, size); - - using command_response_type = struct maple::command_response; - for (uint32_t i = 0; i < ((sizeof (command_response_type)) * 4) / 32; i++) { - asm volatile ("ocbp @%0" - : // output - : "r" (reinterpret_cast(&receive_buf[(32 * i) / 4])) // input - ); - } - auto response = reinterpret_cast(receive_buf); for (uint8_t port = 0; port < 4; port++) { - auto& bus_data = response[port].bus_data; - auto& data_fields = response[port].bus_data.data_fields; + auto& bus_data = host_response[port].bus_data; + auto& data_fields = bus_data.data_fields; if (bus_data.command_code != device_status::command_code) { // the controller is disconnected } else { @@ -194,11 +175,11 @@ void do_device_request() } } +uint32_t _command_buf[(1024 + 32) / 4]; +uint32_t _receive_buf[(1024 + 32) / 4]; + void main() { - uint32_t _command_buf[1024 / 4 + 32]; - uint32_t _receive_buf[1024 / 4 + 32]; - command_buf = align_32byte(_command_buf); command_buf = reinterpret_cast(reinterpret_cast(command_buf) | 0xa000'0000); receive_buf = align_32byte(_receive_buf); diff --git 
a/example/maple_wink.cpp b/example/maple_wink.cpp index 500f5ab..a54ad99 100644 --- a/example/maple_wink.cpp +++ b/example/maple_wink.cpp @@ -1,6 +1,7 @@ #include #include "maple/maple.hpp" +#include "maple/maple_bus_commands.hpp" #include "maple/maple_bus_bits.hpp" #include "vga.hpp" #include "align.hpp" @@ -30,26 +31,32 @@ constexpr uint32_t height = 32; constexpr uint32_t pixels_per_byte = 8; constexpr uint32_t wink_size = width * height / pixels_per_byte; +uint32_t _command_buf[(1024 + 32) / 4]; +uint32_t _receive_buf[(1024 + 32) / 4]; + void main() { uint32_t wink_buf[wink_size / 4]; make_wink(wink_buf); - uint32_t _command_buf[(1024 + 32) / 4]; - uint32_t _receive_buf[(1024 + 32) / 4]; uint32_t * command_buf = align_32byte(_command_buf); uint32_t * receive_buf = align_32byte(_receive_buf); - const uint32_t size = maple::init_block_write(command_buf, receive_buf, - host_instruction::port_select::a, - ap::de::expansion_device | ap::port_select::a | ap::lm_bus::_0, - wink_buf, - wink_size); - maple::dma_start(command_buf, size); + const uint32_t command_size = maple::init_block_write(command_buf, receive_buf, + host_instruction::port_select::a, + ap::de::expansion_device | ap::port_select::a | ap::lm_bus::_0, + wink_buf, + wink_size); + using response_type = device_reply; + using host_response_type = struct maple::command_response; + auto host_response = reinterpret_cast(receive_buf); + maple::dma_start(command_buf, command_size, + receive_buf, maple::sizeof_command(host_response)); - for (int i = 0; i < 1; i++) { - serial::integer(receive_buf[i]); - } + serial::integer(host_response->bus_data.command_code); + serial::integer(host_response->bus_data.destination_ap); + serial::integer(host_response->bus_data.source_ap); + serial::integer(host_response->bus_data.data_size); vga(); v_sync_in(); diff --git a/example/modifier_volume_with_two_volumes.cpp b/example/modifier_volume_with_two_volumes.cpp index 233bbd5..ef710ae 100644 --- 
a/example/modifier_volume_with_two_volumes.cpp +++ b/example/modifier_volume_with_two_volumes.cpp @@ -341,8 +341,8 @@ void update_rot_pos(struct rot_pos& rot_pos) } uint32_t _ta_parameter_buf[((32 * 8192) + 32) / 4]; -uint32_t _command_buf[1024 / 4 + 32]; -uint32_t _receive_buf[1024 / 4 + 32]; +uint32_t _command_buf[(1024 + 32) / 4]; +uint32_t _receive_buf[(1024 + 32) / 4]; void main() { diff --git a/maple/maple.cpp b/maple/maple.cpp index ee693ad..787ba91 100644 --- a/maple/maple.cpp +++ b/maple/maple.cpp @@ -105,17 +105,10 @@ uint32_t init_block_write(uint32_t * command_buf, uint32_t * receive_buf, + data_size; } -void dma_start(const uint32_t * command_buf, const uint32_t size) +static inline void _dma_start(const uint32_t * command_buf) { using namespace dmac; - for (uint32_t i = 0; i < align_32byte(size) / 32; i++) { - asm volatile ("ocbwb @%0" - : // output - : "r" (reinterpret_cast(&command_buf[(32 * i) / 4])) // input - ); - } - //command_buf = reinterpret_cast(reinterpret_cast(command_buf) | 0xa000'0000); sh7091.DMAC.DMAOR = dmaor::ddt::on_demand_data_transfer_mode /* on-demand data transfer mode */ @@ -148,6 +141,57 @@ void dma_start(const uint32_t * command_buf, const uint32_t size) maple_if.MDEN = mden::dma_enable::enable; maple_if.MDST = mdst::start_status::start; +} + +void dma_start(const uint32_t * command_buf, + const uint32_t command_size, + const uint32_t * receive_buf, + const uint32_t receive_size + ) +{ + // write back operand cache blocks for command buffer prior to starting DMA + for (uint32_t i = 0; i < align_32byte(command_size) / 32; i++) { + asm volatile ("ocbwb @%0" + : // output + : "r" (reinterpret_cast(&command_buf[(32 * i) / 4])) // input + ); + } + + // start maple DMA + _dma_start(command_buf); + + // purge operand cache block for receive buffer, prior to returning to the caller + for (uint32_t i = 0; i < align_32byte(receive_size) / 32; i++) { + asm volatile ("ocbp @%0" + : // output + : "r" (reinterpret_cast(&receive_buf[(32 * 
i) / 4])) // input + ); + } + + // wait for maple DMA completion + while ((system.ISTNRM & ISTNRM__END_OF_DMA_MAPLE_DMA) == 0); + system.ISTNRM = ISTNRM__END_OF_DMA_MAPLE_DMA; +} + +void dma_start(const uint32_t * command_buf, + const uint32_t command_size + ) +{ + // write back operand cache blocks for command buffer prior to starting DMA + for (uint32_t i = 0; i < align_32byte(command_size) / 32; i++) { + asm volatile ("ocbwb @%0" + : // output + : "r" (reinterpret_cast(&command_buf[(32 * i) / 4])) // input + ); + } + + // start maple DMA + _dma_start(command_buf); + + // wait for maple DMA completion + while ((system.ISTNRM & ISTNRM__END_OF_DMA_MAPLE_DMA) == 0); + system.ISTNRM = ISTNRM__END_OF_DMA_MAPLE_DMA; +} // wait for completion //while (mdst::start_status::status(maple_if.MDST) != 0); @@ -163,8 +207,5 @@ void dma_start(const uint32_t * command_buf, const uint32_t size) } } */ - while ((system.ISTNRM & ISTNRM__END_OF_DMA_MAPLE_DMA) == 0); - system.ISTNRM = ISTNRM__END_OF_DMA_MAPLE_DMA; -} } diff --git a/maple/maple.hpp b/maple/maple.hpp index fbd2366..ff8658b 100644 --- a/maple/maple.hpp +++ b/maple/maple.hpp @@ -54,6 +54,20 @@ uint32_t init_block_write(uint32_t * buf, uint32_t * receive_buf, uint32_t * data, uint32_t data_size); -void dma_start(const uint32_t * command_buf, const uint32_t size); +void dma_start(const uint32_t * command_buf, + const uint32_t command_size, + const uint32_t * receive_buf, + const uint32_t receive_size + ); + +void dma_start(const uint32_t * command_buf, + const uint32_t command_size + ); + +template +constexpr uint32_t sizeof_command(T * c) +{ + return reinterpret_cast(&c[1]) - reinterpret_cast(&c[0]); +} }