diff --git a/example/example.mk b/example/example.mk index d13cc4a..6147792 100644 --- a/example/example.mk +++ b/example/example.mk @@ -756,7 +756,15 @@ example/holly_recv_dma.elf: $(START_OBJ) $(HOLLY_RECV_DMA_OBJ) FIPR_OBJ = \ example/fipr.o \ fipr.o \ + sobel_fipr.o \ sh7091/serial.o example/fipr.elf: LDSCRIPT = $(LIB)/main.lds example/fipr.elf: $(START_OBJ) $(FIPR_OBJ) + +ORA_OBJ = \ + example/ora.o \ + sh7091/serial.o + +example/ora.elf: LDSCRIPT = $(LIB)/main.lds +example/ora.elf: $(START_OBJ) $(ORA_OBJ) diff --git a/example/fipr.cpp b/example/fipr.cpp index 145edb1..521b1b9 100644 --- a/example/fipr.cpp +++ b/example/fipr.cpp @@ -1,10 +1,90 @@ #include "stdint.h" +#include "sh7091/sh7091.hpp" +#include "sh7091/sh7091_bits.hpp" +#include "sh7091/vbr.hpp" #include "sh7091/serial.hpp" +#include "systembus.hpp" extern "C" float fipr(float * a, float * b); +extern "C" void sobel_fipr(float * a, int * i); -void main() +void vbr100() +{ + serial::string("vbr100\n"); + serial::string("expevt "); + serial::integer(sh7091.CCN.EXPEVT); + serial::string("intevt "); + serial::integer(sh7091.CCN.INTEVT); + serial::string("tra "); + serial::integer(sh7091.CCN.TRA); + uint32_t spc; + uint32_t ssr; + asm volatile ("stc spc,%0" + : "=r" (spc) + ); + asm volatile ("stc ssr,%0" + : "=r" (ssr) + ); + + serial::string("spc "); + serial::integer(spc); + serial::string("ssr "); + serial::integer(ssr); + while (1); +} + +void vbr400() +{ + serial::string("vbr400\n"); + serial::string("expevt "); + serial::integer(sh7091.CCN.EXPEVT); + serial::string("intevt "); + serial::integer(sh7091.CCN.INTEVT); + serial::string("tra "); + serial::integer(sh7091.CCN.TRA); + uint32_t spc; + uint32_t ssr; + asm volatile ("stc spc,%0" + : "=r" (spc) + ); + asm volatile ("stc ssr,%0" + : "=r" (ssr) + ); + + serial::string("spc "); + serial::integer(spc); + serial::string("ssr "); + serial::integer(ssr); + while (1); +} + +void vbr600() +{ + serial::string("vbr600\n"); + serial::string("expevt "); + serial::integer(sh7091.CCN.EXPEVT); + serial::string("intevt "); + serial::integer(sh7091.CCN.INTEVT); + serial::string("tra "); + serial::integer(sh7091.CCN.TRA); + uint32_t spc; + uint32_t ssr; + asm volatile ("stc spc,%0" + : "=r" (spc) + ); + asm volatile ("stc ssr,%0" + : "=r" (ssr) + ); + + serial::string("spc "); + serial::integer(spc); + serial::string("ssr "); + serial::integer(ssr); + while (1); +} + +void test1() { float a[] = {1, 2, 3, 4}; float b[] = {5, 6, 7, 8}; @@ -18,7 +98,110 @@ void main() v.f = fipr(a, b); serial::integer(v.i); - serial::integer(v.i); - serial::integer(v.i); - serial::integer(v.i); +} + +void test2() +{ + float a[640 * 480]; + a[0] = 11; + a[1] = 12; + a[2] = 13; + a[0 + 640] = 1400; + a[1 + 640] = 1500; + a[2 + 640] = 1600; + a[0 + 1280] = 170000; + a[1 + 1280] = 180000; + a[2 + 1280] = 190000; + + // -719952 + // -20402 + // 518747123908 + + int i[640 * 480]; + + // expected value: + for (int j = 0; j < 640 * 480; j++) { + i[j] = 0xeeeeeeee; + } + + sobel_fipr(a, i); + // -5952 + + int v; + v = i[640 + 1]; + serial::integer(v); + v = i[640 + 2]; + serial::integer(v); + v = i[640 + 3]; + + v = i[640 * 479 - 1]; + serial::integer(v); + v = i[640 * 479 - 2]; + serial::integer(v); + v = i[640 * 479 - 3]; + serial::integer(v); + v = i[640 * 479 - 4]; + serial::integer(v); + v = i[640 * 479 - 5]; + serial::integer(v); + v = i[640 * 479 - 6]; + serial::integer(v); +} + +void init_interrupt() +{ + system.IML2NRM = 0; + system.IML2ERR = 0; + system.IML2EXT = 0; + + system.IML4NRM = 0; + system.IML4ERR = 0; + system.IML4EXT = 0; + + system.IML6NRM = 0; + system.IML6ERR = 0; + system.IML6EXT = 0; + + sh7091.CCN.INTEVT = 0; + sh7091.CCN.EXPEVT = 0; + + uint32_t vbr = reinterpret_cast(&__vbr_link_start) - 0x100; + + asm volatile ("ldc %0,vbr" + : + : "r" (vbr)); + + + uint32_t sr; + asm volatile ("stc sr,%0" + : "=r" (sr)); + + serial::string("sr "); + serial::integer(sr); + + sr &= ~sh::sr::bl; // BL + sr |= sh::sr::imask(15); // imask + + serial::string("sr "); + serial::integer(sr); + + asm volatile ("ldc %0,sr" + : + : "r" (sr)); +} + +void main() +{ + init_interrupt(); + + serial::string("test1:\n"); + test1(); + serial::string("test2:\n"); + serial::string("test2:\n"); + serial::string("test2:\n"); + test2(); + + serial::string("return\n"); + serial::string("return\n"); + serial::string("return\n"); } diff --git a/example/holly_recv_dma.cpp b/example/holly_recv_dma.cpp index 874c2bb..7308c22 100644 --- a/example/holly_recv_dma.cpp +++ b/example/holly_recv_dma.cpp @@ -3,7 +3,7 @@ #include "sh7091/serial.hpp" #include "memorymap.hpp" -static void dma(uint32_t source, uint32_t destination, uint32_t length) +static void dma(uint32_t source, uint32_t destination, uint32_t transfers) { using namespace dmac; @@ -11,22 +11,38 @@ static void dma(uint32_t source, uint32_t destination, uint32_t length) sh7091.DMAC.SAR1 = source; sh7091.DMAC.DAR1 = destination; - sh7091.DMAC.DMATCR1 = length & 0x00ff'ffff; + sh7091.DMAC.DMATCR1 = transfers & 0x00ff'ffff; sh7091.DMAC.CHCR1 = chcr::dm::destination_address_incremented | chcr::sm::source_address_incremented - | chcr::rs::resource_select(0b0100) /* external address space → external address space */ - | chcr::tm::cycle_burst_mode /* transmit mode */ - //| chcr::tm::cycle_steal_mode /* transmit mode */ + | chcr::rs::resource_select(0b0100) /* auto request, external address space → external address space */ + | chcr::tm::cycle_burst_mode /* transmit mode */ + //| chcr::tm::cycle_steal_mode /* transmit mode */ | chcr::ts::_32_byte /* transfer size */ //| chcr::ie::interrupt_request_generated | chcr::de::channel_operation_enabled; } +static void dma_init() +{ + using namespace dmac; + + sh7091.DMAC.CHCR0 = 0; + sh7091.DMAC.CHCR1 = 0; + sh7091.DMAC.CHCR2 = 0; + sh7091.DMAC.CHCR3 = 0; + sh7091.DMAC.DMAOR = dmaor::ddt::on_demand_data_transfer_mode /* on-demand data transfer mode */ + | dmaor::pr::ch2_ch0_ch1_ch3 /* priority mode; CH2 > CH0 > CH1 > CH3 */ + | dmaor::dme::operation_enabled_on_all_channels; /* DMAC master enable */ + +} + static uint32_t buf[256] __attribute__((aligned(32))); void main() { + dma_init(); + for (int i = 0; i < 256; i++) { buf[i] = 0; texture_memory32[i] = (1 << 31) | i; @@ -46,7 +62,8 @@ void main() serial::integer((uint32_t)&buf[0]); - dma((uint32_t)&texture_memory32[0], (uint32_t)&buf[0], (sizeof (buf))); + uint32_t transfers = 256 * 4 / 32; + dma((uint32_t)&texture_memory32[0], (uint32_t)&buf[0], transfers); uint32_t last_dar = sh7091.DMAC.DAR1; uint32_t count = 0; diff --git a/example/ora.cpp b/example/ora.cpp new file mode 100644 index 0000000..9ffcd23 --- /dev/null +++ b/example/ora.cpp @@ -0,0 +1,97 @@ +#include "sh7091/sh7091.hpp" +#include "sh7091/sh7091_bits.hpp" +#include "sh7091/serial.hpp" + +#include "memorymap.hpp" + +static void dma(uint32_t source, uint32_t destination, uint32_t transfers) +{ + using namespace dmac; + + sh7091.DMAC.CHCR1 = 0; + + sh7091.DMAC.SAR1 = source; + sh7091.DMAC.DAR1 = destination; + sh7091.DMAC.DMATCR1 = transfers & 0x00ff'ffff; + + sh7091.DMAC.CHCR1 = chcr::dm::destination_address_incremented + | chcr::sm::source_address_incremented + | chcr::rs::resource_select(0b0101) /* auto request, external address space → on-chip peripheral module */ + | chcr::tm::cycle_burst_mode /* transmit mode */ + //| chcr::tm::cycle_steal_mode /* transmit mode */ + | chcr::ts::_32_bit /* transfer size */ + //| chcr::ie::interrupt_request_generated + | chcr::de::channel_operation_enabled; +} + +static void dma_init() +{ + using namespace dmac; + + sh7091.DMAC.CHCR0 = 0; + sh7091.DMAC.CHCR1 = 0; + sh7091.DMAC.CHCR2 = 0; + sh7091.DMAC.CHCR3 = 0; + sh7091.DMAC.DMAOR = dmaor::ddt::normal_dma_mode /* on-demand data transfer mode */ + | dmaor::pr::ch2_ch0_ch1_ch3 /* priority mode; CH2 > CH0 > CH1 > CH3 */ + | dmaor::dme::operation_enabled_on_all_channels; /* DMAC master enable */ + +} + +void main() +{ + sh7091.CCN.CCR |= ccn::ccr::ora::_8_kbytes_used_as_cache_8_kbytes_used_as_ram; + + dma_init(); + + // from entry 128 to entry 255 and from entry 384 to entry 511 of the OC are to be used as RAM + uint32_t * oc_a = &sh7091_oc_d[128 * 32 / 4]; // 1024 words + uint32_t * oc_b = &sh7091_oc_d[384 * 32 / 4]; // 1024 words + + for (int i = 0; i < 256; i++) { + oc_a[i] = 0; + texture_memory32[i] = (1 << 31) | i; + } + + serial::string("tm: "); + serial::integer((uint32_t)&texture_memory32[0]); + serial::string("oc_a: "); + serial::integer((uint32_t)&oc_a[0]); + + serial::string("dmaor: "); + serial::integer(sh7091.DMAC.DMAOR); + + uint32_t transfers = 64 / 4; + dma((uint32_t)&texture_memory32[0], (uint32_t)&oc_a[0], transfers); + + serial::string("sar: "); + serial::integer(sh7091.DMAC.SAR1); + serial::string("dar: "); + serial::integer(sh7091.DMAC.DAR1); + + uint32_t last_dar = sh7091.DMAC.DAR1; + uint32_t count = 0; + while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0) { + uint32_t dar = sh7091.DMAC.DAR1; + if (dar == last_dar) + count += 1; + else + count = 0; + if (count > 10) + goto return_main; + serial::integer(sh7091.DMAC.DMAOR); + } + + serial::string("dmaor: "); + serial::integer(sh7091.DMAC.DMAOR); + serial::string("buf:\n"); + for (int i = 0; i < 64; i++) { + serial::integer(oc_a[i]); + } + + return_main: + serial::string("return\n"); + serial::string("return\n"); + serial::string("return\n"); + serial::string("return\n"); +} diff --git a/example/wiffle_screen_space.cpp b/example/wiffle_screen_space.cpp index 8b5685c..029aed4 100644 --- a/example/wiffle_screen_space.cpp +++ b/example/wiffle_screen_space.cpp @@ -25,7 +25,7 @@ #include "geometry/wiffle.hpp" -void convolve(uint32_t * in, uint32_t * out); +#include "sobel.hpp" constexpr float half_degree = 0.01745329f / 2; @@ -250,8 +250,31 @@ void dma_init() } static uint32_t inbuf[640 * 480] __attribute__((aligned(32))); +static float temp[640 * 480] __attribute__((aligned(32))); static uint32_t outbuf[640 * 480] __attribute__((aligned(32))); +void make_temp() +{ + for (int i = 0; i < 640 * 480; i++) { + if ((i & 31) == 0) { + asm volatile ("pref @%0" + : // output + : "r" ((uint32_t)&inbuf[i]) // input + ); + } + uint32_t n = inbuf[i]; + uint32_t sum; + sum = n & 0xff; + n >>= 8; + sum += n & 0xff; + n >>= 8; + sum += n & 0xff; + n >>= 8; + sum += n & 0xff; + temp[i] = (float)(sum * 0.25); + } +} + void main() { dma_init(); @@ -356,8 +379,6 @@ void main() serial::string("ch1 dma start\n"); dma_transfer((uint32_t)in, (uint32_t)inbuf, 640 * 480 * 4 / 32); - while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0); - serial::string("ch1 dma end\n"); for (uint32_t i = 0; i < (sizeof (640 * 480 * 4)) / 32; i++) { uint32_t address = (uint32_t)&inbuf[0]; @@ -367,8 +388,15 @@ void main() ); } + while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0); + serial::string("ch1 dma end\n"); + + serial::string("temp start\n"); + make_temp(); + serial::string("temp end\n"); + serial::string("convolve start\n"); - convolve(inbuf, outbuf); + convolve(temp, outbuf); serial::string("convolve end\n"); uint32_t framebuffer = 0x11000000 + texture_memory_alloc.framebuffer[0].start; // TA FIFO - Direct Texture Path diff --git a/memorymap.hpp b/memorymap.hpp index 20d3f63..d1535fc 100644 --- a/memorymap.hpp +++ b/memorymap.hpp @@ -12,3 +12,4 @@ extern volatile uint32_t ta_fifo_polygon_converter_mirror[0x800000] __asm("ta_fi extern volatile uint32_t ta_fifo_yuv_converter_mirror[0x800000] __asm("ta_fifo_yuv_converter_mirror"); extern volatile uint32_t ta_fifo_texture_memory_mirror[0x800000] __asm("ta_fifo_texture_memory_mirror"); extern uint32_t store_queue[0x4000000] __asm("store_queue"); +extern uint32_t sh7091_oc_d[0x1000] __asm("sh7091_oc_d"); diff --git a/sobel.cpp b/sobel.cpp index b15995f..98c83fa 100644 --- a/sobel.cpp +++ b/sobel.cpp @@ -1,102 +1,76 @@ #include -int clamp255(float v) -{ - int n = (int)v; - if (n < 0) - return 0; - if (n > 255) - return 255; - return n; -} +#include "sobel.hpp" -uint32_t getpx(uint32_t * buf, int x, int y) +static inline float getpx(float * buf, int x, int y) { - if (x < 0) - x = 0; - if (y < 0) - y = 0; - if (x >= 640) - x = 640 - 1; - if (y >= 480) - y = 480 - 1; return buf[y * 640 + x]; } -float multiply(uint32_t * buf, int x, int y, float weight) +static inline float kernel2(float * buf, int x, int y) { - uint32_t color = getpx(buf, x, y); - int b = color & 0xff; - color >>= 8; - int g = color & 0xff; - color >>= 8; - int r = color & 0xff; - color >>= 8; - int a = color; + constexpr float gx[] = { + 1, 0, -1, /* fr0 , _ , xf12 */ + 2, 0, -2, /* fr1 , _ , xf13 */ + 1, 0, -1, /* fr2, _ , xf14 */ + }; - float luminance = (float)(r + g + b + a) * 0.25; - return luminance * (float)weight; -} - -float kernel(uint32_t * buf, const float * weights, int x, int y) -{ - float c = 0; - c += multiply(buf, x - 1, y - 1, weights[0]); - c += multiply(buf, x , y - 1, weights[1]); - c += multiply(buf, x + 1, y - 1, weights[2]); - - c += multiply(buf, x - 1, y , weights[3]); - c += multiply(buf, x , y , weights[4]); - c += multiply(buf, x + 1, y , weights[5]); - - c += multiply(buf, x - 1, y + 1, weights[6]); - c += multiply(buf, x , y + 1, weights[7]); - c += multiply(buf, x + 1, y + 1, weights[8]); - - return c; -} - -const float gx[] = { - 1, 0, -1, - 2, 0, -2, - 1, 0, -1, -}; - -const float gy[] = { - 1, 2, 1, + constexpr float gy[] = { + 1, 2, 1, /* fr0, fr1, fr2 */ 0, 0, 0, - -1, -2, -1, -}; + -1, -2, -1, /* fr4, fr5, fr6 */ + }; -void convolve(uint32_t * in, uint32_t * out) + float a = getpx(buf, x - 1, y - 1); + float b = getpx(buf, x , y - 1); + float c = getpx(buf, x + 1, y - 1); + + float d = getpx(buf, x - 1, y ); + float e = getpx(buf, x , y ); + float f = getpx(buf, x + 1, y ); + + float g = getpx(buf, x - 1, y + 1); + float h = getpx(buf, x , y + 1); + float i = getpx(buf, x + 1, y + 1); + + float sx = 0; + float sy = 0; + + sx += a * gx[0]; + //sx += b * gx[1]; + sx += c * gx[2]; + + sx += d * gx[3]; + //sx += e * gx[4]; + sx += f * gx[5]; + + sx += g * gx[6]; + //sx += h * gx[7]; + sx += i * gx[8]; + + sy += a * gy[0]; + sy += b * gy[1]; + sy += c * gy[2]; + + //sy += d * gy[3]; + //sy += e * gy[4]; + //sy += f * gy[5]; + + sy += g * gy[6]; + sy += h * gy[7]; + sy += i * gy[8]; + + return sx * sx + sy * sy; +} + +void convolve(float * in, uint32_t * out) { - for (int y = 0; y < 480; y++) { - for (int x = 0; x < 640; x++) { - float vx = kernel(in, gx, x, y); - float vy = kernel(in, gy, x, y); - float c = vx * vx + vy * vy; - int d = c > 100.f ? 0 : 1; - uint32_t color = in[y * 640 + x]; + for (int y = 1; y < 480 - 1; y++) { + for (int x = 1; x < 640 - 1; x++) { + float c = kernel2(in, x, y); + int d = c > 100.f ? 0 : 0xffffffff; - int b = color & 0xff; - color >>= 8; - int g = color & 0xff; - color >>= 8; - int r = color & 0xff; - color >>= 8; - int a = color; - - uint32_t color_out = 0; - - //color_out |= (a * d); - //color_out <<= 8; - color_out |= (r * d); - color_out <<= 8; - color_out |= (g * d); - color_out <<= 8; - color_out |= (b * d); - - out[y * 640 + x] = color_out; + out[y * 640 + x] = (uint8_t)d; } } } diff --git a/sobel.hpp b/sobel.hpp new file mode 100644 index 0000000..e773d65 --- /dev/null +++ b/sobel.hpp @@ -0,0 +1,3 @@ +#pragma once + +void convolve(float * in, uint32_t * out); diff --git a/sobel_fipr.s b/sobel_fipr.s new file mode 100644 index 0000000..7d7540e --- /dev/null +++ b/sobel_fipr.s @@ -0,0 +1,152 @@ + /* fv0 fv4 fv8 fv12 */ + .global _sobel_fipr +_sobel_fipr: +__setup: + mov.l r8,@-r15 + mov.l r9,@-r15 + mov.l r10,@-r15 + mov.l r11,@-r15 + + fldi1 fr8 /* 1.0 */ + fldi1 fr9 /* 2.0 */ + fldi1 fr10 /* 1.0 */ + fldi0 fr11 /* 0.0 */ + fadd fr9,fr9 + + fldi1 fr12 + fmov fr9,fr13 + fldi1 fr14 + fldi0 fr15 + fneg fr12 + fneg fr13 + fneg fr14 + + /* constants */ + mova _const_100f,r0 /* r11 as temporary */ + fmov.s @r0,fr0 + fmov dr0,xd0 + + /* save C arguments */ + mov r4,r0 /* r4 saved as r0 */ + mov r5,r8 /* r5 saved as r8 */ + + /* offsets */ + mov #(1 * 4),r1 + mov #(2 * 4),r2 + mov.w _const_640,r3 + mov.w _const_642,r4 + mov.w _const_1280,r5 + mov.w _const_1281,r6 + mov.w _const_1282,r7 + + add r3,r0 /* skip first row */ + add r3,r8 + add #4,r0 /* skip first pixel */ + add #4,r8 + mov.w _const_638,r10 /* skip last pixel */ + + mov.w _const_478,r11 /* row count */ + + bra _loop + nop + + .align 4 +_const_100f: .float 100 +_const_640: .short (640 * 4) +_const_642: .short (642 * 4) +_const_1280: .short (1280 * 4) +_const_1281: .short (1281 * 4) +_const_1282: .short (1282 * 4) + +_const_638: .short 638 +_const_478: .short 478 + + .align 4 +_loop: + +_loop_width: + /* y multiplication */ + fmov.s @r0,fr0 /* 0 */ + fmov.s @(r0,r1),fr1 /* 1 */ + fmov.s @(r0,r2),fr2 /* 2 */ + fldi0 fr3 + fipr fv8,fv0 + + fmov.s @(r0,r5),fr4 /* 1280 */ + fmov.s @(r0,r6),fr5 /* 1281 */ + fmov.s @(r0,r7),fr6 /* 1282 */ + fldi0 fr7 + fipr fv12,fv4 + + fadd fr3,fr7 + fmul fr7,fr7 + + /* save fr7 in FPUL */ + flds fr7,FPUL + + /* x multiplication */ + /* transpose and load + before → + fr0, fr1, fr2, _, + , , , , + fr4, fr5, fr6, _, + + after → + fr0, , fr4, _, + fr1, , fr5, _, + fr2, , fr6, _, + */ + /* exchange fr4/fr2 */ + fmov fr4,fr3 + fmov fr2,fr4 + fmov fr3,fr2 + /* load fr1,fr5 */ + fmov.s @(r0,r3),fr1 /* 640 */ + fldi0 fr3 + fipr fv8,fv0 + fmov.s @(r0,r4),fr5 /* 642 */ + fldi0 fr7 + fipr fv12,fv4 + + fadd fr3,fr7 + fmul fr7,fr7 + /* restore FPUL from y multiplication */ + fsts FPUL,fr3 + fadd fr3,fr7 + + fmov dr0,xd0 /* load 100.f constant */ + + add #4,r0 /* next pixel */ + + fcmp/gt fr0,fr7 + /*subc r9,r9*/ + movt r9 + add #-1,r9 + mov.l r9,@r8 /* save result */ + + dt r10 + bf/s _loop_width + add #4,r8 +/* end of _loop_width */ + + /* skip last pixel and first pixel */ + add #8,r8 + add #8,r0 + + /* row decrement */ + dt r11 + mov.w _const_638_b,r10 + bf/s _loop + nop + + /* restore registers */ +_return: + mov.l @r15+,r11 + mov.l @r15+,r10 + mov.l @r15+,r9 + mov.l @r15+,r8 + + rts + nop + +_const_638_b: .short 638