sobel_fipr wip

This commit is contained in:
Zack Buhman 2025-01-26 03:04:42 -06:00
parent bf2c9a3d8c
commit d41b41409b
9 changed files with 313 additions and 95 deletions

View File

@ -756,7 +756,15 @@ example/holly_recv_dma.elf: $(START_OBJ) $(HOLLY_RECV_DMA_OBJ)
FIPR_OBJ = \
example/fipr.o \
fipr.o \
sobel_fipr.o \
sh7091/serial.o
example/fipr.elf: LDSCRIPT = $(LIB)/main.lds
example/fipr.elf: $(START_OBJ) $(FIPR_OBJ)
ORA_OBJ = \
example/ora.o \
sh7091/serial.o
example/ora.elf: LDSCRIPT = $(LIB)/main.lds
example/ora.elf: $(START_OBJ) $(ORA_OBJ)

View File

@ -3,8 +3,9 @@
#include "sh7091/serial.hpp"
extern "C" float fipr(float * a, float * b);
extern "C" float sobel_y(float * a, uint32_t * i);
void main()
void test1()
{
float a[] = {1, 2, 3, 4};
float b[] = {5, 6, 7, 8};
@ -18,7 +19,35 @@ void main()
v.f = fipr(a, b);
serial::integer(v.i);
serial::integer(v.i);
serial::integer(v.i);
serial::integer(v.i);
}
/*
 * Feeds sobel_y a two-row buffer (640 floats per row) in which only the
 * first three entries of each row are set, then prints the raw bit pattern
 * of the returned float over serial.
 *
 * NOTE(review): sobel_y is declared earlier in this file with two parameters
 * (float *, uint32_t *) but is called here with a single argument -- confirm
 * the intended signature.
 */
void test2()
{
  constexpr int row_stride = 640;
  float rows[row_stride * 2];

  const float upper[3] = {11, 12, 13};
  const float lower[3] = {1400, 1500, 1600};
  for (int k = 0; k < 3; k++) {
    rows[k] = upper[k];
    rows[k + row_stride] = lower[k];
  }

  // reinterpret the float result as its 32-bit pattern for printing
  union {
    float f;
    uint32_t i;
  } bits;
  bits.f = sobel_y(rows);

  // author's note: 5952
  serial::integer<uint32_t>(bits.i);
}
/*
 * Runs test1 and test2 in order, announcing each over serial, and finishes
 * by printing "return" three times.
 */
void main()
{
  serial::string("test1:\n");
  test1();

  serial::string("test2:\n");
  test2();

  for (int repeat = 0; repeat < 3; repeat++) {
    serial::string("return\n");
  }
}

View File

@ -3,7 +3,7 @@
#include "sh7091/serial.hpp"
#include "memorymap.hpp"
static void dma(uint32_t source, uint32_t destination, uint32_t length)
static void dma(uint32_t source, uint32_t destination, uint32_t transfers)
{
using namespace dmac;
@ -11,22 +11,38 @@ static void dma(uint32_t source, uint32_t destination, uint32_t length)
sh7091.DMAC.SAR1 = source;
sh7091.DMAC.DAR1 = destination;
sh7091.DMAC.DMATCR1 = length & 0x00ff'ffff;
sh7091.DMAC.DMATCR1 = transfers & 0x00ff'ffff;
sh7091.DMAC.CHCR1 = chcr::dm::destination_address_incremented
| chcr::sm::source_address_incremented
| chcr::rs::resource_select(0b0100) /* external address space → external address space */
| chcr::tm::cycle_burst_mode /* transmit mode */
//| chcr::tm::cycle_steal_mode /* transmit mode */
| chcr::rs::resource_select(0b0100) /* auto request, external address space → external address space */
| chcr::tm::cycle_burst_mode /* transmit mode */
//| chcr::tm::cycle_steal_mode /* transmit mode */
| chcr::ts::_32_byte /* transfer size */
//| chcr::ie::interrupt_request_generated
| chcr::de::channel_operation_enabled;
}
static void dma_init()
{
using namespace dmac;
sh7091.DMAC.CHCR0 = 0;
sh7091.DMAC.CHCR1 = 0;
sh7091.DMAC.CHCR2 = 0;
sh7091.DMAC.CHCR3 = 0;
sh7091.DMAC.DMAOR = dmaor::ddt::on_demand_data_transfer_mode /* on-demand data transfer mode */
| dmaor::pr::ch2_ch0_ch1_ch3 /* priority mode; CH2 > CH0 > CH1 > CH3 */
| dmaor::dme::operation_enabled_on_all_channels; /* DMAC master enable */
}
static uint32_t buf[256] __attribute__((aligned(32)));
void main()
{
dma_init();
for (int i = 0; i < 256; i++) {
buf[i] = 0;
texture_memory32[i] = (1 << 31) | i;
@ -46,7 +62,8 @@ void main()
serial::integer<uint32_t>((uint32_t)&buf[0]);
dma((uint32_t)&texture_memory32[0], (uint32_t)&buf[0], (sizeof (buf)));
uint32_t transfers = 256 * 4 / 32;
dma((uint32_t)&texture_memory32[0], (uint32_t)&buf[0], transfers);
uint32_t last_dar = sh7091.DMAC.DAR1;
uint32_t count = 0;

97
example/ora.cpp Normal file
View File

@ -0,0 +1,97 @@
#include "sh7091/sh7091.hpp"
#include "sh7091/sh7091_bits.hpp"
#include "sh7091/serial.hpp"
#include "memorymap.hpp"
/*
 * Programs DMAC channel 1 and starts it.
 *
 * source:       value written to SAR1
 * destination:  value written to DAR1
 * transfers:    transfer count written to DMATCR1 (masked to 24 bits); the
 *               unit is the 32-bit transfer size selected below
 */
static void dma(uint32_t source, uint32_t destination, uint32_t transfers)
{
  using namespace dmac;

  /* clear CHCR1 (including the enable bit set at the end) before reprogramming */
  sh7091.DMAC.CHCR1 = 0;

  sh7091.DMAC.SAR1 = source;
  sh7091.DMAC.DAR1 = destination;
  sh7091.DMAC.DMATCR1 = transfers & 0x00ff'ffff;

  /* interrupt generation is left off; cycle_steal_mode is the alternative transmit mode */
  const uint32_t control = chcr::dm::destination_address_incremented
                         | chcr::sm::source_address_incremented
                         | chcr::rs::resource_select(0b0101) /* auto request, external address space → on-chip peripheral module */
                         | chcr::tm::cycle_burst_mode        /* transmit mode */
                         | chcr::ts::_32_bit                 /* transfer size */
                         | chcr::de::channel_operation_enabled;

  /* writing the enable bit last starts the channel */
  sh7091.DMAC.CHCR1 = control;
}
/*
 * Clears the control register of all four DMAC channels, then writes the
 * DMA operation register: normal DMA mode, CH2 > CH0 > CH1 > CH3 priority,
 * master enable on.
 */
static void dma_init()
{
  using namespace dmac;

  sh7091.DMAC.CHCR0 = 0;
  sh7091.DMAC.CHCR1 = 0;
  sh7091.DMAC.CHCR2 = 0;
  sh7091.DMAC.CHCR3 = 0;

  const uint32_t operation = dmaor::ddt::normal_dma_mode                    /* normal DMA mode (not on-demand data transfer) */
                           | dmaor::pr::ch2_ch0_ch1_ch3                     /* priority mode; CH2 > CH0 > CH1 > CH3 */
                           | dmaor::dme::operation_enabled_on_all_channels; /* DMAC master enable */
  sh7091.DMAC.DMAOR = operation;
}
/*
 * Operand-cache-as-RAM experiment.
 *
 * Sets the ORA bit in CCR, fills the first 256 words of texture memory with
 * a recognisable pattern, starts a channel-1 DMA of 16 32-bit transfers from
 * texture memory to the start of the first OC RAM region, and reports
 * register values and the resulting buffer over serial.
 *
 * While waiting for the transfer-end flag, DAR1 is polled; if it is read
 * unchanged on more than 10 consecutive polls the transfer is treated as
 * stalled and the buffer dump is skipped.
 */
void main()
{
  sh7091.CCN.CCR |= ccn::ccr::ora::_8_kbytes_used_as_cache_8_kbytes_used_as_ram;

  dma_init();

  // from entry 128 to entry 255 and from entry 384 to entry 511 of the OC are to be used as RAM
  uint32_t * oc_a = &sh7091_oc_d[128 * 32 / 4]; // 1024 words
  uint32_t * oc_b = &sh7091_oc_d[384 * 32 / 4]; // 1024 words
  (void)oc_b; // second region is not exercised yet

  for (int i = 0; i < 256; i++) {
    oc_a[i] = 0;
    texture_memory32[i] = (1 << 31) | i;
  }

  serial::string("tm: ");
  serial::integer<uint32_t>((uint32_t)&texture_memory32[0]);
  serial::string("oc_a: ");
  serial::integer<uint32_t>((uint32_t)&oc_a[0]);

  serial::string("dmaor: ");
  serial::integer<uint32_t>(sh7091.DMAC.DMAOR);

  uint32_t transfers = 64 / 4;
  dma((uint32_t)&texture_memory32[0], (uint32_t)&oc_a[0], transfers);

  serial::string("sar: ");
  serial::integer<uint32_t>(sh7091.DMAC.SAR1);
  serial::string("dar: ");
  serial::integer<uint32_t>(sh7091.DMAC.DAR1);

  uint32_t last_dar = sh7091.DMAC.DAR1;
  uint32_t count = 0;
  bool stalled = false;
  while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0) {
    uint32_t dar = sh7091.DMAC.DAR1;
    if (dar == last_dar) {
      count += 1;
    } else {
      // DAR1 moved: restart the stall counter relative to the new value.
      // Without this update a transfer that advances once and then stops
      // would never be detected and the loop would spin forever.
      count = 0;
      last_dar = dar;
    }
    if (count > 10) {
      stalled = true;
      break;
    }
    serial::integer<uint32_t>(sh7091.DMAC.DMAOR);
  }

  if (!stalled) {
    serial::string("dmaor: ");
    serial::integer<uint32_t>(sh7091.DMAC.DMAOR);

    serial::string("buf:\n");
    for (int i = 0; i < 64; i++) {
      serial::integer<uint32_t>(oc_a[i]);
    }
  }

  serial::string("return\n");
  serial::string("return\n");
  serial::string("return\n");
  serial::string("return\n");
}

View File

@ -25,7 +25,7 @@
#include "geometry/wiffle.hpp"
void convolve(uint32_t * in, uint32_t * out);
#include "sobel.hpp"
constexpr float half_degree = 0.01745329f / 2;
@ -250,8 +250,31 @@ void dma_init()
}
static uint32_t inbuf[640 * 480] __attribute__((aligned(32)));
static float temp[640 * 480] __attribute__((aligned(32)));
static uint32_t outbuf[640 * 480] __attribute__((aligned(32)));
/*
 * Converts every 32-bit pixel of inbuf into one float in temp: the sum of
 * the four 8-bit channels packed in the word, scaled by 1/4.
 *
 * The scale uses a float constant so the multiply is not promoted to double
 * inside this 640 * 480 iteration loop; the channel sum is at most 1020, so
 * the result is exact and identical to the double-precision form.
 */
void make_temp()
{
  constexpr int pixel_count = 640 * 480;

  for (int i = 0; i < pixel_count; i++) {
    // once every 32 words, issue a prefetch for the address about to be read
    // NOTE(review): 32 words is 128 bytes; confirm this matches the intended
    // prefetch granularity for the operand cache
    if ((i & 31) == 0) {
      asm volatile ("pref @%0"
                    : // output
                    : "r" (&inbuf[i]) // input
                    );
    }

    const uint32_t n = inbuf[i];
    const uint32_t sum = (n & 0xff)
                       + ((n >> 8) & 0xff)
                       + ((n >> 16) & 0xff)
                       + ((n >> 24) & 0xff);

    temp[i] = static_cast<float>(sum) * 0.25f;
  }
}
void main()
{
dma_init();
@ -356,8 +379,6 @@ void main()
serial::string("ch1 dma start\n");
dma_transfer((uint32_t)in, (uint32_t)inbuf, 640 * 480 * 4 / 32);
while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0);
serial::string("ch1 dma end\n");
for (uint32_t i = 0; i < (sizeof (640 * 480 * 4)) / 32; i++) {
uint32_t address = (uint32_t)&inbuf[0];
@ -367,8 +388,15 @@ void main()
);
}
while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0);
serial::string("ch1 dma end\n");
serial::string("temp start\n");
make_temp();
serial::string("temp end\n");
serial::string("convolve start\n");
convolve(inbuf, outbuf);
convolve(temp, outbuf);
serial::string("convolve end\n");
uint32_t framebuffer = 0x11000000 + texture_memory_alloc.framebuffer[0].start; // TA FIFO - Direct Texture Path

View File

@ -12,3 +12,4 @@ extern volatile uint32_t ta_fifo_polygon_converter_mirror[0x800000] __asm("ta_fi
extern volatile uint32_t ta_fifo_yuv_converter_mirror[0x800000] __asm("ta_fifo_yuv_converter_mirror");
extern volatile uint32_t ta_fifo_texture_memory_mirror[0x800000] __asm("ta_fifo_texture_memory_mirror");
extern uint32_t store_queue[0x4000000] __asm("store_queue");
extern uint32_t sh7091_oc_d[0x1000] __asm("sh7091_oc_d");

136
sobel.cpp
View File

@ -1,102 +1,76 @@
#include <stdint.h>
int clamp255(float v)
{
int n = (int)v;
if (n < 0)
return 0;
if (n > 255)
return 255;
return n;
}
#include "sobel.hpp"
uint32_t getpx(uint32_t * buf, int x, int y)
static inline float getpx(float * buf, int x, int y)
{
if (x < 0)
x = 0;
if (y < 0)
y = 0;
if (x >= 640)
x = 640 - 1;
if (y >= 480)
y = 480 - 1;
return buf[y * 640 + x];
}
float multiply(uint32_t * buf, int x, int y, float weight)
static inline float kernel2(float * buf, int x, int y)
{
uint32_t color = getpx(buf, x, y);
int b = color & 0xff;
color >>= 8;
int g = color & 0xff;
color >>= 8;
int r = color & 0xff;
color >>= 8;
int a = color;
float luminance = (float)(r + g + b + a) * 0.25;
return luminance * (float)weight;
}
float kernel(uint32_t * buf, const float * weights, int x, int y)
{
float c = 0;
c += multiply(buf, x - 1, y - 1, weights[0]);
c += multiply(buf, x , y - 1, weights[1]);
c += multiply(buf, x + 1, y - 1, weights[2]);
c += multiply(buf, x - 1, y , weights[3]);
c += multiply(buf, x , y , weights[4]);
c += multiply(buf, x + 1, y , weights[5]);
c += multiply(buf, x - 1, y + 1, weights[6]);
c += multiply(buf, x , y + 1, weights[7]);
c += multiply(buf, x + 1, y + 1, weights[8]);
return c;
}
const float gx[] = {
constexpr float gx[] = {
1, 0, -1,
2, 0, -2,
1, 0, -1,
};
};
const float gy[] = {
constexpr float gy[] = {
1, 2, 1,
0, 0, 0,
-1, -2, -1,
};
};
void convolve(uint32_t * in, uint32_t * out)
float a = getpx(buf, x - 1, y - 1);
float b = getpx(buf, x , y - 1);
float c = getpx(buf, x + 1, y - 1);
float d = getpx(buf, x - 1, y );
float e = getpx(buf, x , y );
float f = getpx(buf, x + 1, y );
float g = getpx(buf, x - 1, y + 1);
float h = getpx(buf, x , y + 1);
float i = getpx(buf, x + 1, y + 1);
float sx = 0;
float sy = 0;
sx += a * gx[0];
//sx += b * gx[1];
sx += c * gx[2];
sx += d * gx[3];
//sx += e * gx[4];
sx += f * gx[5];
sx += g * gx[6];
//sx += h * gx[7];
sx += i * gx[8];
sy += a * gy[0];
sy += b * gy[1];
sy += c * gy[2];
//sy += d * gy[3];
//sy += e * gy[4];
//sy += f * gy[5];
sy += g * gy[6];
sy += h * gy[7];
sy += i * gy[8];
return sx * sx + sy * sy;
}
void convolve(float * in, uint32_t * out)
{
for (int y = 0; y < 480; y++) {
for (int x = 0; x < 640; x++) {
float vx = kernel(in, gx, x, y);
float vy = kernel(in, gy, x, y);
float c = vx * vx + vy * vy;
int d = c > 100.f ? 0 : 1;
uint32_t color = in[y * 640 + x];
for (int y = 1; y < 480 - 1; y++) {
for (int x = 1; x < 640 - 1; x++) {
float c = kernel2(in, x, y);
int d = c > 100.f ? 0 : 0xffffffff;
int b = color & 0xff;
color >>= 8;
int g = color & 0xff;
color >>= 8;
int r = color & 0xff;
color >>= 8;
int a = color;
uint32_t color_out = 0;
//color_out |= (a * d);
//color_out <<= 8;
color_out |= (r * d);
color_out <<= 8;
color_out |= (g * d);
color_out <<= 8;
color_out |= (b * d);
out[y * 640 + x] = color_out;
out[y * 640 + x] = (uint8_t)d;
}
}
}

3
sobel.hpp Normal file
View File

@ -0,0 +1,3 @@
#pragma once
/*
 * Edge-detection pass; the definition lives in sobel.cpp.
 *
 * in:   per-pixel float input -- presumably one value per pixel of a
 *       640x480 image, row-major; confirm against the caller
 * out:  per-pixel 32-bit output, indexed as out[y * 640 + x]
 *
 * NOTE(review): the definition appears to visit only the interior pixels
 * (x in 1..638, y in 1..478), leaving the border of `out` unwritten --
 * confirm.
 */
void convolve(float * in, uint32_t * out);

61
sobel_fipr.s Normal file
View File

@ -0,0 +1,61 @@
/* fv0 fv4 fv8 fv12 */
/*
 * _sobel_y -- work in progress.
 *
 * Declared on the C++ side as: float sobel_y(float * a, uint32_t * i).
 * Presumably r4 and r5 carry those two pointers on entry (SH C calling
 * convention) -- confirm.
 *
 * Register use in the code below:
 *   r0        running input pointer (copied from r4)
 *   r7        copy of r5
 *   r3        address of _const_100f (not read again in this file)
 *   r1, r2    byte offsets of elements 1 and 2
 *   r4-r6     byte offsets of elements 640, 641 and 642 (the next row)
 *   fv8       weights { 1,  2,  1, 0} for the row at r0
 *   fv12      weights {-1, -2, -1, 0} for the row 640 elements later
 */
.global _sobel_y
_sobel_y:
__setup:
mova _const_100f,r0
mov r0,r3
mov r4,r0
mov r5,r7 /* r5 saved as r7 */
/* build fv8 = {1, 2, 1, 0}; fr9 is loaded as 1.0 and doubled by the fadd below */
fldi1 fr8 /* 1.0 */
fldi1 fr9 /* 2.0 */
fldi1 fr10 /* 1.0 */
fldi0 fr11 /* 0.0 */
fadd fr9,fr9
/* build fv12 = {-1, -2, -1, 0}: copy the magnitudes, then negate */
fldi1 fr12
fmov fr9,fr13
fldi1 fr14
fldi0 fr15
fneg fr12
fneg fr13
fneg fr14
/* offsets */
mov #(1 * 4),r1
mov #(2 * 4),r2
/* r4/r5 are overwritten here; their entry values were copied to r0/r7 above */
mov.w _const_640,r4
mov.w _const_644,r5
mov.w _const_648,r6
/* jump over the constant pool; the nop fills the branch delay slot */
bra _loop
nop
.align 4
_const_100f: .float 100
/* NOTE(review): the label names do not all match the values: _const_644 holds 641 * 4 and _const_648 holds 642 * 4 */
_const_640: .word (640 * 4)
_const_644: .word (641 * 4)
_const_648: .word (642 * 4)
/* NOTE(review): despite the name, nothing in this file branches back to _loop; the body runs once */
_loop:
/* fv0 = {a[0], a[1], a[2], 0}, then inner product with fv8 (result expected in fr3, see the fadd below) */
fmov.s @r0,fr0
fmov.s @(r0,r1),fr1
fmov.s @(r0,r2),fr2
fldi0 fr3
fipr fv8,fv0
/* fv4 = {a[640], a[641], a[642], 0}, then inner product with fv12 (result expected in fr7) */
fmov.s @(r0,r4),fr4
fmov.s @(r0,r5),fr5
fmov.s @(r0,r6),fr6
fldi0 fr7
fipr fv12,fv4
/* advance the input pointer by one float */
add #4,r0
/* fr7 = fr3 + fr7: sum of the two row products */
fadd fr3,fr7
/* NOTE(review): the source operand here is the general register r1, not a floating-point register; this store looks unfinished */
fmov.s r1,@-r7
/* NOTE(review): the sum is left in fr7; confirm which register the caller reads the float result from */
rts
nop