sobel_fipr wip
This commit is contained in:
parent
bf2c9a3d8c
commit
d41b41409b
@ -756,7 +756,15 @@ example/holly_recv_dma.elf: $(START_OBJ) $(HOLLY_RECV_DMA_OBJ)
|
||||
FIPR_OBJ = \
|
||||
example/fipr.o \
|
||||
fipr.o \
|
||||
sobel_fipr.o \
|
||||
sh7091/serial.o
|
||||
|
||||
example/fipr.elf: LDSCRIPT = $(LIB)/main.lds
|
||||
example/fipr.elf: $(START_OBJ) $(FIPR_OBJ)
|
||||
|
||||
ORA_OBJ = \
|
||||
example/ora.o \
|
||||
sh7091/serial.o
|
||||
|
||||
example/ora.elf: LDSCRIPT = $(LIB)/main.lds
|
||||
example/ora.elf: $(START_OBJ) $(ORA_OBJ)
|
||||
|
@ -3,8 +3,9 @@
|
||||
#include "sh7091/serial.hpp"
|
||||
|
||||
extern "C" float fipr(float * a, float * b);
|
||||
extern "C" float sobel_y(float * a, uint32_t * i);
|
||||
|
||||
void main()
|
||||
void test1()
|
||||
{
|
||||
float a[] = {1, 2, 3, 4};
|
||||
float b[] = {5, 6, 7, 8};
|
||||
@ -18,7 +19,35 @@ void main()
|
||||
|
||||
v.f = fipr(a, b);
|
||||
serial::integer(v.i);
|
||||
serial::integer(v.i);
|
||||
serial::integer(v.i);
|
||||
serial::integer(v.i);
|
||||
}
|
||||
|
||||
// Calls sobel_y() on a two-row float buffer (row stride 640) and prints the raw
// bit pattern of the returned float over the serial port.
// NOTE(review): sobel_y is declared earlier in this file as taking
// (float *, uint32_t *), but the call below passes a single argument — confirm
// the intended signature before relying on this test.
void test2()
|
||||
{
|
||||
// Only the first three elements of each of the two rows are assigned below;
// every other element of a[] is left uninitialised.
float a[640 * 2];
|
||||
a[0] = 11;
|
||||
a[1] = 12;
|
||||
a[2] = 13;
|
||||
a[0 + 640] = 1400;
|
||||
a[1 + 640] = 1500;
|
||||
a[2 + 640] = 1600;
|
||||
|
||||
// Union used to view the float result as its 32-bit pattern for printing.
union {
|
||||
float f;
|
||||
|
||||
uint32_t i;
|
||||
|
||||
} v;
|
||||
v.f = sobel_y(a);
|
||||
// NOTE(review): the value below is presumably the expected result magnitude — TODO confirm sign/encoding.
// 5952
|
||||
serial::integer<uint32_t>(v.i);
|
||||
}
|
||||
|
||||
// Entry point of the fipr example: runs test1() and test2(), printing a label
// before each, then prints "return" markers on the serial port.
void main()
|
||||
{
|
||||
serial::string("test1:\n");
|
||||
test1();
|
||||
serial::string("test2:\n");
|
||||
test2();
|
||||
|
||||
// The end-of-run marker is emitted several times in a row.
serial::string("return\n");
|
||||
serial::string("return\n");
|
||||
serial::string("return\n");
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
#include "sh7091/serial.hpp"
|
||||
#include "memorymap.hpp"
|
||||
|
||||
static void dma(uint32_t source, uint32_t destination, uint32_t length)
|
||||
static void dma(uint32_t source, uint32_t destination, uint32_t transfers)
|
||||
{
|
||||
using namespace dmac;
|
||||
|
||||
@ -11,22 +11,38 @@ static void dma(uint32_t source, uint32_t destination, uint32_t length)
|
||||
|
||||
sh7091.DMAC.SAR1 = source;
|
||||
sh7091.DMAC.DAR1 = destination;
|
||||
sh7091.DMAC.DMATCR1 = length & 0x00ff'ffff;
|
||||
sh7091.DMAC.DMATCR1 = transfers & 0x00ff'ffff;
|
||||
|
||||
sh7091.DMAC.CHCR1 = chcr::dm::destination_address_incremented
|
||||
| chcr::sm::source_address_incremented
|
||||
| chcr::rs::resource_select(0b0100) /* external address space → external address space */
|
||||
| chcr::tm::cycle_burst_mode /* transmit mode */
|
||||
//| chcr::tm::cycle_steal_mode /* transmit mode */
|
||||
| chcr::rs::resource_select(0b0100) /* auto request, external address space → external address space */
|
||||
| chcr::tm::cycle_burst_mode /* transmit mode */
|
||||
//| chcr::tm::cycle_steal_mode /* transmit mode */
|
||||
| chcr::ts::_32_byte /* transfer size */
|
||||
//| chcr::ie::interrupt_request_generated
|
||||
| chcr::de::channel_operation_enabled;
|
||||
}
|
||||
|
||||
// Resets the DMA controller: clears the control register of all four channels,
// then writes DMAOR with on-demand data transfer mode, the CH2 > CH0 > CH1 > CH3
// priority order, and the master enable bit.
static void dma_init()
|
||||
{
|
||||
using namespace dmac;
|
||||
|
||||
// Disable every channel before touching the operation register.
sh7091.DMAC.CHCR0 = 0;
|
||||
sh7091.DMAC.CHCR1 = 0;
|
||||
sh7091.DMAC.CHCR2 = 0;
|
||||
sh7091.DMAC.CHCR3 = 0;
|
||||
sh7091.DMAC.DMAOR = dmaor::ddt::on_demand_data_transfer_mode /* on-demand data transfer mode */
|
||||
| dmaor::pr::ch2_ch0_ch1_ch3 /* priority mode; CH2 > CH0 > CH1 > CH3 */
|
||||
| dmaor::dme::operation_enabled_on_all_channels; /* DMAC master enable */
|
||||
|
||||
}
|
||||
|
||||
static uint32_t buf[256] __attribute__((aligned(32)));
|
||||
|
||||
void main()
|
||||
{
|
||||
dma_init();
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
buf[i] = 0;
|
||||
texture_memory32[i] = (1 << 31) | i;
|
||||
@ -46,7 +62,8 @@ void main()
|
||||
|
||||
serial::integer<uint32_t>((uint32_t)&buf[0]);
|
||||
|
||||
dma((uint32_t)&texture_memory32[0], (uint32_t)&buf[0], (sizeof (buf)));
|
||||
uint32_t transfers = 256 * 4 / 32;
|
||||
dma((uint32_t)&texture_memory32[0], (uint32_t)&buf[0], transfers);
|
||||
|
||||
uint32_t last_dar = sh7091.DMAC.DAR1;
|
||||
uint32_t count = 0;
|
||||
|
97
example/ora.cpp
Normal file
97
example/ora.cpp
Normal file
@ -0,0 +1,97 @@
|
||||
#include "sh7091/sh7091.hpp"
|
||||
#include "sh7091/sh7091_bits.hpp"
|
||||
#include "sh7091/serial.hpp"
|
||||
|
||||
#include "memorymap.hpp"
|
||||
|
||||
// Starts a transfer on DMAC channel 1.
//   source       value written to SAR1
//   destination  value written to DAR1
//   transfers    transfer count written to DMATCR1 (masked to the low 24 bits);
//                presumably counted in 32-bit units given ts::_32_bit below — TODO confirm
// The channel is disabled first (CHCR1 = 0), the address/count registers are
// programmed, and the final CHCR1 write sets the mode bits and enables the channel.
static void dma(uint32_t source, uint32_t destination, uint32_t transfers)
|
||||
{
|
||||
using namespace dmac;
|
||||
|
||||
// Disable channel 1 while its registers are reprogrammed.
sh7091.DMAC.CHCR1 = 0;
|
||||
|
||||
sh7091.DMAC.SAR1 = source;
|
||||
sh7091.DMAC.DAR1 = destination;
|
||||
sh7091.DMAC.DMATCR1 = transfers & 0x00ff'ffff;
|
||||
|
||||
// Both addresses increment; the channel-enable bit is part of this same write.
sh7091.DMAC.CHCR1 = chcr::dm::destination_address_incremented
|
||||
| chcr::sm::source_address_incremented
|
||||
| chcr::rs::resource_select(0b0101) /* auto request, external address space → on-chip peripheral module */
|
||||
| chcr::tm::cycle_burst_mode /* transmit mode */
|
||||
//| chcr::tm::cycle_steal_mode /* transmit mode */
|
||||
| chcr::ts::_32_bit /* transfer size */
|
||||
//| chcr::ie::interrupt_request_generated
|
||||
| chcr::de::channel_operation_enabled;
|
||||
}
|
||||
|
||||
// Resets the DMA controller: clears the control register of all four channels,
// then writes DMAOR with normal DMA mode, the CH2 > CH0 > CH1 > CH3 priority
// order, and the master enable bit.
static void dma_init()
|
||||
{
|
||||
using namespace dmac;
|
||||
|
||||
// Disable every channel before touching the operation register.
sh7091.DMAC.CHCR0 = 0;
|
||||
sh7091.DMAC.CHCR1 = 0;
|
||||
sh7091.DMAC.CHCR2 = 0;
|
||||
sh7091.DMAC.CHCR3 = 0;
|
||||
sh7091.DMAC.DMAOR = dmaor::ddt::normal_dma_mode /* normal DMA mode (not on-demand data transfer) */
|
||||
| dmaor::pr::ch2_ch0_ch1_ch3 /* priority mode; CH2 > CH0 > CH1 > CH3 */
|
||||
| dmaor::dme::operation_enabled_on_all_channels; /* DMAC master enable */
|
||||
|
||||
}
|
||||
|
||||
// Operand-cache-as-RAM DMA experiment:
//  1. sets the CCR bit that uses 8 KB of the operand cache as RAM,
//  2. zeroes the first 256 words of oc_a and writes a test pattern to texture memory,
//  3. starts a 16-unit channel-1 DMA from texture memory toward oc_a,
//  4. polls for completion, logging DMAC registers over serial, and
//  5. dumps the first 64 words of oc_a unless the poll loop gave up.
void main()
|
||||
{
|
||||
sh7091.CCN.CCR |= ccn::ccr::ora::_8_kbytes_used_as_cache_8_kbytes_used_as_ram;
|
||||
|
||||
dma_init();
|
||||
|
||||
// from entry 128 to entry 255 and from entry 384 to entry 511 of the OC are to be used as RAM
|
||||
uint32_t * oc_a = &sh7091_oc_d[128 * 32 / 4]; // 1024 words
|
||||
// NOTE(review): oc_b is not referenced anywhere else in this function.
uint32_t * oc_b = &sh7091_oc_d[384 * 32 / 4]; // 1024 words
|
||||
|
||||
// Clear the destination and write a recognisable pattern (bit 31 set, index in the low bits) to the source.
for (int i = 0; i < 256; i++) {
|
||||
oc_a[i] = 0;
|
||||
texture_memory32[i] = (1 << 31) | i;
|
||||
}
|
||||
|
||||
serial::string("tm: ");
|
||||
serial::integer<uint32_t>((uint32_t)&texture_memory32[0]);
|
||||
serial::string("oc_a: ");
|
||||
serial::integer<uint32_t>((uint32_t)&oc_a[0]);
|
||||
|
||||
serial::string("dmaor: ");
|
||||
serial::integer<uint32_t>(sh7091.DMAC.DMAOR);
|
||||
|
||||
// 16 transfers requested (64 / 4).
uint32_t transfers = 64 / 4;
|
||||
dma((uint32_t)&texture_memory32[0], (uint32_t)&oc_a[0], transfers);
|
||||
|
||||
serial::string("sar: ");
|
||||
serial::integer<uint32_t>(sh7091.DMAC.SAR1);
|
||||
serial::string("dar: ");
|
||||
serial::integer<uint32_t>(sh7091.DMAC.DAR1);
|
||||
|
||||
// Poll the transfer-end flag. `count` tracks consecutive polls on which DAR1
// equals last_dar; after more than 10 such polls the transfer is treated as
// stalled and the result dump is skipped.
// NOTE(review): last_dar is never updated inside the loop, so the comparison
// is always against the value sampled here.
uint32_t last_dar = sh7091.DMAC.DAR1;
|
||||
uint32_t count = 0;
|
||||
while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0) {
|
||||
uint32_t dar = sh7091.DMAC.DAR1;
|
||||
if (dar == last_dar)
|
||||
count += 1;
|
||||
else
|
||||
count = 0;
|
||||
if (count > 10)
|
||||
goto return_main;
|
||||
// DMAOR is logged on every poll iteration.
serial::integer<uint32_t>(sh7091.DMAC.DMAOR);
|
||||
}
|
||||
|
||||
serial::string("dmaor: ");
|
||||
serial::integer<uint32_t>(sh7091.DMAC.DMAOR);
|
||||
// Dump the first 64 words of the destination region.
serial::string("buf:\n");
|
||||
for (int i = 0; i < 64; i++) {
|
||||
serial::integer<uint32_t>(oc_a[i]);
|
||||
}
|
||||
|
||||
// Common exit: reached both after the dump and when the poll loop bails out.
return_main:
|
||||
serial::string("return\n");
|
||||
serial::string("return\n");
|
||||
serial::string("return\n");
|
||||
serial::string("return\n");
|
||||
}
|
@ -25,7 +25,7 @@
|
||||
|
||||
#include "geometry/wiffle.hpp"
|
||||
|
||||
void convolve(uint32_t * in, uint32_t * out);
|
||||
#include "sobel.hpp"
|
||||
|
||||
constexpr float half_degree = 0.01745329f / 2;
|
||||
|
||||
@ -250,8 +250,31 @@ void dma_init()
|
||||
}
|
||||
|
||||
static uint32_t inbuf[640 * 480] __attribute__((aligned(32)));
|
||||
static float temp[640 * 480] __attribute__((aligned(32)));
|
||||
static uint32_t outbuf[640 * 480] __attribute__((aligned(32)));
|
||||
|
||||
// Fills temp[] from inbuf[]: each 32-bit input word is split into its four
// bytes, and the bytes' average (sum * 0.25) is stored as a float at the same index.
void make_temp()
|
||||
{
|
||||
for (int i = 0; i < 640 * 480; i++) {
|
||||
// Once every 32 elements (128 bytes of inbuf) issue a `pref` for the
// address of the element about to be read.
// NOTE(review): confirm this stride matches the cache line size, and
// whether prefetching the current (rather than an upcoming) address is intended.
if ((i & 31) == 0) {
|
||||
asm volatile ("pref @%0"
|
||||
: // output
|
||||
: "r" ((uint32_t)&inbuf[i]) // input
|
||||
);
|
||||
}
|
||||
uint32_t n = inbuf[i];
|
||||
// Accumulate the four bytes of n, lowest byte first.
uint32_t sum;
|
||||
sum = n & 0xff;
|
||||
n >>= 8;
|
||||
sum += n & 0xff;
|
||||
n >>= 8;
|
||||
sum += n & 0xff;
|
||||
n >>= 8;
|
||||
sum += n & 0xff;
|
||||
// NOTE(review): 0.25 is a double literal, so the multiply is done in
// double before the cast; 0.25f would keep it in single precision.
temp[i] = (float)(sum * 0.25);
|
||||
}
|
||||
}
|
||||
|
||||
void main()
|
||||
{
|
||||
dma_init();
|
||||
@ -356,8 +379,6 @@ void main()
|
||||
|
||||
serial::string("ch1 dma start\n");
|
||||
dma_transfer((uint32_t)in, (uint32_t)inbuf, 640 * 480 * 4 / 32);
|
||||
while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0);
|
||||
serial::string("ch1 dma end\n");
|
||||
|
||||
for (uint32_t i = 0; i < (sizeof (640 * 480 * 4)) / 32; i++) {
|
||||
uint32_t address = (uint32_t)&inbuf[0];
|
||||
@ -367,8 +388,15 @@ void main()
|
||||
);
|
||||
}
|
||||
|
||||
while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0);
|
||||
serial::string("ch1 dma end\n");
|
||||
|
||||
serial::string("temp start\n");
|
||||
make_temp();
|
||||
serial::string("temp end\n");
|
||||
|
||||
serial::string("convolve start\n");
|
||||
convolve(inbuf, outbuf);
|
||||
convolve(temp, outbuf);
|
||||
serial::string("convolve end\n");
|
||||
|
||||
uint32_t framebuffer = 0x11000000 + texture_memory_alloc.framebuffer[0].start; // TA FIFO - Direct Texture Path
|
||||
|
@ -12,3 +12,4 @@ extern volatile uint32_t ta_fifo_polygon_converter_mirror[0x800000] __asm("ta_fi
|
||||
extern volatile uint32_t ta_fifo_yuv_converter_mirror[0x800000] __asm("ta_fifo_yuv_converter_mirror");
|
||||
extern volatile uint32_t ta_fifo_texture_memory_mirror[0x800000] __asm("ta_fifo_texture_memory_mirror");
|
||||
extern uint32_t store_queue[0x4000000] __asm("store_queue");
|
||||
extern uint32_t sh7091_oc_d[0x1000] __asm("sh7091_oc_d");
|
||||
|
136
sobel.cpp
136
sobel.cpp
@ -1,102 +1,76 @@
|
||||
#include <stdint.h>
|
||||
|
||||
int clamp255(float v)
|
||||
{
|
||||
int n = (int)v;
|
||||
if (n < 0)
|
||||
return 0;
|
||||
if (n > 255)
|
||||
return 255;
|
||||
return n;
|
||||
}
|
||||
#include "sobel.hpp"
|
||||
|
||||
uint32_t getpx(uint32_t * buf, int x, int y)
|
||||
static inline float getpx(float * buf, int x, int y)
|
||||
{
|
||||
if (x < 0)
|
||||
x = 0;
|
||||
if (y < 0)
|
||||
y = 0;
|
||||
if (x >= 640)
|
||||
x = 640 - 1;
|
||||
if (y >= 480)
|
||||
y = 480 - 1;
|
||||
return buf[y * 640 + x];
|
||||
}
|
||||
|
||||
float multiply(uint32_t * buf, int x, int y, float weight)
|
||||
static inline float kernel2(float * buf, int x, int y)
|
||||
{
|
||||
uint32_t color = getpx(buf, x, y);
|
||||
int b = color & 0xff;
|
||||
color >>= 8;
|
||||
int g = color & 0xff;
|
||||
color >>= 8;
|
||||
int r = color & 0xff;
|
||||
color >>= 8;
|
||||
int a = color;
|
||||
|
||||
float luminance = (float)(r + g + b + a) * 0.25;
|
||||
return luminance * (float)weight;
|
||||
}
|
||||
|
||||
float kernel(uint32_t * buf, const float * weights, int x, int y)
|
||||
{
|
||||
float c = 0;
|
||||
c += multiply(buf, x - 1, y - 1, weights[0]);
|
||||
c += multiply(buf, x , y - 1, weights[1]);
|
||||
c += multiply(buf, x + 1, y - 1, weights[2]);
|
||||
|
||||
c += multiply(buf, x - 1, y , weights[3]);
|
||||
c += multiply(buf, x , y , weights[4]);
|
||||
c += multiply(buf, x + 1, y , weights[5]);
|
||||
|
||||
c += multiply(buf, x - 1, y + 1, weights[6]);
|
||||
c += multiply(buf, x , y + 1, weights[7]);
|
||||
c += multiply(buf, x + 1, y + 1, weights[8]);
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
const float gx[] = {
|
||||
constexpr float gx[] = {
|
||||
1, 0, -1,
|
||||
2, 0, -2,
|
||||
1, 0, -1,
|
||||
};
|
||||
};
|
||||
|
||||
const float gy[] = {
|
||||
constexpr float gy[] = {
|
||||
1, 2, 1,
|
||||
0, 0, 0,
|
||||
-1, -2, -1,
|
||||
};
|
||||
};
|
||||
|
||||
void convolve(uint32_t * in, uint32_t * out)
|
||||
float a = getpx(buf, x - 1, y - 1);
|
||||
float b = getpx(buf, x , y - 1);
|
||||
float c = getpx(buf, x + 1, y - 1);
|
||||
|
||||
float d = getpx(buf, x - 1, y );
|
||||
float e = getpx(buf, x , y );
|
||||
float f = getpx(buf, x + 1, y );
|
||||
|
||||
float g = getpx(buf, x - 1, y + 1);
|
||||
float h = getpx(buf, x , y + 1);
|
||||
float i = getpx(buf, x + 1, y + 1);
|
||||
|
||||
float sx = 0;
|
||||
float sy = 0;
|
||||
|
||||
sx += a * gx[0];
|
||||
//sx += b * gx[1];
|
||||
sx += c * gx[2];
|
||||
|
||||
sx += d * gx[3];
|
||||
//sx += e * gx[4];
|
||||
sx += f * gx[5];
|
||||
|
||||
sx += g * gx[6];
|
||||
//sx += h * gx[7];
|
||||
sx += i * gx[8];
|
||||
|
||||
sy += a * gy[0];
|
||||
sy += b * gy[1];
|
||||
sy += c * gy[2];
|
||||
|
||||
//sy += d * gy[3];
|
||||
//sy += e * gy[4];
|
||||
//sy += f * gy[5];
|
||||
|
||||
sy += g * gy[6];
|
||||
sy += h * gy[7];
|
||||
sy += i * gy[8];
|
||||
|
||||
return sx * sx + sy * sy;
|
||||
}
|
||||
|
||||
void convolve(float * in, uint32_t * out)
|
||||
{
|
||||
for (int y = 0; y < 480; y++) {
|
||||
for (int x = 0; x < 640; x++) {
|
||||
float vx = kernel(in, gx, x, y);
|
||||
float vy = kernel(in, gy, x, y);
|
||||
float c = vx * vx + vy * vy;
|
||||
int d = c > 100.f ? 0 : 1;
|
||||
uint32_t color = in[y * 640 + x];
|
||||
for (int y = 1; y < 480 - 1; y++) {
|
||||
for (int x = 1; x < 640 - 1; x++) {
|
||||
float c = kernel2(in, x, y);
|
||||
int d = c > 100.f ? 0 : 0xffffffff;
|
||||
|
||||
int b = color & 0xff;
|
||||
color >>= 8;
|
||||
int g = color & 0xff;
|
||||
color >>= 8;
|
||||
int r = color & 0xff;
|
||||
color >>= 8;
|
||||
int a = color;
|
||||
|
||||
uint32_t color_out = 0;
|
||||
|
||||
//color_out |= (a * d);
|
||||
//color_out <<= 8;
|
||||
color_out |= (r * d);
|
||||
color_out <<= 8;
|
||||
color_out |= (g * d);
|
||||
color_out <<= 8;
|
||||
color_out |= (b * d);
|
||||
|
||||
out[y * 640 + x] = color_out;
|
||||
out[y * 640 + x] = (uint8_t)d;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
3
sobel.hpp
Normal file
3
sobel.hpp
Normal file
@ -0,0 +1,3 @@
|
||||
#pragma once

// uint32_t is used in the prototype below; include it here so this header
// compiles regardless of what the including file pulled in beforehand.
#include <stdint.h>

/*
 * Edge-detection pass implemented in sobel.cpp.
 *
 *   in   input samples, one float per pixel
 *   out  output buffer, one 32-bit word per pixel
 *
 * NOTE(review): the implementation indexes both buffers as 640-wide rows;
 * confirm the required buffer sizes against sobel.cpp.
 */
void convolve(float * in, uint32_t * out);
|
61
sobel_fipr.s
Normal file
61
sobel_fipr.s
Normal file
@ -0,0 +1,61 @@
|
||||
/* fv0 fv4 fv8 fv12 */
|
||||
/*
 * _sobel_y (work in progress)
 *
 * Builds two constant 4-vectors, fv8 = (1, 2, 1, 0) and fv12 = (-1, -2, -1, 0),
 * loads three consecutive floats from the address passed in r4 and three more
 * from one row (640 floats) further on, and combines them with two FIPR
 * inner products whose results are then added together.
 *
 * r4 is copied to r0 (load base) and r5 to r7 (store address) because r4-r6
 * are reused below as row offsets.
 */
.global _sobel_y
|
||||
_sobel_y:
|
||||
__setup:
|
||||
/* r3 = address of the 100.0 constant; r3 is not read again in this file */
mova _const_100f,r0
|
||||
mov r0,r3
|
||||
|
||||
mov r4,r0
|
||||
mov r5,r7 /* r5 saved as r7 */
|
||||
|
||||
/* fv8 = (fr8, fr9, fr10, fr11); fr9 is loaded as 1.0 and doubled by the fadd below */
fldi1 fr8 /* 1.0 */
|
||||
fldi1 fr9 /* 2.0 */
|
||||
fldi1 fr10 /* 1.0 */
|
||||
fldi0 fr11 /* 0.0 */
|
||||
fadd fr9,fr9
|
||||
|
||||
/* fv12 = (fr12, fr13, fr14, fr15) = (-1, -2, -1, 0) after the three fneg below */
fldi1 fr12
|
||||
fmov fr9,fr13
|
||||
fldi1 fr14
|
||||
fldi0 fr15
|
||||
fneg fr12
|
||||
fneg fr13
|
||||
fneg fr14
|
||||
|
||||
/* offsets */
|
||||
/* r1, r2: byte offsets of the 2nd and 3rd float in the current row */
mov #(1 * 4),r1
|
||||
mov #(2 * 4),r2
|
||||
/* r4, r5, r6: byte offsets of floats 640, 641 and 642, i.e. the same three columns one row down */
mov.w _const_640,r4
|
||||
mov.w _const_644,r5
|
||||
mov.w _const_648,r6
|
||||
|
||||
/* jump over the constant pool; the nop fills the branch delay slot */
bra _loop
|
||||
nop
|
||||
|
||||
.align 4
|
||||
_const_100f: .float 100
|
||||
_const_640: .word (640 * 4)
|
||||
/* note: despite their names, the next two labels hold 641 * 4 and 642 * 4 */
_const_644: .word (641 * 4)
|
||||
_const_648: .word (642 * 4)
|
||||
|
||||
/* NOTE(review): nothing branches back to _loop; the body currently runs once and returns */
_loop:
|
||||
/* fv0 = (row0[0], row0[1], row0[2], 0) */
fmov.s @r0,fr0
|
||||
fmov.s @(r0,r1),fr1
|
||||
fmov.s @(r0,r2),fr2
|
||||
fldi0 fr3
|
||||
/* inner product of fv8 and fv0; per the SH-4 FIPR definition the result is written to fr3 */
fipr fv8,fv0
|
||||
|
||||
/* fv4 = (row1[0], row1[1], row1[2], 0) */
fmov.s @(r0,r4),fr4
|
||||
fmov.s @(r0,r5),fr5
|
||||
fmov.s @(r0,r6),fr6
|
||||
fldi0 fr7
|
||||
/* inner product of fv12 and fv4; result is written to fr7 */
fipr fv12,fv4
|
||||
|
||||
/* advance the load base by one float */
add #4,r0
|
||||
|
||||
/* fr7 = fr7 + fr3: sum of the two row products */
fadd fr3,fr7
|
||||
|
||||
/* NOTE(review): the source operand here is general register r1, but fmov.s
   stores take a floating-point register — presumably fr7 was intended; confirm */
fmov.s r1,@-r7
|
||||
|
||||
/* return; the nop fills the delay slot */
rts
|
||||
nop
|
Loading…
x
Reference in New Issue
Block a user