sobel_fipr

This commit is contained in:
Zack Buhman 2025-01-26 03:04:42 -06:00
parent bf2c9a3d8c
commit 0265a1c4ed
9 changed files with 563 additions and 100 deletions

View File

@ -756,7 +756,15 @@ example/holly_recv_dma.elf: $(START_OBJ) $(HOLLY_RECV_DMA_OBJ)
FIPR_OBJ = \ FIPR_OBJ = \
example/fipr.o \ example/fipr.o \
fipr.o \ fipr.o \
sobel_fipr.o \
sh7091/serial.o sh7091/serial.o
example/fipr.elf: LDSCRIPT = $(LIB)/main.lds example/fipr.elf: LDSCRIPT = $(LIB)/main.lds
example/fipr.elf: $(START_OBJ) $(FIPR_OBJ) example/fipr.elf: $(START_OBJ) $(FIPR_OBJ)
ORA_OBJ = \
example/ora.o \
sh7091/serial.o
example/ora.elf: LDSCRIPT = $(LIB)/main.lds
example/ora.elf: $(START_OBJ) $(ORA_OBJ)

View File

@ -1,10 +1,90 @@
#include "stdint.h" #include "stdint.h"
#include "sh7091/sh7091.hpp"
#include "sh7091/sh7091_bits.hpp"
#include "sh7091/vbr.hpp"
#include "sh7091/serial.hpp" #include "sh7091/serial.hpp"
#include "systembus.hpp"
extern "C" float fipr(float * a, float * b); extern "C" float fipr(float * a, float * b);
extern "C" void sobel_fipr(float * a, int * i);
void main() void vbr100()
{
serial::string("vbr100\n");
serial::string("expevt ");
serial::integer<uint16_t>(sh7091.CCN.EXPEVT);
serial::string("intevt ");
serial::integer<uint16_t>(sh7091.CCN.INTEVT);
serial::string("tra ");
serial::integer<uint16_t>(sh7091.CCN.TRA);
uint32_t spc;
uint32_t ssr;
asm volatile ("stc spc,%0"
: "=r" (spc)
);
asm volatile ("stc ssr,%0"
: "=r" (ssr)
);
serial::string("spc ");
serial::integer(spc);
serial::string("ssr ");
serial::integer(ssr);
while (1);
}
/*
 * Diagnostic handler: dumps the exception state over serial and halts.
 *
 * Prints, in order, the CCN EXPEVT / INTEVT / TRA registers (as 16-bit
 * values) followed by the SPC and SSR control registers, then never returns.
 *
 * NOTE(review): presumably installed at VBR + 0x400 by the linker script /
 * vbr.hpp -- the hookup is not visible in this file.
 */
void vbr400()
{
  serial::string("vbr400\n");
  serial::string("expevt ");
  serial::integer<uint16_t>(sh7091.CCN.EXPEVT);
  serial::string("intevt ");
  serial::integer<uint16_t>(sh7091.CCN.INTEVT);
  serial::string("tra ");
  serial::integer<uint16_t>(sh7091.CCN.TRA);

  // saved program counter / status register of the interrupted context
  uint32_t spc;
  uint32_t ssr;
  asm volatile ("stc spc,%0"
                : "=r" (spc)
                );
  asm volatile ("stc ssr,%0"
                : "=r" (ssr)
                );
  serial::string("spc ");
  serial::integer(spc);
  serial::string("ssr ");
  serial::integer(ssr);

  // Halt here. The empty volatile asm gives the loop a side effect: an
  // infinite loop with no side effects is undefined behaviour before C++26,
  // so an optimizer would otherwise be allowed to delete it.
  while (1) {
    asm volatile ("");
  }
}
/*
 * Diagnostic handler: dumps the exception state over serial and halts.
 *
 * Prints, in order, the CCN EXPEVT / INTEVT / TRA registers (as 16-bit
 * values) followed by the SPC and SSR control registers, then never returns.
 *
 * NOTE(review): presumably installed at VBR + 0x600 by the linker script /
 * vbr.hpp -- the hookup is not visible in this file.
 */
void vbr600()
{
  serial::string("vbr600\n");
  serial::string("expevt ");
  serial::integer<uint16_t>(sh7091.CCN.EXPEVT);
  serial::string("intevt ");
  serial::integer<uint16_t>(sh7091.CCN.INTEVT);
  serial::string("tra ");
  serial::integer<uint16_t>(sh7091.CCN.TRA);

  // saved program counter / status register of the interrupted context
  uint32_t spc;
  uint32_t ssr;
  asm volatile ("stc spc,%0"
                : "=r" (spc)
                );
  asm volatile ("stc ssr,%0"
                : "=r" (ssr)
                );
  serial::string("spc ");
  serial::integer(spc);
  serial::string("ssr ");
  serial::integer(ssr);

  // Halt here. The empty volatile asm gives the loop a side effect: an
  // infinite loop with no side effects is undefined behaviour before C++26,
  // so an optimizer would otherwise be allowed to delete it.
  while (1) {
    asm volatile ("");
  }
}
void test1()
{ {
float a[] = {1, 2, 3, 4}; float a[] = {1, 2, 3, 4};
float b[] = {5, 6, 7, 8}; float b[] = {5, 6, 7, 8};
@ -18,7 +98,110 @@ void main()
v.f = fipr(a, b); v.f = fipr(a, b);
serial::integer(v.i); serial::integer(v.i);
serial::integer(v.i); }
serial::integer(v.i);
/*
 * Smoke test for sobel_fipr(): seeds a 3x3 neighbourhood in the top-left
 * corner of a 640x480 float image, runs the routine, and prints a handful of
 * output words (three just after the start of row 1, six just before the end
 * of row 478).
 */
void test2()
{
  // Two 640*480*4-byte buffers (~2.4 MB together): keep them in static
  // storage rather than on the stack.
  static float a[640 * 480];
  static int i[640 * 480];

  // Give every input pixel a defined value before the FPU reads it, and fill
  // the output with a sentinel so untouched words are recognisable.
  for (int j = 0; j < 640 * 480; j++) {
    a[j] = 0.f;
    i[j] = 0xeeeeeeee;
  }

  // rows 0..2, columns 0..2
  a[0] = 11;
  a[1] = 12;
  a[2] = 13;
  a[0 + 640] = 1400;
  a[1 + 640] = 1500;
  a[2 + 640] = 1600;
  a[0 + 1280] = 170000;
  a[1 + 1280] = 180000;
  a[2 + 1280] = 190000;

  // For the seeded 3x3 block:
  //   vertical gradient   (row 0 - row 2, weights 1 2 1): -719952
  //   horizontal gradient (col 0 - col 2, weights 1 2 1): -20402
  //   sum of squares:                                     518747123908

  // expected value:

  sobel_fipr(a, i);

  // -5952

  int v;
  v = i[640 + 1];
  serial::integer<uint32_t>(v);
  v = i[640 + 2];
  serial::integer<uint32_t>(v);
  v = i[640 + 3];
  serial::integer<uint32_t>(v); // previously loaded and overwritten without being printed

  v = i[640 * 479 - 1];
  serial::integer<uint32_t>(v);
  v = i[640 * 479 - 2];
  serial::integer<uint32_t>(v);
  v = i[640 * 479 - 3];
  serial::integer<uint32_t>(v);
  v = i[640 * 479 - 4];
  serial::integer<uint32_t>(v);
  v = i[640 * 479 - 5];
  serial::integer<uint32_t>(v);
  v = i[640 * 479 - 6];
  serial::integer<uint32_t>(v);
}
/*
 * Bring the exception machinery into a known state:
 *  - zero the nine system-bus IML{2,4,6}{NRM,ERR,EXT} registers,
 *  - zero CCN.INTEVT and CCN.EXPEVT,
 *  - load VBR with (&__vbr_link_start - 0x100),
 *  - clear SR.BL and OR imask(15) into SR, printing SR before and after.
 */
void init_interrupt()
{
  system.IML2NRM = 0;
  system.IML2ERR = 0;
  system.IML2EXT = 0;

  system.IML4NRM = 0;
  system.IML4ERR = 0;
  system.IML4EXT = 0;

  system.IML6NRM = 0;
  system.IML6ERR = 0;
  system.IML6EXT = 0;

  sh7091.CCN.INTEVT = 0;
  sh7091.CCN.EXPEVT = 0;

  // vector base: 0x100 below the linked handler area
  const uint32_t vector_base = reinterpret_cast<uint32_t>(&__vbr_link_start) - 0x100;
  asm volatile ("ldc %0,vbr" : : "r" (vector_base));

  // read-modify-write of the status register, logging both values
  uint32_t status;
  asm volatile ("stc sr,%0" : "=r" (status));
  serial::string("sr ");
  serial::integer<uint32_t>(status);

  status = (status & ~sh::sr::bl) // BL
         | sh::sr::imask(15);     // imask
  serial::string("sr ");
  serial::integer<uint32_t>(status);

  asm volatile ("ldc %0,sr" : : "r" (status));
}
/*
 * Entry point of the fipr example: set up exception reporting, then run
 * test1 and test2, bracketing them with (repeated) serial markers.
 */
void main()
{
  init_interrupt();

  serial::string("test1:\n");
  test1();

  // marker is emitted three times before test2 starts
  for (int n = 0; n < 3; n++) {
    serial::string("test2:\n");
  }
  test2();

  // ...and three times once it has returned
  for (int n = 0; n < 3; n++) {
    serial::string("return\n");
  }
}

View File

@ -3,7 +3,7 @@
#include "sh7091/serial.hpp" #include "sh7091/serial.hpp"
#include "memorymap.hpp" #include "memorymap.hpp"
static void dma(uint32_t source, uint32_t destination, uint32_t length) static void dma(uint32_t source, uint32_t destination, uint32_t transfers)
{ {
using namespace dmac; using namespace dmac;
@ -11,22 +11,38 @@ static void dma(uint32_t source, uint32_t destination, uint32_t length)
sh7091.DMAC.SAR1 = source; sh7091.DMAC.SAR1 = source;
sh7091.DMAC.DAR1 = destination; sh7091.DMAC.DAR1 = destination;
sh7091.DMAC.DMATCR1 = length & 0x00ff'ffff; sh7091.DMAC.DMATCR1 = transfers & 0x00ff'ffff;
sh7091.DMAC.CHCR1 = chcr::dm::destination_address_incremented sh7091.DMAC.CHCR1 = chcr::dm::destination_address_incremented
| chcr::sm::source_address_incremented | chcr::sm::source_address_incremented
| chcr::rs::resource_select(0b0100) /* external address space → external address space */ | chcr::rs::resource_select(0b0100) /* auto request, external address space → external address space */
| chcr::tm::cycle_burst_mode /* transmit mode */ | chcr::tm::cycle_burst_mode /* transmit mode */
//| chcr::tm::cycle_steal_mode /* transmit mode */ //| chcr::tm::cycle_steal_mode /* transmit mode */
| chcr::ts::_32_byte /* transfer size */ | chcr::ts::_32_byte /* transfer size */
//| chcr::ie::interrupt_request_generated //| chcr::ie::interrupt_request_generated
| chcr::de::channel_operation_enabled; | chcr::de::channel_operation_enabled;
} }
/*
 * Clear the control register of all four DMAC channels, then program DMAOR:
 * on-demand data transfer mode, priority CH2 > CH0 > CH1 > CH3, master enable.
 */
static void dma_init()
{
  using namespace dmac;

  sh7091.DMAC.CHCR0 = 0;
  sh7091.DMAC.CHCR1 = 0;
  sh7091.DMAC.CHCR2 = 0;
  sh7091.DMAC.CHCR3 = 0;

  const uint32_t operation
    = dmaor::ddt::on_demand_data_transfer_mode        // on-demand data transfer mode
    | dmaor::pr::ch2_ch0_ch1_ch3                      // priority mode; CH2 > CH0 > CH1 > CH3
    | dmaor::dme::operation_enabled_on_all_channels;  // DMAC master enable
  sh7091.DMAC.DMAOR = operation;
}
static uint32_t buf[256] __attribute__((aligned(32))); static uint32_t buf[256] __attribute__((aligned(32)));
void main() void main()
{ {
dma_init();
for (int i = 0; i < 256; i++) { for (int i = 0; i < 256; i++) {
buf[i] = 0; buf[i] = 0;
texture_memory32[i] = (1 << 31) | i; texture_memory32[i] = (1 << 31) | i;
@ -46,7 +62,8 @@ void main()
serial::integer<uint32_t>((uint32_t)&buf[0]); serial::integer<uint32_t>((uint32_t)&buf[0]);
dma((uint32_t)&texture_memory32[0], (uint32_t)&buf[0], (sizeof (buf))); uint32_t transfers = 256 * 4 / 32;
dma((uint32_t)&texture_memory32[0], (uint32_t)&buf[0], transfers);
uint32_t last_dar = sh7091.DMAC.DAR1; uint32_t last_dar = sh7091.DMAC.DAR1;
uint32_t count = 0; uint32_t count = 0;

97
example/ora.cpp Normal file
View File

@ -0,0 +1,97 @@
#include "sh7091/sh7091.hpp"
#include "sh7091/sh7091_bits.hpp"
#include "sh7091/serial.hpp"
#include "memorymap.hpp"
/*
 * Start a transfer on DMAC channel 1.
 *
 * source / destination: written to SAR1 / DAR1 unchanged.
 * transfers: transfer count; only the low 24 bits reach DMATCR1. With the
 *            32-bit transfer size selected below, one transfer is one word.
 *
 * The channel is cleared first (CHCR1 = 0) and enabled by the final CHCR1
 * write; the function does not wait for completion.
 */
static void dma(uint32_t source, uint32_t destination, uint32_t transfers)
{
  using namespace dmac;

  const uint32_t control
    = chcr::dm::destination_address_incremented
    | chcr::sm::source_address_incremented
    | chcr::rs::resource_select(0b0101)  // auto request, external address space → on-chip peripheral module
    | chcr::tm::cycle_burst_mode         // transmit mode
    //| chcr::tm::cycle_steal_mode       // transmit mode
    | chcr::ts::_32_bit                  // transfer size
    //| chcr::ie::interrupt_request_generated
    | chcr::de::channel_operation_enabled;

  sh7091.DMAC.CHCR1 = 0;

  sh7091.DMAC.SAR1 = source;
  sh7091.DMAC.DAR1 = destination;
  sh7091.DMAC.DMATCR1 = transfers & 0x00ff'ffff;

  sh7091.DMAC.CHCR1 = control;
}
/*
 * Clear the control register of all four DMAC channels, then program DMAOR
 * with normal DMA mode, the CH2 > CH0 > CH1 > CH3 priority order and the
 * master-enable bit.
 */
static void dma_init()
{
using namespace dmac;
sh7091.DMAC.CHCR0 = 0;
sh7091.DMAC.CHCR1 = 0;
sh7091.DMAC.CHCR2 = 0;
sh7091.DMAC.CHCR3 = 0;
sh7091.DMAC.DMAOR = dmaor::ddt::normal_dma_mode /* normal DMA mode (not on-demand data transfer) */
| dmaor::pr::ch2_ch0_ch1_ch3 /* priority mode; CH2 > CH0 > CH1 > CH3 */
| dmaor::dme::operation_enabled_on_all_channels; /* DMAC master enable */
}
/*
 * Operand-cache-as-RAM experiment: sets the ORA bit in CCR, fills texture
 * memory with a recognisable pattern, DMAs 16 words from texture memory into
 * the first OC RAM window on channel 1, and dumps the result over serial.
 *
 * While waiting for the transfer-end flag the destination address register is
 * polled; if it stops advancing for more than 10 consecutive polls the wait is
 * abandoned and the buffer dump is skipped.
 */
void main()
{
  sh7091.CCN.CCR |= ccn::ccr::ora::_8_kbytes_used_as_cache_8_kbytes_used_as_ram;

  dma_init();

  // from entry 128 to entry 255 and from entry 384 to entry 511 of the OC are to be used as RAM
  uint32_t * oc_a = &sh7091_oc_d[128 * 32 / 4]; // 1024 words
  // (the second window, entries 384..511, would start at &sh7091_oc_d[384 * 32 / 4]; unused here)

  for (int i = 0; i < 256; i++) {
    oc_a[i] = 0;
    texture_memory32[i] = (1 << 31) | i;
  }

  serial::string("tm: ");
  serial::integer<uint32_t>((uint32_t)&texture_memory32[0]);
  serial::string("oc_a: ");
  serial::integer<uint32_t>((uint32_t)&oc_a[0]);

  serial::string("dmaor: ");
  serial::integer<uint32_t>(sh7091.DMAC.DMAOR);

  uint32_t transfers = 64 / 4;
  dma((uint32_t)&texture_memory32[0], (uint32_t)&oc_a[0], transfers);

  serial::string("sar: ");
  serial::integer<uint32_t>(sh7091.DMAC.SAR1);
  serial::string("dar: ");
  serial::integer<uint32_t>(sh7091.DMAC.DAR1);

  uint32_t last_dar = sh7091.DMAC.DAR1;
  uint32_t count = 0;
  bool stalled = false;
  while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0) {
    uint32_t dar = sh7091.DMAC.DAR1;
    if (dar == last_dar)
      count += 1;
    else
      count = 0;
    // Track the most recent address: without this the comparison is always
    // against the value sampled before the loop, so a stall at any later
    // address keeps resetting the counter and the loop never gives up.
    last_dar = dar;
    if (count > 10) {
      stalled = true;
      break;
    }
    serial::integer<uint32_t>(sh7091.DMAC.DMAOR);
  }

  if (!stalled) {
    serial::string("dmaor: ");
    serial::integer<uint32_t>(sh7091.DMAC.DMAOR);

    serial::string("buf:\n");
    for (int i = 0; i < 64; i++) {
      serial::integer<uint32_t>(oc_a[i]);
    }
  }

  serial::string("return\n");
  serial::string("return\n");
  serial::string("return\n");
  serial::string("return\n");
}

View File

@ -25,7 +25,7 @@
#include "geometry/wiffle.hpp" #include "geometry/wiffle.hpp"
void convolve(uint32_t * in, uint32_t * out); #include "sobel.hpp"
constexpr float half_degree = 0.01745329f / 2; constexpr float half_degree = 0.01745329f / 2;
@ -250,8 +250,31 @@ void dma_init()
} }
static uint32_t inbuf[640 * 480] __attribute__((aligned(32))); static uint32_t inbuf[640 * 480] __attribute__((aligned(32)));
static float temp[640 * 480] __attribute__((aligned(32)));
static uint32_t outbuf[640 * 480] __attribute__((aligned(32))); static uint32_t outbuf[640 * 480] __attribute__((aligned(32)));
/*
 * Convert every 32-bit word of inbuf[] into one float in temp[]: the mean of
 * the word's four bytes (sum of the bytes times 0.25).
 */
void make_temp()
{
  for (int i = 0; i < 640 * 480; i++) {
    // Issue a prefetch for the current address once every 32 words.
    // NOTE(review): 32 words is 128 bytes; confirm this stride against the
    // cache line size and consider prefetching ahead of the read position.
    if ((i & 31) == 0) {
      asm volatile ("pref @%0"
                    : // output
                    : "r" ((uint32_t)&inbuf[i]) // input
                    );
    }

    uint32_t n = inbuf[i];
    uint32_t sum = 0;
    for (int byte = 0; byte < 4; byte++) {
      sum += n & 0xff;
      n >>= 8;
    }

    // sum <= 4 * 255, so the float conversion and the multiply by 0.25f are
    // both exact: same result as the former `sum * 0.25`, without promoting
    // the per-pixel arithmetic to double.
    temp[i] = (float)sum * 0.25f;
  }
}
void main() void main()
{ {
dma_init(); dma_init();
@ -356,8 +379,6 @@ void main()
serial::string("ch1 dma start\n"); serial::string("ch1 dma start\n");
dma_transfer((uint32_t)in, (uint32_t)inbuf, 640 * 480 * 4 / 32); dma_transfer((uint32_t)in, (uint32_t)inbuf, 640 * 480 * 4 / 32);
while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0);
serial::string("ch1 dma end\n");
for (uint32_t i = 0; i < (sizeof (640 * 480 * 4)) / 32; i++) { for (uint32_t i = 0; i < (sizeof (640 * 480 * 4)) / 32; i++) {
uint32_t address = (uint32_t)&inbuf[0]; uint32_t address = (uint32_t)&inbuf[0];
@ -367,8 +388,15 @@ void main()
); );
} }
while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0);
serial::string("ch1 dma end\n");
serial::string("temp start\n");
make_temp();
serial::string("temp end\n");
serial::string("convolve start\n"); serial::string("convolve start\n");
convolve(inbuf, outbuf); convolve(temp, outbuf);
serial::string("convolve end\n"); serial::string("convolve end\n");
uint32_t framebuffer = 0x11000000 + texture_memory_alloc.framebuffer[0].start; // TA FIFO - Direct Texture Path uint32_t framebuffer = 0x11000000 + texture_memory_alloc.framebuffer[0].start; // TA FIFO - Direct Texture Path

View File

@ -12,3 +12,4 @@ extern volatile uint32_t ta_fifo_polygon_converter_mirror[0x800000] __asm("ta_fi
extern volatile uint32_t ta_fifo_yuv_converter_mirror[0x800000] __asm("ta_fifo_yuv_converter_mirror"); extern volatile uint32_t ta_fifo_yuv_converter_mirror[0x800000] __asm("ta_fifo_yuv_converter_mirror");
extern volatile uint32_t ta_fifo_texture_memory_mirror[0x800000] __asm("ta_fifo_texture_memory_mirror"); extern volatile uint32_t ta_fifo_texture_memory_mirror[0x800000] __asm("ta_fifo_texture_memory_mirror");
extern uint32_t store_queue[0x4000000] __asm("store_queue"); extern uint32_t store_queue[0x4000000] __asm("store_queue");
/* 0x1000 words (16 KiB) bound to the linker symbol "sh7091_oc_d"; presumably the SH7091 operand-cache data area -- confirm against the linker script. */
extern uint32_t sh7091_oc_d[0x1000] __asm("sh7091_oc_d");

146
sobel.cpp
View File

@ -1,102 +1,76 @@
#include <stdint.h> #include <stdint.h>
int clamp255(float v) #include "sobel.hpp"
{
int n = (int)v;
if (n < 0)
return 0;
if (n > 255)
return 255;
return n;
}
uint32_t getpx(uint32_t * buf, int x, int y) static inline float getpx(float * buf, int x, int y)
{ {
if (x < 0)
x = 0;
if (y < 0)
y = 0;
if (x >= 640)
x = 640 - 1;
if (y >= 480)
y = 480 - 1;
return buf[y * 640 + x]; return buf[y * 640 + x];
} }
float multiply(uint32_t * buf, int x, int y, float weight) static inline float kernel2(float * buf, int x, int y)
{ {
uint32_t color = getpx(buf, x, y); constexpr float gx[] = {
int b = color & 0xff; 1, 0, -1, /* fr0 , _ , xf12 */
color >>= 8; 2, 0, -2, /* fr1 , _ , xf13 */
int g = color & 0xff; 1, 0, -1, /* fr2, _ , xf14 */
color >>= 8; };
int r = color & 0xff;
color >>= 8;
int a = color;
float luminance = (float)(r + g + b + a) * 0.25; constexpr float gy[] = {
return luminance * (float)weight; 1, 2, 1, /* fr0, fr1, fr2 */
}
float kernel(uint32_t * buf, const float * weights, int x, int y)
{
float c = 0;
c += multiply(buf, x - 1, y - 1, weights[0]);
c += multiply(buf, x , y - 1, weights[1]);
c += multiply(buf, x + 1, y - 1, weights[2]);
c += multiply(buf, x - 1, y , weights[3]);
c += multiply(buf, x , y , weights[4]);
c += multiply(buf, x + 1, y , weights[5]);
c += multiply(buf, x - 1, y + 1, weights[6]);
c += multiply(buf, x , y + 1, weights[7]);
c += multiply(buf, x + 1, y + 1, weights[8]);
return c;
}
const float gx[] = {
1, 0, -1,
2, 0, -2,
1, 0, -1,
};
const float gy[] = {
1, 2, 1,
0, 0, 0, 0, 0, 0,
-1, -2, -1, -1, -2, -1, /* fr4, fr5, fr6 */
}; };
void convolve(uint32_t * in, uint32_t * out) float a = getpx(buf, x - 1, y - 1);
float b = getpx(buf, x , y - 1);
float c = getpx(buf, x + 1, y - 1);
float d = getpx(buf, x - 1, y );
float e = getpx(buf, x , y );
float f = getpx(buf, x + 1, y );
float g = getpx(buf, x - 1, y + 1);
float h = getpx(buf, x , y + 1);
float i = getpx(buf, x + 1, y + 1);
float sx = 0;
float sy = 0;
sx += a * gx[0];
//sx += b * gx[1];
sx += c * gx[2];
sx += d * gx[3];
//sx += e * gx[4];
sx += f * gx[5];
sx += g * gx[6];
//sx += h * gx[7];
sx += i * gx[8];
sy += a * gy[0];
sy += b * gy[1];
sy += c * gy[2];
//sy += d * gy[3];
//sy += e * gy[4];
//sy += f * gy[5];
sy += g * gy[6];
sy += h * gy[7];
sy += i * gy[8];
return sx * sx + sy * sy;
}
void convolve(float * in, uint32_t * out)
{ {
for (int y = 0; y < 480; y++) { for (int y = 1; y < 480 - 1; y++) {
for (int x = 0; x < 640; x++) { for (int x = 1; x < 640 - 1; x++) {
float vx = kernel(in, gx, x, y); float c = kernel2(in, x, y);
float vy = kernel(in, gy, x, y); int d = c > 100.f ? 0 : 0xffffffff;
float c = vx * vx + vy * vy;
int d = c > 100.f ? 0 : 1;
uint32_t color = in[y * 640 + x];
int b = color & 0xff; out[y * 640 + x] = (uint8_t)d;
color >>= 8;
int g = color & 0xff;
color >>= 8;
int r = color & 0xff;
color >>= 8;
int a = color;
uint32_t color_out = 0;
//color_out |= (a * d);
//color_out <<= 8;
color_out |= (r * d);
color_out <<= 8;
color_out |= (g * d);
color_out <<= 8;
color_out |= (b * d);
out[y * 640 + x] = color_out;
} }
} }
} }

3
sobel.hpp Normal file
View File

@ -0,0 +1,3 @@
#pragma once

#include <stdint.h> // uint32_t; keeps this header usable without relying on the includer's include order

/*
 * Sobel edge detection over a 640x480 image.
 *
 * in:  640 * 480 floats, row-major.
 * out: 640 * 480 words, row-major; only interior pixels
 *      (1 <= x <= 638, 1 <= y <= 478) are written, the one-pixel border is
 *      left untouched.
 */
void convolve(float * in, uint32_t * out);

152
sobel_fipr.s Normal file
View File

@ -0,0 +1,152 @@
/*
 * _sobel_fipr -- C prototype (from example/fipr.cpp):
 *     void sobel_fipr(float * a, int * i);
 *
 * Walks a 640-float-wide image with a 3x3 window, forming two gradient sums
 * with fipr, squaring and adding them, comparing the result with fcmp/gt and
 * storing one 32-bit word per visited pixel (r9 = T - 1, i.e. 0 when T is
 * set, 0xffffffff otherwise). 638 pixels per row, 478 rows.
 *
 * Register use:
 *   r0        input cursor (bytes)          r8   output cursor (bytes)
 *   r1, r2    byte offsets of +1, +2 floats
 *   r3, r4    byte offsets of +640, +642 floats
 *   r5,r6,r7  byte offsets of +1280, +1281, +1282 floats
 *   r9        result word                   r10  pixels left in this row
 *   r11       rows left
 *   fv8  = { 1,  2,  1, 0}                  fv12 = {-1, -2, -1, 0}
 *   fv0 / fv4 hold the image samples; fr3 / fr7 are zeroed before each fipr
 *   and are then read back as that fipr's result.
 *
 * NOTE(review): fr12-fr15 are overwritten below and never restored; check
 * this against the calling convention the C++ side is built with.
 */
/* fv0 fv4 fv8 fv12 */
.global _sobel_fipr
_sobel_fipr:
__setup:
/* preserve r8-r11 (popped again at _return) */
mov.l r8,@-r15
mov.l r9,@-r15
mov.l r10,@-r15
mov.l r11,@-r15
/* fv8 = {1, 2, 1, 0} */
fldi1 fr8 /* 1.0 */
fldi1 fr9 /* 1.0 here; doubled to 2.0 by the fadd below */
fldi1 fr10 /* 1.0 */
fldi0 fr11 /* 0.0 */
fadd fr9,fr9
/* fv12 = {-1, -2, -1, 0} */
fldi1 fr12
fmov fr9,fr13
fldi1 fr14
fldi0 fr15
fneg fr12
fneg fr13
fneg fr14
/* constants */
mova _const_100f,r0 /* r0 as temporary (r4 still holds the first C argument) */
fmov.s @r0,fr0
/* NOTE(review): a dr/xd pair move next to single-precision fmov.s loads --
   both cannot have their written meaning under one FPSCR.SZ setting; confirm
   which mode this routine is entered in. */
fmov dr0,xd0
/* save C arguments */
mov r4,r0 /* r4 saved as r0 */
mov r5,r8 /* r5 saved as r8 */
/* offsets */
mov #(1 * 4),r1
mov #(2 * 4),r2
mov.w _const_640,r3
mov.w _const_642,r4
mov.w _const_1280,r5
mov.w _const_1281,r6
mov.w _const_1282,r7
/* NOTE(review): the loop loads at r0 + {0, 1, 2, 640, 642, 1280, 1281, 1282}
   floats, i.e. it treats r0 as the top-left tap of the window, while r8 is the
   output pixel. Advancing r0 by one row and one pixel as well therefore
   appears to sample rows y..y+2 / columns x..x+2 for output (x, y), and to
   read one row past a 480-row image on the last of the 478 rows. test2() in
   example/fipr.cpp seeds a[0..2], a[640..642], a[1280..1282] and reads
   i[640 + 1], which matches an unshifted r0. Confirm whether the two r0
   adjustments below are intended. */
add r3,r0 /* skip first row */
add r3,r8
add #4,r0 /* skip first pixel */
add #4,r8
mov.w _const_638,r10 /* skip last pixel */
mov.w _const_478,r11 /* row count */
bra _loop
nop
/* literal pool (reached by mova / mov.w above) */
.align 4
_const_100f: .float 100
_const_640: .short (640 * 4)
_const_642: .short (642 * 4)
_const_1280: .short (1280 * 4)
_const_1281: .short (1281 * 4)
_const_1282: .short (1282 * 4)
_const_638: .short 638
_const_478: .short 478
.align 4
_loop:
_loop_width:
/* y multiplication */
/* first row of the window against fv8; result read from fr3 */
fmov.s @r0,fr0 /* 0 */
fmov.s @(r0,r1),fr1 /* 1 */
fmov.s @(r0,r2),fr2 /* 2 */
fldi0 fr3
fipr fv8,fv0
/* third row of the window against fv12; result read from fr7 */
fmov.s @(r0,r5),fr4 /* 1280 */
fmov.s @(r0,r6),fr5 /* 1281 */
fmov.s @(r0,r7),fr6 /* 1282 */
fldi0 fr7
fipr fv12,fv4
/* fr7 = (fr3 + fr7)^2 */
fadd fr3,fr7
fmul fr7,fr7
/* save fr7 in FPUL */
flds fr7,FPUL
/* x multiplication */
/* transpose and load
before
fr0, fr1, fr2, _,
, , , ,
fr4, fr5, fr6, _,
after
fr0, , fr4, _,
fr1, , fr5, _,
fr2, , fr6, _,
*/
/* exchange fr4/fr2 */
fmov fr4,fr3
fmov fr2,fr4
fmov fr3,fr2
/* load fr1,fr5 */
/* first column of the window against fv8; fr3 is re-zeroed first */
fmov.s @(r0,r3),fr1 /* 640 */
fldi0 fr3
fipr fv8,fv0
/* third column of the window against fv12 */
fmov.s @(r0,r4),fr5 /* 642 */
fldi0 fr7
fipr fv12,fv4
/* fr7 = (fr3 + fr7)^2 */
fadd fr3,fr7
fmul fr7,fr7
/* restore FPUL from y multiplication */
fsts FPUL,fr3
fadd fr3,fr7
/* NOTE(review): same operand order as the copy in __setup (dr0 -> xd0), yet
   the comment says "load"; fr0 at this point holds the sample loaded at the
   top of the loop, so the fcmp/gt below may not be comparing against 100.f.
   Confirm the intended direction. */
fmov dr0,xd0 /* load 100.f constant */
add #4,r0 /* next pixel */
fcmp/gt fr0,fr7
/*subc r9,r9*/
/* r9 = T - 1 */
movt r9
add #-1,r9
mov.l r9,@r8 /* save result */
dt r10
bf/s _loop_width
add #4,r8
/* end of _loop_width */
/* skip last pixel and first pixel */
add #8,r8
add #8,r0
/* row decrement */
dt r11
/* reload the per-row pixel count for the next row */
mov.w _const_638_b,r10
bf/s _loop
nop
/* restore registers */
_return:
mov.l @r15+,r11
mov.l @r15+,r10
mov.l @r15+,r9
mov.l @r15+,r8
rts
nop
/* second copy of the 638 literal, placed after the loop for the mov.w above */
_const_638_b: .short 638