dreamcast/sobel_fipr.s

166 lines
3.5 KiB
ArmAsm

/* FPU vector registers used: fv0, fv4 (pixel data); fv8, fv12 (kernel weights) */
/*
 * _sobel_fipr -- 3x3 Sobel edge detector built on the SH-4 FIPR instruction.
 *
 * Presumed C prototype (NOTE(review): confirm against the caller):
 *     void sobel_fipr(const float *src, int *dst);
 *
 * In:   r4 = src, image of 32-bit floats, 640 per row, 480 rows
 *       r5 = dst, image of 32-bit words with the same geometry
 * Out:  for every interior pixel (x = 1..638, y = 1..478)
 *           dst[y][x] =  0   when gx*gx + gy*gy >  100.0
 *           dst[y][x] = -1   otherwise
 *       The one-pixel border of dst is never written.
 *
 *       gy = (row above . (1,2,1)) - (row below . (1,2,1))
 *       gx = (column left . (1,2,1)) - (column right . (1,2,1))
 *
 * Saved and restored: r8-r11, fr12-fr15.
 * Clobbered:          r0-r7, fr0-fr11, xd0 (xf0/xf1), FPUL, T.
 * Assumes FPSCR.PR = 0 and FPSCR.SZ = 0 on entry (fschg is used to toggle
 * SZ temporarily and always leaves it at 0 again).
 */
        .global _sobel_fipr
_sobel_fipr:
__setup:
        /* preserve the callee-saved registers this routine overwrites */
        mov.l   r8,@-r15
        mov.l   r9,@-r15
        mov.l   r10,@-r15
        mov.l   r11,@-r15
        fmov.s  fr12,@-r15
        fmov.s  fr13,@-r15
        fmov.s  fr14,@-r15
        fmov.s  fr15,@-r15

        /* fv8 = (1, 2, 1, 0): positive half of the Sobel kernel */
        fldi1   fr8                     /* 1.0 */
        fldi1   fr9                     /* 1.0 here, doubled to 2.0 below */
        fldi1   fr10                    /* 1.0 */
        fldi0   fr11                    /* 0.0 */
        fadd    fr9,fr9                 /* fr9 = 2.0 */

        /* fv12 = (-1, -2, -1, 0): negative half of the Sobel kernel */
        fldi1   fr12
        fmov    fr9,fr13
        fldi1   fr14
        fldi0   fr15
        fneg    fr12
        fneg    fr13
        fneg    fr14

        /* park the threshold in xd0 so that fr0 stays free for pixel data */
        mova    _const_100f,r0          /* mova can only write r0 */
        fmov.s  @r0,fr0                 /* fr0 = 100.0 */
        fschg                           /* SZ = 1: fmov moves register pairs */
        fmov    dr0,xd0                 /* xf0 = 100.0 (fr1 rides along, unused) */
        fschg                           /* SZ = 0 again */

        /* take over the C arguments before r4-r7 are reused as offsets */
        mov     r4,r0                   /* r0 = src cursor: top-left of the 3x3 window */
        mov     r5,r8                   /* r8 = dst cursor */

        /* byte offsets from the window's top-left corner, used as @(r0,rN) */
        mov     #(1 * 4),r1             /* row 0, column 1 */
        mov     #(2 * 4),r2             /* row 0, column 2 */
        mov.w   _const_640,r3           /* row 1, column 0 */
        mov.w   _const_642,r4           /* row 1, column 2 */
        mov.w   _const_1280,r5          /* row 2, column 0 */
        mov.w   _const_1281,r6          /* row 2, column 1 */
        mov.w   _const_1282,r7          /* row 2, column 2 */

        /*
         * Only the destination skips the first row and the first pixel.
         * r0 addresses the top-left corner of the window, so the window's
         * centre is already one row and one pixel ahead of it; advancing r0
         * as well would shift the result by one pixel diagonally and make
         * the last rows read past the end of the source image.
         */
        add     r3,r8                   /* skip first row */
        add     #4,r8                   /* skip first pixel */

        mov.w   _const_638,r10          /* pixels per row (borders excluded) */
        mov.w   _const_478,r11          /* row count (borders excluded) */
        bra     _loop                   /* jump over the literal pool */
        nop

        .align 4
_const_100f:    .float 100
_const_640:     .short (640 * 4)
_const_642:     .short (642 * 4)
_const_1280:    .short (1280 * 4)
_const_1281:    .short (1281 * 4)
_const_1282:    .short (1282 * 4)
_const_638:     .short 638
_const_478:     .short 478

        .align 4
_loop:
_loop_width:
        /* ---- gy: top row against fv8, bottom row against fv12 ---- */
        fmov.s  @r0,fr0                 /* 0 */
        fmov.s  @(r0,r1),fr1            /* 1 */
        fmov.s  @(r0,r2),fr2            /* 2 */
        fldi0   fr3                     /* 4th lane = 0 so it adds nothing */
        fipr    fv8,fv0                 /* fr3 = fv8 . fv0 */
        fmov.s  @(r0,r5),fr4            /* 1280 */
        fmov.s  @(r0,r6),fr5            /* 1281 */
        fmov.s  @(r0,r7),fr6            /* 1282 */
        fldi0   fr7
        fipr    fv12,fv4                /* fr7 = fv12 . fv4 */
        fadd    fr3,fr7                 /* fr7 = gy */
        fmul    fr7,fr7                 /* fr7 = gy * gy */
        flds    fr7,FPUL                /* keep gy*gy in FPUL; fr7 is reused below */

        /* ---- gx: left column against fv8, right column against fv12 ---- */
        /*
         * Reuse the pixels already loaded by swapping fr2 and fr4 and
         * loading the two middle-row pixels that are still missing:
         *
         *   before (rows)              after (columns)
         *   fv0 = p0,    p1,    p2     fv0 = p0, p640, p1280   (left)
         *   fv4 = p1280, p1281, p1282  fv4 = p2, p642, p1282   (right)
         */
        fmov    fr4,fr3                 /* exchange fr4/fr2, fr3 as scratch */
        fmov    fr2,fr4
        fmov    fr3,fr2
        fmov.s  @(r0,r3),fr1            /* 640 */
        fldi0   fr3
        fipr    fv8,fv0                 /* fr3 = fv8 . fv0 */
        fmov.s  @(r0,r4),fr5            /* 642 */
        fldi0   fr7
        fipr    fv12,fv4                /* fr7 = fv12 . fv4 */
        fadd    fr3,fr7                 /* fr7 = gx */
        fmul    fr7,fr7                 /* fr7 = gx * gx */

        /* ---- threshold gx*gx + gy*gy against 100.0 ---- */
        fsts    FPUL,fr3                /* fr3 = gy * gy */
        fadd    fr3,fr7                 /* fr7 = gx*gx + gy*gy */
        fschg
        fmov    xd0,dr0                 /* fr0 = 100.0 (fr1 is overwritten too) */
        fschg
        add     #4,r0                   /* next source pixel */
        fcmp/gt fr0,fr7                 /* T = (fr7 > fr0) */
        movt    r9                      /* r9 = T */
        add     #-1,r9                  /* above threshold -> 0, otherwise -> -1 */
        mov.l   r9,@r8                  /* save result */
        dt      r10                     /* T = 1 once the row is finished */
        bf/s    _loop_width
        add     #4,r8                   /* delay slot: next destination pixel */
        /* end of _loop_width */

        /* step over the last pixel of this row and the first of the next */
        add     #8,r8
        add     #8,r0

        /* row decrement */
        dt      r11                     /* T = 1 once all rows are finished */
        mov.w   _const_638_b,r10        /* reload the per-row pixel counter (T untouched) */
        bf/s    _loop
        nop

        /* restore registers in the reverse order of the saves in __setup */
_return:
        fmov.s  @r15+,fr15
        fmov.s  @r15+,fr14
        fmov.s  @r15+,fr13
        fmov.s  @r15+,fr12
        mov.l   @r15+,r11
        mov.l   @r15+,r10
        mov.l   @r15+,r9
        mov.l   @r15+,r8
        rts
        nop

/* second copy of the row width, placed after the code so the mov.w above can reach it */
_const_638_b:   .short 638