166 lines
3.5 KiB
ArmAsm
166 lines
3.5 KiB
ArmAsm
/* fv0 fv4 fv8 fv12 */
|
|
.global _sobel_fipr
|
|
_sobel_fipr:
|
|
__setup:
|
|
mov.l r8,@-r15
|
|
mov.l r9,@-r15
|
|
mov.l r10,@-r15
|
|
mov.l r11,@-r15
|
|
fmov.s fr12,@-r15
|
|
fmov.s fr13,@-r15
|
|
fmov.s fr14,@-r15
|
|
fmov.s fr15,@-r15
|
|
|
|
fldi1 fr8 /* 1.0 */
|
|
fldi1 fr9 /* 2.0 */
|
|
fldi1 fr10 /* 1.0 */
|
|
fldi0 fr11 /* 0.0 */
|
|
fadd fr9,fr9
|
|
|
|
fldi1 fr12
|
|
fmov fr9,fr13
|
|
fldi1 fr14
|
|
fldi0 fr15
|
|
fneg fr12
|
|
fneg fr13
|
|
fneg fr14
|
|
|
|
/* constants */
|
|
mova _const_100f,r0 /* r11 as temporary */
|
|
fmov.s @r0,fr0
|
|
fschg
|
|
fmov dr0,xd0
|
|
fschg
|
|
|
|
/* save C arguments */
|
|
mov r4,r0 /* r4 saved as r0 */
|
|
mov r5,r8 /* r5 saved as r8 */
|
|
|
|
/* offsets */
|
|
mov #(1 * 4),r1
|
|
mov #(2 * 4),r2
|
|
mov.w _const_640,r3
|
|
mov.w _const_642,r4
|
|
mov.w _const_1280,r5
|
|
mov.w _const_1281,r6
|
|
mov.w _const_1282,r7
|
|
|
|
add r3,r0 /* skip first row */
|
|
add r3,r8
|
|
add #4,r0 /* skip first pixel */
|
|
add #4,r8
|
|
mov.w _const_638,r10 /* skip last pixel */
|
|
|
|
mov.w _const_478,r11 /* row count */
|
|
|
|
bra _loop
|
|
nop
|
|
|
|
.align 4
|
|
_const_100f: .float 100
|
|
_const_640: .short (640 * 4)
|
|
_const_642: .short (642 * 4)
|
|
_const_1280: .short (1280 * 4)
|
|
_const_1281: .short (1281 * 4)
|
|
_const_1282: .short (1282 * 4)
|
|
|
|
_const_638: .short 638
|
|
_const_478: .short 478
|
|
|
|
.align 4
|
|
_loop:
|
|
|
|
_loop_width:
|
|
/* y multiplication */
|
|
fmov.s @r0,fr0 /* 0 */
|
|
fmov.s @(r0,r1),fr1 /* 1 */
|
|
fmov.s @(r0,r2),fr2 /* 2 */
|
|
fldi0 fr3
|
|
fipr fv8,fv0
|
|
|
|
fmov.s @(r0,r5),fr4 /* 1280 */
|
|
fmov.s @(r0,r6),fr5 /* 1281 */
|
|
fmov.s @(r0,r7),fr6 /* 1282 */
|
|
fldi0 fr7
|
|
fipr fv12,fv4
|
|
|
|
fadd fr3,fr7
|
|
fmul fr7,fr7
|
|
|
|
/* save fr7 in FPUL */
|
|
flds fr7,FPUL
|
|
|
|
/* x multiplication */
|
|
/* transpose and load
|
|
before →
|
|
fr0, fr1, fr2, _,
|
|
, , , ,
|
|
fr4, fr5, fr6, _,
|
|
|
|
after →
|
|
fr0, , fr4, _,
|
|
fr1, , fr5, _,
|
|
fr2, , fr6, _,
|
|
*/
|
|
/* exchange fr4/fr2 */
|
|
fmov fr4,fr3
|
|
fmov fr2,fr4
|
|
fmov fr3,fr2
|
|
/* load fr1,fr5 */
|
|
fmov.s @(r0,r3),fr1 /* 640 */
|
|
fldi0 fr3
|
|
fipr fv8,fv0
|
|
fmov.s @(r0,r4),fr5 /* 642 */
|
|
fldi0 fr7
|
|
fipr fv12,fv4
|
|
|
|
fadd fr3,fr7
|
|
fmul fr7,fr7
|
|
/* restore FPUL from y multiplication */
|
|
fsts FPUL,fr3
|
|
fadd fr3,fr7
|
|
|
|
fschg
|
|
fmov xd0,dr0 /* load 100.f constant */
|
|
fschg
|
|
|
|
add #4,r0 /* next pixel */
|
|
|
|
fcmp/gt fr0,fr7
|
|
/*subc r9,r9*/
|
|
movt r9
|
|
add #-1,r9
|
|
mov.l r9,@r8 /* save result */
|
|
|
|
dt r10
|
|
bf/s _loop_width
|
|
add #4,r8
|
|
/* end of _loop_width */
|
|
|
|
/* skip last pixel and first pixel */
|
|
add #8,r8
|
|
add #8,r0
|
|
|
|
/* row decrement */
|
|
dt r11
|
|
mov.w _const_638_b,r10
|
|
bf/s _loop
|
|
nop
|
|
|
|
/* restore registers */
|
|
_return:
|
|
fmov.s @r15+,fr15
|
|
fmov.s @r15+,fr14
|
|
fmov.s @r15+,fr13
|
|
fmov.s @r15+,fr12
|
|
|
|
mov.l @r15+,r11
|
|
mov.l @r15+,r10
|
|
mov.l @r15+,r9
|
|
mov.l @r15+,r8
|
|
|
|
rts
|
|
nop
|
|
|
|
_const_638_b: .short 638
|