saturn-examples/scu-dsp/div10_vdp2.asm
2025-08-08 21:13:39 -05:00

105 lines
2.9 KiB
NASM

mov 0,ct0 ; num
mov 0,ct1 ; vdp2 address
mov 0,ct3 ; output
mvi 12345,mc0 ; m0[0] (12345)
;; clear m3
mov 7,lop
lps
mov 0,mc3
mov 0,ct3
;; output: m3
base10_loop:
mov 0,ct0
mvi div10_unsigned,pc
;; [X ] [Y ] [D1 ]
mov mc0,y mov 1,lop ; mvi imm,pc delay slot (executed twice)
;; after function return:
mov all,mc0 ; m0[1] (1234)
mov all,pl ; ??? why can't this happen in the next instruction?
;; mod10: multiply by 10:
sl mov alu,a
sl mov alu,a
sl mov alu,a
add mov alu,a
add mov alu,a mov 0,ct0 ; restore ct0 after 'mov all,mc1'
;; a: 12340
mov mc0,a mov all,pl
;; a: 12345 m0[0]
;; p: 12340
;; mod10: subtract (a - p)
sub mov alu,a
;; a: 5
;; convert to vdp2 character
mvi 16,pl
add mov alu,a
;; store digit in m3
mov mc0,p clr a mov all,mc3 ; m0[1]
;; p: 1234
;; a: 0
add mov alu,a mov 0,ct0
;; a: 1234
jmp nz,base10_loop
mov all,mc0 ; jmp delay slot
;;
;; transfer to vdp2
;;
;; vdp2 address calculation
mvi ((8 * 0x4000 + (64 - 8) * 4 + 0x05e00000) >> 2),wa0
mov 0,ct3
;; clr a mov 0,ct3
;; add mov alu,a
;; mov all,wa0
;; end vdp2 address calculation
dma1 mc3,d0,8
dma_wait:
jmp t0,dma_wait
nop
endi
nop
;; argument: ry
;; return: a ← ry / 10
;; maximum ry is somewhere between 2 ^ 21 and 2 ^ 22
div10_unsigned:
;; 1 / 10 * (2 ^ 24) ~= 1677722 = 0x19999a
mvi 1677722,rx
mov mul,p clr a
ad2 mov alu,a
;; ALH: [nmlkjihgfedcba9876543210________]
;; ALL: [76543210________________________]
clr a mov alh,pl ; alh is (a >> 16)
add mov alu,a
;; ALL: [nmlkjihgfedcba9876543210________]
;; rotate left 24 requires fewer cycles than shift right 8
rl8 mov alu,a
rl8 mov alu,a
rl8 mov alu,a
;; ALL: [________nmlkjihgfedcba9876543210]
;; mask 24 bit result
mvi 0xffffff,pl
;; return to caller ; reset ct0
btm
and mov alu,a mov 0,ct0