add scu-dsp demos

This commit is contained in:
Zack Buhman 2025-08-08 21:13:39 -05:00
parent 6e28a7d147
commit 4503b3542e
11 changed files with 419 additions and 5 deletions

2
.gitignore vendored
View File

@ -11,6 +11,8 @@
*.pcm
*.su
*.ss
*.dsp
*.exe
res/mai.data
tools/ttf-convert
tools/ttf-bitmap

View File

@ -177,6 +177,8 @@ scu-dsp/add.elf: scu-dsp/add.o saturn/start.o cdc/serial.o scu-dsp/input.bin.o
scu-dsp/div10.elf: scu-dsp/div10.o saturn/start.o cdc/serial.o scu-dsp/div10.dsp.o
scu-dsp/div10_vdp2.elf: scu-dsp/div10_vdp2.o saturn/start.o cdc/serial.o scu-dsp/div10_vdp2.dsp.o font/hp_100lx_4bit_flip.data.o
%.dsp: %.asm
~/scu-dsp-asm/scu-dsp-asm $< $@

Binary file not shown.

View File

@ -0,0 +1,5 @@
#pragma once
#include <stdint.h>
// Symbols bracketing the embedded font/hp_100lx_4bit_flip.data blob.
// Only the *addresses* of these symbols are meaningful: cell_data() in
// scu-dsp/div10_vdp2.cpp uses &..._start as a pointer to the first 32-bit
// word of the font data and &..._size as the blob length in bytes.
// NOTE(review): presumably emitted by the toolchain when the .data file is
// wrapped into font/hp_100lx_4bit_flip.data.o -- confirm against the build rules.
extern uint32_t _binary_font_hp_100lx_4bit_flip_data_start __asm("_binary_font_hp_100lx_4bit_flip_data_start");
extern uint32_t _binary_font_hp_100lx_4bit_flip_data_end __asm("_binary_font_hp_100lx_4bit_flip_data_end");
extern uint32_t _binary_font_hp_100lx_4bit_flip_data_size __asm("_binary_font_hp_100lx_4bit_flip_data_size");

76
scu-dsp/div10.asm Normal file
View File

@ -0,0 +1,76 @@
;; SCU DSP demo: convert the constant 12345 to base-10 digits.
;;
;; Each pass of base10_loop divides the current value by 10 (div10_unsigned),
;; derives the remainder as (value - quotient * 10) and appends that digit to
;; m3, so digits are stored least-significant first (5 is stored first).
;; The loop repeats until the quotient is zero.
;;
;; The example values in the comments below (12345, 1234, ...) are those of
;; the first pass through the loop.
mov 0,ct0 ; num
mov 0,ct1 ; vdp2 address (ct1 is not referenced again in this file)
mov 0,ct3 ; output
mvi 12345,mc0 ; m0[0] (12345)
;; output: m3
base10_loop:
mov 0,ct0
;; call div10_unsigned with ry = m0[0]; it returns through 'btm'
;; NOTE(review): presumably 'mvi imm,pc' latches the return address in TOP and
;; 'mov 1,lop' arms the 'btm' used as the return -- confirm against the SCU manual
mvi div10_unsigned,pc
;; [X ] [Y ] [D1 ]
mov mc0,y mov 1,lop ; mvi imm,pc delay slot (executed twice)
;; after function return:
;; a holds the quotient; the mc0 read above left ct0 pointing at m0[1]
mov all,mc0 ; m0[1] (1234)
mov all,pl ; p = quotient; open question: why can't this happen in the next instruction?
;; mod10: multiply by 10:
;; three shifts give quotient * 8, two adds of p (the quotient) give * 10
sl mov alu,a
sl mov alu,a
sl mov alu,a
add mov alu,a
add mov alu,a mov 0,ct0 ; rewind ct0 to m0[0] (it was advanced by 'mov all,mc0' above)
;; a: 12340
;; reload the original value into a while moving quotient * 10 into p
mov mc0,a mov all,pl
;; a: 12345 m0[0]
;; p: 12340
;; mod10: subtract (a - p)
sub mov alu,a
;; a: 5
;; store digit in m3
;; (the same instruction reloads the quotient from m0[1] into p and clears a)
mov mc0,p clr a mov all,mc3 ; m0[1]
;; p: 1234
;; a: 0
;; a = 0 + quotient: this 'add' is the last ALU operation before the 'jmp nz'
;; NOTE(review): presumably 'jmp nz' tests the zero flag set by this 'add' -- confirm
add mov alu,a mov 0,ct0
;; a: 1234
jmp nz,base10_loop
mov all,mc0 ; jmp delay slot: m0[0] = quotient, the input of the next pass
endi
nop
;; argument: ry
;; return: a ← ry / 10
;; maximum ry is somewhere between 2 ^ 21 and 2 ^ 22
;; writes rx, p and a, and resets ct0 to 0 before returning
;; method: multiply by the fixed-point reciprocal of 10 (scaled by 2 ^ 24),
;; then keep product bits 47..24, i.e. product >> 24
div10_unsigned:
;; 1 / 10 * (2 ^ 24) ~= 1677722 = 0x19999a
mvi 1677722,rx
;; p = rx * ry, a = 0; then a = a + p
mov mul,p clr a
ad2 mov alu,a
;; ALH: [nmlkjihgfedcba9876543210________]
;; ALL: [76543210________________________]
clr a mov alh,pl ; alh is (a >> 16)
add mov alu,a
;; ALL: [nmlkjihgfedcba9876543210________]
;; rotate left 24 requires fewer cycles than shift right 8
rl8 mov alu,a
rl8 mov alu,a
rl8 mov alu,a
;; ALL: [________nmlkjihgfedcba9876543210]
;; mask 24 bit result
mvi 0xffffff,pl
;; return to caller ; reset ct0
;; (the 'and' below sits in the btm delay slot and applies the 24-bit mask)
btm
and mov alu,a mov 0,ct0

35
scu-dsp/div10.cpp Normal file
View File

@ -0,0 +1,35 @@
#include "cdc/serial.h"
#include "scu.h"
#include "vdp2.h"
#include "scu-dsp/div10.dsp.h"
/*
  Uploads the embedded div10 DSP program to the SCU DSP, runs it to
  completion, then prints the first 8 words of DSP data RAM starting at
  m3[0] over the serial port.

  Every program word is echoed to the serial port as it is uploaded.
*/
void main()
{
  constexpr int ppaf_load_pc_bit = 15;
  constexpr int ppaf_execute_bit = 16;
  constexpr int ppaf_end_bit = 18;
  constexpr int answer_words = 8;

  scu.reg.PPAF = 0; // stop execution
  scu.reg.PPAF = (1 << ppaf_load_pc_bit) | 0; // reset PC

  // The linker symbols carry their values in their addresses:
  // &..._start is the first program word, &..._size is the length in bytes.
  uint32_t * words = (uint32_t *)&_binary_scu_dsp_div10_dsp_start;
  int word_count = ((int)&_binary_scu_dsp_div10_dsp_size) / 4;

  int index = 0;
  while (index < word_count) {
    uint32_t word = words[index];
    scu.reg.PPD = word;
    serial_integer(word, 8, '\n');
    index += 1;
  }

  scu.reg.PPAF = (1 << ppaf_execute_bit); // execute

  // busy-wait until the end flag reads as set
  while (((scu.reg.PPAF >> ppaf_end_bit) & 1) == 0) {
  }

  scu.reg.PPAF = 0;

  scu.reg.PDA = 64 * 3; // m3[0]

  serial_string("answer:\n");
  for (int remaining = answer_words; remaining > 0; remaining--) {
    serial_integer(scu.reg.PDD, 8, '\n');
  }
}

5
scu-dsp/div10.dsp.h Normal file
View File

@ -0,0 +1,5 @@
#pragma once
#include <stdint.h>
// Symbols bracketing the embedded scu-dsp/div10.dsp program image.
// Only the *addresses* of these symbols are meaningful: main() in
// scu-dsp/div10.cpp uses &..._start as a pointer to the first 32-bit program
// word and (&..._size / 4) as the number of words to upload.
// NOTE(review): presumably emitted by the toolchain when the .dsp file is
// wrapped into scu-dsp/div10.dsp.o -- confirm against the build rules.
extern uint32_t _binary_scu_dsp_div10_dsp_start __asm("_binary_scu_dsp_div10_dsp_start");
extern uint32_t _binary_scu_dsp_div10_dsp_end __asm("_binary_scu_dsp_div10_dsp_end");
extern uint32_t _binary_scu_dsp_div10_dsp_size __asm("_binary_scu_dsp_div10_dsp_size");

104
scu-dsp/div10_vdp2.asm Normal file
View File

@ -0,0 +1,104 @@
;; SCU DSP demo: convert the constant 12345 to base-10 digits, turn each digit
;; into a VDP2 character number, and DMA the 8-word result from m3 to VDP2 VRAM.
;;
;; Each pass of base10_loop divides the current value by 10 (div10_unsigned),
;; derives the remainder as (value - quotient * 10), adds 16 to it and appends
;; it to m3, so digits are stored least-significant first.
;; The loop repeats until the quotient is zero.
;;
;; The example values in the comments below (12345, 1234, ...) are those of
;; the first pass through the loop.
mov 0,ct0 ; num
mov 0,ct1 ; vdp2 address (ct1 is not referenced again in this file; wa0 is loaded directly below)
mov 0,ct3 ; output
mvi 12345,mc0 ; m0[0] (12345)
;; clear m3
;; NOTE(review): presumably 'lps' repeats the following instruction lop + 1 = 8
;; times, matching the 8 words transferred by the dma1 below -- confirm
mov 7,lop
lps
mov 0,mc3
mov 0,ct3
;; output: m3
base10_loop:
mov 0,ct0
;; call div10_unsigned with ry = m0[0]; it returns through 'btm'
;; NOTE(review): presumably 'mvi imm,pc' latches the return address in TOP and
;; 'mov 1,lop' arms the 'btm' used as the return -- confirm against the SCU manual
mvi div10_unsigned,pc
;; [X ] [Y ] [D1 ]
mov mc0,y mov 1,lop ; mvi imm,pc delay slot (executed twice)
;; after function return:
;; a holds the quotient; the mc0 read above left ct0 pointing at m0[1]
mov all,mc0 ; m0[1] (1234)
mov all,pl ; p = quotient; open question: why can't this happen in the next instruction?
;; mod10: multiply by 10:
;; three shifts give quotient * 8, two adds of p (the quotient) give * 10
sl mov alu,a
sl mov alu,a
sl mov alu,a
add mov alu,a
add mov alu,a mov 0,ct0 ; rewind ct0 to m0[0] (it was advanced by 'mov all,mc0' above)
;; a: 12340
;; reload the original value into a while moving quotient * 10 into p
mov mc0,a mov all,pl
;; a: 12345 m0[0]
;; p: 12340
;; mod10: subtract (a - p)
sub mov alu,a
;; a: 5
;; convert to vdp2 character
;; character number = digit + 16
;; NOTE(review): presumably 16 is the font index of '0' ('0' - 0x20), in line
;; with the "' ' - 0x20" fill value used in div10_vdp2.cpp -- confirm
mvi 16,pl
add mov alu,a
;; store digit in m3
;; (the same instruction reloads the quotient from m0[1] into p and clears a)
mov mc0,p clr a mov all,mc3 ; m0[1]
;; p: 1234
;; a: 0
;; a = 0 + quotient: this 'add' is the last ALU operation before the 'jmp nz'
;; NOTE(review): presumably 'jmp nz' tests the zero flag set by this 'add' -- confirm
add mov alu,a mov 0,ct0
;; a: 1234
jmp nz,base10_loop
mov all,mc0 ; jmp delay slot: m0[0] = quotient, the input of the next pass
;;
;; transfer to vdp2
;;
;; vdp2 address calculation
;; byte address = 0x05e00000 + 8 * 0x4000 + (64 - 8) * 4:
;;   8 * 0x4000   is the plane offset (plane_a = 8 in div10_vdp2.cpp)
;;   (64 - 8) * 4 is the offset of the last 8 four-byte entries of a 64-entry row
;; NOTE(review): presumably 0x05e00000 is the VDP2 VRAM base and the '>> 2' is
;; there because wa0 takes a 4-byte-unit address -- confirm against the SCU manual
mvi ((8 * 0x4000 + (64 - 8) * 4 + 0x05e00000) >> 2),wa0
mov 0,ct3
;; clr a mov 0,ct3
;; add mov alu,a
;; mov all,wa0
;; end vdp2 address calculation
;; NOTE(review): presumably transfers 8 words, read from m3 starting at ct3 = 0,
;; to the destination loaded into wa0 -- confirm
dma1 mc3,d0,8
;; spin while t0 is set
;; NOTE(review): presumably t0 stays set while the DMA is in progress -- confirm
dma_wait:
jmp t0,dma_wait
nop
endi
nop
;; argument: ry
;; return: a ← ry / 10
;; maximum ry is somewhere between 2 ^ 21 and 2 ^ 22
;; writes rx, p and a, and resets ct0 to 0 before returning
;; method: multiply by the fixed-point reciprocal of 10 (scaled by 2 ^ 24),
;; then keep product bits 47..24, i.e. product >> 24
div10_unsigned:
;; 1 / 10 * (2 ^ 24) ~= 1677722 = 0x19999a
mvi 1677722,rx
;; p = rx * ry, a = 0; then a = a + p
mov mul,p clr a
ad2 mov alu,a
;; ALH: [nmlkjihgfedcba9876543210________]
;; ALL: [76543210________________________]
clr a mov alh,pl ; alh is (a >> 16)
add mov alu,a
;; ALL: [nmlkjihgfedcba9876543210________]
;; rotate left 24 requires fewer cycles than shift right 8
rl8 mov alu,a
rl8 mov alu,a
rl8 mov alu,a
;; ALL: [________nmlkjihgfedcba9876543210]
;; mask 24 bit result
mvi 0xffffff,pl
;; return to caller ; reset ct0
;; (the 'and' below sits in the btm delay slot and applies the 24-bit mask)
btm
and mov alu,a mov 0,ct0

171
scu-dsp/div10_vdp2.cpp Normal file
View File

@ -0,0 +1,171 @@
#include "cdc/serial.h"
#include "scu.h"
#include "vdp2.h"
#include "../common/vdp2_func.hpp"
#include "scu-dsp/div10_vdp2.dsp.h"
#include "font/hp_100lx_4bit_flip.data.h"
/*
  Copies the embedded hp_100lx_4bit_flip font into VDP2 VRAM, one 32-bit
  word at a time, starting at the first VRAM word.

  The linker symbols carry their values in their addresses: &..._start is
  the first font word and &..._size is the font length in bytes. Any
  trailing bytes beyond a multiple of 4 are not copied.
*/
void cell_data()
{
  const uint32_t * font_words = reinterpret_cast<uint32_t *>(&_binary_font_hp_100lx_4bit_flip_data_start);
  const int font_bytes = reinterpret_cast<uint32_t>(&_binary_font_hp_100lx_4bit_flip_data_size);
  const int font_word_count = font_bytes / 4;

  // the start of VRAM-A0
  int word = 0;
  while (word < font_word_count) {
    vdp2.vram.u32[word] = font_words[word];
    word += 1;
  }
}
/*
  Writes the two-entry palette used by the font: color RAM entry 0 is set
  to 0x0000 and entry 1 to 0xffff.
*/
void palette_data()
{
  constexpr uint16_t palette[2] = {
    0x0000, // entry 0: all bits clear
    0xffff, // entry 1: all bits set
  };

  for (int entry = 0; entry < 2; entry++) {
    vdp2.cram.u16[entry] = palette[entry];
  }
}
/*
  Configures VDP2 to display the rotation scroll screen RBG0 as a 16-color
  cell-format text layer:

  - waits for V blank, then enables the display (320x240, non-interlace)
  - assigns VRAM-A0 to character patterns and VRAM-A1 to pattern names
  - points every rotation-parameter-A map register at plane `plane_a`
  - loads the palette and the font (palette_data / cell_data)
  - writes a rotation parameter table at VRAM offset 0x4000
  - fills the 64x64 pattern name page with character number 0
*/
void vdp2_init()
{
  v_blank_in();

  // DISP: Please make sure to change this bit from 0 to 1 during V blank.
  vdp2.reg.TVMD = ( TVMD__DISP | TVMD__LSMD__NON_INTERLACE
                  | TVMD__VRESO__240 | TVMD__HRESO__NORMAL_320);

  /* set the color mode to 5bits per channel, 1024 colors */
  vdp2.reg.RAMCTL = RAMCTL__CRMD__RGB_5BIT_1024
                  | RAMCTL__VRAMD
                  | RAMCTL__VRBMD
                  | RAMCTL__RDBSA0__CHARACTER_PATTERN_TABLE // VRAM-A0 0x000000
                  | RAMCTL__RDBSA1__PATTERN_NAME_TABLE;     // VRAM-A1 0x020000

  vdp2.reg.VRSIZE = 0;

  /* enable display of RBG0 */
  vdp2.reg.BGON = BGON__R0ON | BGON__R0TPON;

  /* set character format for RBG0 to palettized 16 color
     set enable "cell format" for RBG0
     set character size for RBG0 to 1x1 cell */
  vdp2.reg.CHCTLB = CHCTLB__R0CHCN__16_COLOR
                  | CHCTLB__R0BMEN__CELL_FORMAT
                  | CHCTLB__R0CHSZ__1x1_CELL;

  /* plane size */
  vdp2.reg.PLSZ = PLSZ__RAPLSZ__1x1
                | PLSZ__RBPLSZ__1x1;

  /* map plane offset
     1-word: value of bit 6-0 * 0x2000
     2-word: value of bit 5-0 * 0x4000
  */
  // plane_a_offset is at the start of VRAM-A1
  constexpr int plane_a = 8;
  constexpr int plane_a_offset = plane_a * 0x4000;

  /* cycle pattern table not used for RBG0 ? */
  vdp2.reg.CYCA0 = 0x0F44F99F;
  vdp2.reg.CYCA1 = 0x0F44F99F;
  vdp2.reg.CYCB0 = 0x0F44F99F;
  vdp2.reg.CYCB1 = 0x0F44F99F;

  // all 16 rotation-parameter-A map entries refer to the same plane
  vdp2.reg.MPOFR = MPOFR__RAMP(0); // bits 8~6
  vdp2.reg.MPABRA = MPABRA__RAMPB(plane_a) | MPABRA__RAMPA(plane_a); // bits 5~0
  vdp2.reg.MPCDRA = MPCDRA__RAMPD(plane_a) | MPCDRA__RAMPC(plane_a); // bits 5~0
  vdp2.reg.MPEFRA = MPEFRA__RAMPF(plane_a) | MPEFRA__RAMPE(plane_a); // bits 5~0
  vdp2.reg.MPGHRA = MPGHRA__RAMPH(plane_a) | MPGHRA__RAMPG(plane_a); // bits 5~0
  vdp2.reg.MPIJRA = MPIJRA__RAMPJ(plane_a) | MPIJRA__RAMPI(plane_a); // bits 5~0
  vdp2.reg.MPKLRA = MPKLRA__RAMPL(plane_a) | MPKLRA__RAMPK(plane_a); // bits 5~0
  vdp2.reg.MPMNRA = MPMNRA__RAMPN(plane_a) | MPMNRA__RAMPM(plane_a); // bits 5~0
  vdp2.reg.MPOPRA = MPOPRA__RAMPP(plane_a) | MPOPRA__RAMPO(plane_a); // bits 5~0

  // 2-word (32-bit) pattern names; the fill loop at the end of this
  // function writes one 32-bit word per pattern name accordingly
  vdp2.reg.PNCR = PNCR__R0PNB__2WORD;

  vdp2.reg.RPMD = RPMD__ROTATION_PARAMETER_A;
  //vdp2.reg.RPRCTL = 0;
  vdp2.reg.KTCTL = 0;
  vdp2.reg.KTAOF = 0;
  vdp2.reg.PRIR = 3;

  palette_data();
  cell_data();

  // rotation parameter table, placed 0x4000 bytes into VRAM
  volatile struct vdp2_rotation_parameter_table * table = (struct vdp2_rotation_parameter_table *)&vdp2.vram.u32[0x4000 / 4];
  table->screen_start_coordinate_xst = 0;
  table->screen_start_coordinate_yst = 0;
  table->screen_start_coordinate_zst = 0;
  table->screen_vertical_coordinate_increment_dxst = 0;
  table->screen_vertical_coordinate_increment_dyst = (1 << 16);
  table->screen_horizontal_coordinate_increment_dx = (1 << 16);
  table->screen_horizontal_coordinate_increment_dy = 0;
  // Matrix element A is -1.0 in the same 16.16 scale as the other entries.
  // Written as -(1 << 16) rather than (-1 << 16): left-shifting a negative
  // value is undefined behavior before C++20. The stored value is identical.
  // NOTE(review): the negative A presumably mirrors the layer horizontally to
  // go with the "flip" font -- confirm.
  table->rotation_matrix_parameter_a = -(1 << 16);
  table->rotation_matrix_parameter_b = 0;
  table->rotation_matrix_parameter_c = 0;
  table->rotation_matrix_parameter_d = 0;
  table->rotation_matrix_parameter_e = (1 << 16);
  table->rotation_matrix_parameter_f = 0;
  table->viewpoint_coordinate_px = 0;
  table->viewpoint_coordinate_py = 0;
  table->viewpoint_coordinate_pz = 0;
  table->center_point_coordinate_px = 0;
  table->center_point_coordinate_py = 0;
  table->center_point_coordinate_pz = 0;
  table->horizontal_shift_mx = 0;
  table->horizontal_shift_my = 0;
  table->scaling_coefficient_kx = (1 << 16);
  table->scaling_coefficient_ky = (1 << 16);

  vdp2.reg.RPTA = (((uint32_t)table) >> 1) & 0x7ffff;

  /* fill the 64x64 pattern name page with the space character
     (character number ' ' - 0x20 == 0) */
  for (int i = 0; i < 64 * 64; i++) {
    vdp2.vram.u32[(plane_a_offset / 4) + i] = ' ' - 0x20;
  }
}
/*
  Initializes VDP2, uploads the embedded div10_vdp2 DSP program to the SCU
  DSP and runs it to completion. Afterwards two dumps are printed over the
  serial port:

  - "answer":  the first 8 words of DSP data RAM starting at m3[0]
  - "answer2": the 8 VRAM words preceding word index (8 * 0x4000) / 4 + 64,
               highest address first, each prefixed with its address
*/
void main()
{
  constexpr int ppaf_load_pc_bit = 15;
  constexpr int ppaf_execute_bit = 16;
  constexpr int ppaf_end_bit = 18;
  constexpr int answer_words = 8;

  vdp2_init();

  scu.reg.PPAF = 0; // stop execution
  scu.reg.PPAF = (1 << ppaf_load_pc_bit) | 0; // reset PC

  // The linker symbols carry their values in their addresses:
  // &..._start is the first program word, &..._size is the length in bytes.
  uint32_t * words = (uint32_t *)&_binary_scu_dsp_div10_vdp2_dsp_start;
  int word_count = ((int)&_binary_scu_dsp_div10_vdp2_dsp_size) / 4;

  int index = 0;
  while (index < word_count) {
    scu.reg.PPD = words[index];
    index += 1;
  }

  scu.reg.PPAF = (1 << ppaf_execute_bit); // execute

  // busy-wait until the end flag reads as set
  while (((scu.reg.PPAF >> ppaf_end_bit) & 1) == 0) {
  }

  scu.reg.PPAF = 0;

  scu.reg.PDA = 64 * 3; // m3[0]

  serial_string("answer:\n");
  for (int remaining = answer_words; remaining > 0; remaining--) {
    serial_integer(scu.reg.PDD, 8, '\n');
  }

  serial_string("answer2:\n");
  // one past the last 32-bit entry of the first 64-entry row of plane 8
  volatile uint32_t * row_end = &vdp2.vram.u32[(8 * 0x4000) / 4 + 64];
  for (int back = 1; back <= answer_words; back++) {
    volatile uint32_t * entry = &row_end[-back];
    serial_integer((uint32_t)entry, 8, ' ');
    serial_integer(*entry, 8, '\n');
  }
}

5
scu-dsp/div10_vdp2.dsp.h Normal file
View File

@ -0,0 +1,5 @@
#pragma once
#include <stdint.h>
// Symbols bracketing the embedded scu-dsp/div10_vdp2.dsp program image.
// Only the *addresses* of these symbols are meaningful: main() in
// scu-dsp/div10_vdp2.cpp uses &..._start as a pointer to the first 32-bit
// program word and (&..._size / 4) as the number of words to upload.
// NOTE(review): presumably emitted by the toolchain when the .dsp file is
// wrapped into scu-dsp/div10_vdp2.dsp.o -- confirm against the build rules.
extern uint32_t _binary_scu_dsp_div10_vdp2_dsp_start __asm("_binary_scu_dsp_div10_vdp2_dsp_start");
extern uint32_t _binary_scu_dsp_div10_vdp2_dsp_end __asm("_binary_scu_dsp_div10_vdp2_dsp_end");
extern uint32_t _binary_scu_dsp_div10_vdp2_dsp_size __asm("_binary_scu_dsp_div10_vdp2_dsp_size");

View File

@ -8,6 +8,7 @@
int
load_bitmap_char(FT_Face face,
FT_ULong char_code,
bool hflip,
uint8_t * buf)
{
FT_Error error;
@ -33,6 +34,9 @@ load_bitmap_char(FT_Face face,
for (int x = 0; x < (int)face->glyph->bitmap.width; x += 1) {
const int bit = (row[x / 8] >> (7 - (x % 8))) & 1;
//std::cerr << (bit ? "█" : " ");
if (hflip)
buf[y * face->glyph->bitmap.width + (7 - x)] = bit;
else
buf[y * face->glyph->bitmap.width + x] = bit;
}
//std::cerr << "|\n";
@ -68,7 +72,7 @@ int load_font(FT_Library * library, FT_Face * face, const char * font_file_path)
void usage(const char * argv_0)
{
printf("%s [start-hex] [end-hex] [font-width] [font-height] [font-file-path] [output-file-path]\n", argv_0);
printf("%s [start-hex] [end-hex] [font-width] [font-height] [hflip] [font-file-path] [output-file-path]\n", argv_0);
}
void pack_4bit(const uint8_t * src, int width, int height, int size, uint8_t * dst)
@ -88,7 +92,7 @@ void pack_4bit(const uint8_t * src, int width, int height, int size, uint8_t * d
int main(int argc, const char * argv[])
{
if (argc != 7) {
if (argc != 8) {
usage(argv[0]);
return -1;
}
@ -105,8 +109,12 @@ int main(int argc, const char * argv[])
int font_height = strtol(argv[4], &endptr, 10);
assert(*endptr == 0);
const char * font_file_path = argv[5];
const char * output_file_path = argv[6];
int hflip = strtol(argv[5], &endptr, 10);
assert(*endptr == 0);
assert(hflip == 0 || hflip == 1);
const char * font_file_path = argv[6];
const char * output_file_path = argv[7];
printf("start_hex %x\n", start_hex);
printf("end_hex %x\n", start_hex);
@ -132,6 +140,7 @@ int main(int argc, const char * argv[])
res = load_bitmap_char(face,
char_code,
hflip,
&texture[offset]);
if (res < 0)
return - 1;