From 4503b3542ee8d76288b953ef8babb789da525da1 Mon Sep 17 00:00:00 2001 From: Zack Buhman Date: Fri, 8 Aug 2025 21:13:39 -0500 Subject: [PATCH] add scu-dsp demos --- .gitignore | 2 + Makefile | 2 + font/hp_100lx_4bit_flip.data | Bin 0 -> 3072 bytes font/hp_100lx_4bit_flip.data.h | 5 + scu-dsp/div10.asm | 76 +++++++++++++++ scu-dsp/div10.cpp | 35 +++++++ scu-dsp/div10.dsp.h | 5 + scu-dsp/div10_vdp2.asm | 104 ++++++++++++++++++++ scu-dsp/div10_vdp2.cpp | 171 +++++++++++++++++++++++++++++++++ scu-dsp/div10_vdp2.dsp.h | 5 + tools/ttf_bitmap2.cpp | 19 +++- 11 files changed, 419 insertions(+), 5 deletions(-) create mode 100644 font/hp_100lx_4bit_flip.data create mode 100644 font/hp_100lx_4bit_flip.data.h create mode 100644 scu-dsp/div10.asm create mode 100644 scu-dsp/div10.cpp create mode 100644 scu-dsp/div10.dsp.h create mode 100644 scu-dsp/div10_vdp2.asm create mode 100644 scu-dsp/div10_vdp2.cpp create mode 100644 scu-dsp/div10_vdp2.dsp.h diff --git a/.gitignore b/.gitignore index eb17b6f..9bb1b6a 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,8 @@ *.pcm *.su *.ss +*.dsp +*.exe res/mai.data tools/ttf-convert tools/ttf-bitmap diff --git a/Makefile b/Makefile index 3e43a22..3cc46b2 100644 --- a/Makefile +++ b/Makefile @@ -177,6 +177,8 @@ scu-dsp/add.elf: scu-dsp/add.o saturn/start.o cdc/serial.o scu-dsp/input.bin.o scu-dsp/div10.elf: scu-dsp/div10.o saturn/start.o cdc/serial.o scu-dsp/div10.dsp.o +scu-dsp/div10_vdp2.elf: scu-dsp/div10_vdp2.o saturn/start.o cdc/serial.o scu-dsp/div10_vdp2.dsp.o font/hp_100lx_4bit_flip.data.o + %.dsp: %.asm ~/scu-dsp-asm/scu-dsp-asm $< $@ diff --git a/font/hp_100lx_4bit_flip.data b/font/hp_100lx_4bit_flip.data new file mode 100644 index 0000000000000000000000000000000000000000..eab1fa2e15d3da11d0785c319c65fa31c2d93991 GIT binary patch literal 3072 zcmai$31X!%3`5}taR1AWo@8e$orXdZ$ChO!@OxcjX4-~*`Mu<~tuEcYYu7f1eHrNVXvz^?F<;g+}jxEMFSu}rVHXC=={ zEdb5yhD3#X(DQa-J{`=-`QhDs)*SfjR-LKK@vFlzwm-I=uj3Eb&uQgj4a!dUa}aQA 
zF2&F-du08*=KvT>!S}*lw!x+2Er}lr2^QhmQ!I?Te9gS)*kbp*IA9|kEqUy33~+QW z@yThc`8)eqPfeX4zyP=>!Qk&4;ZCN(FDKBb_9X@!9!qOnOzXH^R-4BxM2~HgSfhT* zT3YMf{ah;Nj)9hg8K{l=80&7}>V+q>-WS#UT7MQ)?RWk1niuf!0l_H78e><`_(>dW zD_Zz#eB%_BFu5?zkoFA&R1Ad{s_Z<%n zpyu5Igr9i0p5m{}u;*J9eD^aLpshadjWEzz-#zaYHj`@pt}5?u?P2rp`q#MM^v8D; za_G0k=RM_h-u6Fd?Wj+f;UaIwO5yp?Pu%r^cR8!hy*_tm8-Yaw>%6?y+b55~9sSUe z%L!-vh=F4vp7_HD?9N|j#@i;ley9>Jy9wHF(N*i#xZ*WIMF7vv;m-3B-uabKp~#2s zRdgc%d`gRigz5kN%at>NKI8Y*r$9E@*Nfki4Y1-`@#dfNAZx9+_7lHQdfmDTTXP!m zPGt#8!yZ>C9e9nMTo^5^wLkgK_p{0vL^WUV8PfLUWNQ5E3`F5jD$VylcmS<;)faSy z63NB+t?xjuCI3Jk`>?Jj_fTNy*FNl5?;l!_QAJgCntkZEQgl2b5O>6qqcHuu0;=YP zO{W;o6U#f~V?|)K$vmU;e5~u%y*H-ft>4G{zv7QL@6!&j;!}%P7Xe%kpKdMq8_@N+ q%+kbBHSY2O`t61snpQ}1%EsJU +extern uint32_t _binary_font_hp_100lx_4bit_flip_data_start __asm("_binary_font_hp_100lx_4bit_flip_data_start"); +extern uint32_t _binary_font_hp_100lx_4bit_flip_data_end __asm("_binary_font_hp_100lx_4bit_flip_data_end"); +extern uint32_t _binary_font_hp_100lx_4bit_flip_data_size __asm("_binary_font_hp_100lx_4bit_flip_data_size"); diff --git a/scu-dsp/div10.asm b/scu-dsp/div10.asm new file mode 100644 index 0000000..09922ea --- /dev/null +++ b/scu-dsp/div10.asm @@ -0,0 +1,76 @@ + mov 0,ct0 ; num + mov 0,ct1 ; vdp2 address + mov 0,ct3 ; output + mvi 12345,mc0 ; m0[0] (12345) + + ;; output: m3 +base10_loop: + mov 0,ct0 + mvi div10_unsigned,pc + ;; [X ] [Y ] [D1 ] + mov mc0,y mov 1,lop ; mvi imm,pc delay slot (executed twice) + ;; after function return: + mov all,mc0 ; m0[1] (1234) + mov all,pl ; ??? why can't this happen in the next instruction? 
+ ;; mod10: multiply by 10: + sl mov alu,a + sl mov alu,a + sl mov alu,a + add mov alu,a + add mov alu,a mov 0,ct0 ; restore ct0 after 'mov all,mc0' + + ;; a: 12340 + mov mc0,a mov all,pl + + ;; a: 12345 m0[0] + ;; p: 12340 + + ;; mod10: subtract (a - p) + sub mov alu,a + + ;; a: 5 + + ;; store digit in m3 + mov mc0,p clr a mov all,mc3 ; m0[1] + + ;; p: 1234 + ;; a: 0 + add mov alu,a mov 0,ct0 + + ;; a: 1234 + jmp nz,base10_loop + mov all,mc0 ; jmp delay slot + + endi + nop + + ;; argument: ry + ;; return: a ← ry / 10 + ;; maximum ry is somewhere between 2 ^ 21 and 2 ^ 22 +div10_unsigned: + ;; 1 / 10 * (2 ^ 24) ~= 1677722 = 0x19999a + mvi 1677722,rx + mov mul,p clr a + ad2 mov alu,a + + ;; ALH: [nmlkjihgfedcba9876543210________] + ;; ALL: [76543210________________________] + + clr a mov alh,pl ; alh is (a >> 16) + add mov alu,a + + ;; ALL: [nmlkjihgfedcba9876543210________] + + ;; rotate left 24 requires fewer cycles than shift right 8 + rl8 mov alu,a + rl8 mov alu,a + rl8 mov alu,a + + ;; ALL: [________nmlkjihgfedcba9876543210] + + ;; mask 24 bit result + mvi 0xffffff,pl + + ;; return to caller ; reset ct0 + btm + and mov alu,a mov 0,ct0 diff --git a/scu-dsp/div10.cpp b/scu-dsp/div10.cpp new file mode 100644 index 0000000..bed4e38 --- /dev/null +++ b/scu-dsp/div10.cpp @@ -0,0 +1,35 @@ +#include "cdc/serial.h" +#include "scu.h" +#include "vdp2.h" + +#include "scu-dsp/div10.dsp.h" + +void main() +{ + scu.reg.PPAF = 0; // stop execution + scu.reg.PPAF = (1 << 15) | 0; // reset PC + + uint32_t * program = (uint32_t *)&_binary_scu_dsp_div10_dsp_start; + int program_length = ((int)&_binary_scu_dsp_div10_dsp_size) / 4; + + for (int i = 0; i < program_length; i++) { + uint32_t p = program[i]; + scu.reg.PPD = p; + serial_integer(p, 8, '\n'); + } + + scu.reg.PPAF = (1 << 16); // execute + + int end_flag = 0; + while (end_flag == 0) { + end_flag = (scu.reg.PPAF >> 18) & 1; + } + + scu.reg.PPAF = 0; + scu.reg.PDA = 64 * 3; // m3[0] + + serial_string("answer:\n"); + for (int i 
= 0; i < 8; i++) { + serial_integer(scu.reg.PDD, 8, '\n'); + } +} diff --git a/scu-dsp/div10.dsp.h b/scu-dsp/div10.dsp.h new file mode 100644 index 0000000..fbf7539 --- /dev/null +++ b/scu-dsp/div10.dsp.h @@ -0,0 +1,5 @@ +#pragma once +#include <stdint.h> +extern uint32_t _binary_scu_dsp_div10_dsp_start __asm("_binary_scu_dsp_div10_dsp_start"); +extern uint32_t _binary_scu_dsp_div10_dsp_end __asm("_binary_scu_dsp_div10_dsp_end"); +extern uint32_t _binary_scu_dsp_div10_dsp_size __asm("_binary_scu_dsp_div10_dsp_size"); diff --git a/scu-dsp/div10_vdp2.asm b/scu-dsp/div10_vdp2.asm new file mode 100644 index 0000000..ce0c263 --- /dev/null +++ b/scu-dsp/div10_vdp2.asm @@ -0,0 +1,104 @@ + mov 0,ct0 ; num + mov 0,ct1 ; vdp2 address + mov 0,ct3 ; output + mvi 12345,mc0 ; m0[0] (12345) + + ;; clear m3 + mov 7,lop + lps + mov 0,mc3 + mov 0,ct3 + + ;; output: m3 +base10_loop: + mov 0,ct0 + mvi div10_unsigned,pc + ;; [X ] [Y ] [D1 ] + mov mc0,y mov 1,lop ; mvi imm,pc delay slot (executed twice) + ;; after function return: + mov all,mc0 ; m0[1] (1234) + mov all,pl ; ??? why can't this happen in the next instruction? 
+ ;; mod10: multiply by 10: + sl mov alu,a + sl mov alu,a + sl mov alu,a + add mov alu,a + add mov alu,a mov 0,ct0 ; restore ct0 after 'mov all,mc0' + + ;; a: 12340 + mov mc0,a mov all,pl + + ;; a: 12345 m0[0] + ;; p: 12340 + + ;; mod10: subtract (a - p) + sub mov alu,a + + ;; a: 5 + + ;; convert to vdp2 character + mvi 16,pl + add mov alu,a + + ;; store digit in m3 + mov mc0,p clr a mov all,mc3 ; m0[1] + + ;; p: 1234 + ;; a: 0 + add mov alu,a mov 0,ct0 + + ;; a: 1234 + jmp nz,base10_loop + mov all,mc0 ; jmp delay slot + + + ;; + ;; transfer to vdp2 + ;; + + ;; vdp2 address calculation + mvi ((8 * 0x4000 + (64 - 8) * 4 + 0x05e00000) >> 2),wa0 + mov 0,ct3 + ;; clr a mov 0,ct3 + ;; add mov alu,a + ;; mov all,wa0 + + ;; end vdp2 address calculation + dma1 mc3,d0,8 +dma_wait: + jmp t0,dma_wait + nop + + endi + nop + + ;; argument: ry + ;; return: a ← ry / 10 + ;; maximum ry is somewhere between 2 ^ 21 and 2 ^ 22 +div10_unsigned: + ;; 1 / 10 * (2 ^ 24) ~= 1677722 = 0x19999a + mvi 1677722,rx + mov mul,p clr a + ad2 mov alu,a + + ;; ALH: [nmlkjihgfedcba9876543210________] + ;; ALL: [76543210________________________] + + clr a mov alh,pl ; alh is (a >> 16) + add mov alu,a + + ;; ALL: [nmlkjihgfedcba9876543210________] + + ;; rotate left 24 requires fewer cycles than shift right 8 + rl8 mov alu,a + rl8 mov alu,a + rl8 mov alu,a + + ;; ALL: [________nmlkjihgfedcba9876543210] + + ;; mask 24 bit result + mvi 0xffffff,pl + + ;; return to caller ; reset ct0 + btm + and mov alu,a mov 0,ct0 diff --git a/scu-dsp/div10_vdp2.cpp b/scu-dsp/div10_vdp2.cpp new file mode 100644 index 0000000..2a704ab --- /dev/null +++ b/scu-dsp/div10_vdp2.cpp @@ -0,0 +1,171 @@ +#include "cdc/serial.h" +#include "scu.h" +#include "vdp2.h" +#include "../common/vdp2_func.hpp" + +#include "scu-dsp/div10_vdp2.dsp.h" + +#include "font/hp_100lx_4bit_flip.data.h" + +void cell_data() +{ + const uint32_t * start = reinterpret_cast<const uint32_t *>(&_binary_font_hp_100lx_4bit_flip_data_start); + const int size = 
reinterpret_cast<int>(&_binary_font_hp_100lx_4bit_flip_data_size); + + // the start of VRAM-A0 + for (int i = 0; i < (size / 4); i++) { + vdp2.vram.u32[i] = start[i]; + } +} + +void palette_data() +{ + vdp2.cram.u16[0] = 0x0000; + vdp2.cram.u16[1] = 0xffff; +} + +void vdp2_init() +{ + v_blank_in(); + + // DISP: Please make sure to change this bit from 0 to 1 during V blank. + vdp2.reg.TVMD = ( TVMD__DISP | TVMD__LSMD__NON_INTERLACE + | TVMD__VRESO__240 | TVMD__HRESO__NORMAL_320); + + /* set the color mode to 5bits per channel, 1024 colors */ + vdp2.reg.RAMCTL = RAMCTL__CRMD__RGB_5BIT_1024 + | RAMCTL__VRAMD + | RAMCTL__VRBMD + | RAMCTL__RDBSA0__CHARACTER_PATTERN_TABLE // VRAM-A0 0x000000 + | RAMCTL__RDBSA1__PATTERN_NAME_TABLE; // VRAM-A1 0x020000 + + vdp2.reg.VRSIZE = 0; + + /* enable display of RBG0 */ + vdp2.reg.BGON = BGON__R0ON | BGON__R0TPON; + + /* set character format for RBG0 to palettized 16 color + set enable "cell format" for RBG0 + set character size for RBG0 to 1x1 cell */ + vdp2.reg.CHCTLB = CHCTLB__R0CHCN__16_COLOR + | CHCTLB__R0BMEN__CELL_FORMAT + | CHCTLB__R0CHSZ__1x1_CELL; + + /* plane size */ + vdp2.reg.PLSZ = PLSZ__RAPLSZ__1x1 + | PLSZ__RBPLSZ__1x1; + + /* map plane offset + 1-word: value of bit 6-0 * 0x2000 + 2-word: value of bit 5-0 * 0x4000 + */ + // plane_a_offset is at the start of VRAM-A1 + constexpr int plane_a = 8; + constexpr int plane_a_offset = plane_a * 0x4000; + + constexpr int page_size = 64 * 64 * 2; // N0PNB__1WORD (16-bit) + constexpr int plane_size = page_size * 1; + + /* cycle pattern table not used for RBG0 ? 
*/ + vdp2.reg.CYCA0 = 0x0F44F99F; + vdp2.reg.CYCA1 = 0x0F44F99F; + vdp2.reg.CYCB0 = 0x0F44F99F; + vdp2.reg.CYCB1 = 0x0F44F99F; + + vdp2.reg.MPOFR = MPOFR__RAMP(0); // bits 8~6 + vdp2.reg.MPABRA = MPABRA__RAMPB(plane_a) | MPABRA__RAMPA(plane_a); // bits 5~0 + vdp2.reg.MPCDRA = MPCDRA__RAMPD(plane_a) | MPCDRA__RAMPC(plane_a); // bits 5~0 + vdp2.reg.MPEFRA = MPEFRA__RAMPF(plane_a) | MPEFRA__RAMPE(plane_a); // bits 5~0 + vdp2.reg.MPGHRA = MPGHRA__RAMPH(plane_a) | MPGHRA__RAMPG(plane_a); // bits 5~0 + vdp2.reg.MPIJRA = MPIJRA__RAMPJ(plane_a) | MPIJRA__RAMPI(plane_a); // bits 5~0 + vdp2.reg.MPKLRA = MPKLRA__RAMPL(plane_a) | MPKLRA__RAMPK(plane_a); // bits 5~0 + vdp2.reg.MPMNRA = MPMNRA__RAMPN(plane_a) | MPMNRA__RAMPM(plane_a); // bits 5~0 + vdp2.reg.MPOPRA = MPOPRA__RAMPP(plane_a) | MPOPRA__RAMPO(plane_a); // bits 5~0 + + vdp2.reg.PNCR = PNCR__R0PNB__2WORD; + + vdp2.reg.RPMD = RPMD__ROTATION_PARAMETER_A; + + //vdp2.reg.RPRCTL = 0; + + vdp2.reg.KTCTL = 0; + + vdp2.reg.KTAOF = 0; + + vdp2.reg.PRIR = 3; + + palette_data(); + cell_data(); + + volatile struct vdp2_rotation_parameter_table * table = (struct vdp2_rotation_parameter_table *)&vdp2.vram.u32[0x4000 / 4]; + table->screen_start_coordinate_xst = 0; + table->screen_start_coordinate_yst = 0; + table->screen_start_coordinate_zst = 0; + table->screen_vertical_coordinate_increment_dxst = 0; + table->screen_vertical_coordinate_increment_dyst = (1 << 16); + table->screen_horizontal_coordinate_increment_dx = (1 << 16); + table->screen_horizontal_coordinate_increment_dy = 0; + table->rotation_matrix_parameter_a = -(1 << 16); // negate after shifting: left-shifting a negative value is undefined before C++20 + table->rotation_matrix_parameter_b = 0; + table->rotation_matrix_parameter_c = 0; + table->rotation_matrix_parameter_d = 0; + table->rotation_matrix_parameter_e = (1 << 16); + table->rotation_matrix_parameter_f = 0; + table->viewpoint_coordinate_px = 0; + table->viewpoint_coordinate_py = 0; + table->viewpoint_coordinate_pz = 0; + table->center_point_coordinate_px = 0; + table->center_point_coordinate_py 
= 0; + table->center_point_coordinate_pz = 0; + table->horizontal_shift_mx = 0; + table->horizontal_shift_my = 0; + table->scaling_coefficient_kx = (1 << 16); + table->scaling_coefficient_ky = (1 << 16); + + vdp2.reg.RPTA = (((uint32_t)table) >> 1) & 0x7ffff; + + /* */ + + for (int i = 0; i < 64 * 64; i++) { + vdp2.vram.u32[(plane_a_offset / 4) + i] = ' ' - 0x20; + } +} + +void main() +{ + vdp2_init(); + + scu.reg.PPAF = 0; // stop execution + scu.reg.PPAF = (1 << 15) | 0; // reset PC + + uint32_t * program = (uint32_t *)&_binary_scu_dsp_div10_vdp2_dsp_start; + int program_length = ((int)&_binary_scu_dsp_div10_vdp2_dsp_size) / 4; + + for (int i = 0; i < program_length; i++) { + uint32_t p = program[i]; + scu.reg.PPD = p; + //serial_integer(p, 8, '\n'); + } + + scu.reg.PPAF = (1 << 16); // execute + + int end_flag = 0; + while (end_flag == 0) { + end_flag = (scu.reg.PPAF >> 18) & 1; + } + + scu.reg.PPAF = 0; + scu.reg.PDA = 64 * 3; // m3[0] + + serial_string("answer:\n"); + for (int i = 0; i < 8; i++) { + serial_integer(scu.reg.PDD, 8, '\n'); + } + serial_string("answer2:\n"); + volatile uint32_t * buf = &vdp2.vram.u32[(8 * 0x4000) / 4 + 64]; + for (int i = 0; i < 8; i++) { + uint32_t addr = (uint32_t)&buf[-(i + 1)]; + serial_integer(addr, 8, ' '); + serial_integer(buf[-(i + 1)], 8, '\n'); + } +} diff --git a/scu-dsp/div10_vdp2.dsp.h b/scu-dsp/div10_vdp2.dsp.h new file mode 100644 index 0000000..e20a3bf --- /dev/null +++ b/scu-dsp/div10_vdp2.dsp.h @@ -0,0 +1,5 @@ +#pragma once +#include <stdint.h> +extern uint32_t _binary_scu_dsp_div10_vdp2_dsp_start __asm("_binary_scu_dsp_div10_vdp2_dsp_start"); +extern uint32_t _binary_scu_dsp_div10_vdp2_dsp_end __asm("_binary_scu_dsp_div10_vdp2_dsp_end"); +extern uint32_t _binary_scu_dsp_div10_vdp2_dsp_size __asm("_binary_scu_dsp_div10_vdp2_dsp_size"); diff --git a/tools/ttf_bitmap2.cpp b/tools/ttf_bitmap2.cpp index c61b3b8..f49d3d6 100644 --- a/tools/ttf_bitmap2.cpp +++ b/tools/ttf_bitmap2.cpp @@ -8,6 +8,7 @@ int load_bitmap_char(FT_Face 
face, FT_ULong char_code, + bool hflip, uint8_t * buf) { FT_Error error; @@ -33,7 +34,10 @@ load_bitmap_char(FT_Face face, for (int x = 0; x < (int)face->glyph->bitmap.width; x += 1) { const int bit = (row[x / 8] >> (7 - (x % 8))) & 1; //std::cerr << (bit ? "█" : " "); - buf[y * face->glyph->bitmap.width + x] = bit; + if (hflip) // mirror each row within the glyph's own width (not a fixed 8 pixels) + buf[y * face->glyph->bitmap.width + (face->glyph->bitmap.width - 1 - x)] = bit; + else + buf[y * face->glyph->bitmap.width + x] = bit; } //std::cerr << "|\n"; } @@ -68,7 +72,7 @@ int load_font(FT_Library * library, FT_Face * face, const char * font_file_path) void usage(const char * argv_0) { - printf("%s [start-hex] [end-hex] [font-width] [font-height] [font-file-path] [output-file-path]\n", argv_0); + printf("%s [start-hex] [end-hex] [font-width] [font-height] [hflip] [font-file-path] [output-file-path]\n", argv_0); } void pack_4bit(const uint8_t * src, int width, int height, int size, uint8_t * dst) @@ -88,7 +92,7 @@ void pack_4bit(const uint8_t * src, int width, int height, int size, uint8_t * d int main(int argc, const char * argv[]) { - if (argc != 7) { + if (argc != 8) { usage(argv[0]); return -1; } @@ -105,8 +109,12 @@ int main(int argc, const char * argv[]) int font_height = strtol(argv[4], &endptr, 10); assert(*endptr == 0); - const char * font_file_path = argv[5]; - const char * output_file_path = argv[6]; + int hflip = strtol(argv[5], &endptr, 10); + assert(*endptr == 0); + assert(hflip == 0 || hflip == 1); + + const char * font_file_path = argv[6]; + const char * output_file_path = argv[7]; printf("start_hex %x\n", start_hex); printf("end_hex %x\n", start_hex); @@ -132,6 +140,7 @@ int main(int argc, const char * argv[]) res = load_bitmap_char(face, char_code, + hflip, &texture[offset]); if (res < 0) return - 1;