From 4af5a3ba926153a72423eb08cd61dcf1aadd98de Mon Sep 17 00:00:00 2001
From: Zack Buhman <zack@buhman.org>
Date: Sat, 28 Jan 2023 17:09:25 -0800
Subject: [PATCH] vdp1: add normal_sprite example

---
 Makefile               |   1 +
 math/vec.hpp           |  10 +--
 res/mai.data           | Bin 0 -> 8000 bytes
 res/mai.data.pal       | Bin 0 -> 48 bytes
 vdp1/normal_sprite.cpp | 159 +++++++++++++++++++++++++++++++++++++++++
 vdp2/nbg0.cpp          |   6 +-
 6 files changed, 168 insertions(+), 8 deletions(-)
 create mode 100644 res/mai.data
 create mode 100644 res/mai.data.pal
 create mode 100644 vdp1/normal_sprite.cpp
diff --git a/Makefile b/Makefile
index 65e5e93..9fd810c 100644
--- a/Makefile
+++ b/Makefile
@@ -28,6 +28,7 @@ raytracing/raytracing.elf: raytracing/main-saturn.o raytracing/raytracing.o sh/l
 vdp2/nbg0.elf: vdp2/nbg0.o res/butterfly.data.o res/butterfly.data.pal.o
 
 vdp1/polygon.elf: vdp1/polygon.o
+vdp1/normal_sprite.elf: vdp1/normal_sprite.o res/mai.data.o res/mai.data.pal.o
 
 # clean
 clean: clean-sh
diff --git a/math/vec.hpp b/math/vec.hpp
index 7ba21d5..1a69ad5 100644
--- a/math/vec.hpp
+++ b/math/vec.hpp
@@ -2,14 +2,14 @@
 
 #include "math.hpp"
 
-template<int L, typename T>
+template <int L, typename T>
 struct vec;
 
 //
 // vec3
 //
 
-template<typename T>
+template <typename T>
 struct vec<3, T>
 {
   union
@@ -67,7 +67,7 @@ inline constexpr T const& vec<3, T>::operator[](int i) const
   }
 }
 
-template<typename T>
+template <typename T>
 inline constexpr vec<3, T>& vec<3, T>::operator=(vec<3, T> const& v)
 {
   this->x = static_cast<T>(v.x);
@@ -76,14 +76,14 @@ inline constexpr vec<3, T>& vec<3, T>::operator=(vec<3, T> const& v)
   return *this;
 }
 
-template<typename T>
+template <typename T>
 inline constexpr vec<3, T>& vec<3, T>::operator+=(vec<3, T> const& v)
 {
   *this = *this + vec<3, T>(v);
   return *this;
 }
 
-template<typename T>
+template <typename T>
 inline constexpr vec<3, T>& vec<3, T>::operator-=(vec<3, T> const& v)
 {
   *this = *this + vec<3, T>(v);
diff --git a/res/mai.data b/res/mai.data
new file mode 100644
index 0000000000000000000000000000000000000000..e85e2ceeaf61ad461eeb418ed2ba7f8cdaf2acae
GIT binary patch
literal 8000
zcmeI1+m_-W5JfR!Bm&9*|LyEkgqYa23ijhLYiTDTn^PCCV4VBkjvg3kE-lOPZugaN
zV_INNe}NpvfhOLc0KuJTQ{oRdrEV+hi-X|=JPm({JGp6^WcXMI>ib^Rot&F2E;tiE
zcDIUyS8#j+Y$VR}oP%k2<c0Bc;$9OlgPZ3V1ypFQIl^lS%VmSWoDG=coMTvNr;}4C
zXgS9dcZbhC2rpl+*Bqo0DpJ!zvKTK$o<>IboW~p28^B;f4On<V@`5RB;f0928}Hu6
z+0<l~f{XV<l)Wb&%L||Db0J=@qa<2XBGxYHa#@Cm1NlTEVQSzxP$LE_#&d|V;F!M%
zT<`*!)W&n}1`ixDAs#{M(i}k#3RXs2lRo;7NCO8Wgyg|xc(_hYncm8Cw;QoEUP_+$
z!i%Fq9vf2m;ACKqf8fm891F0_!$Z%O3t4OO@LnVXTi$)vH+TiZ+@^pe@6%UnbhsdI
zHeR5?`|fw{f;`P*0zoUWd1l9l_plauS{EvhOjH7PwjECr)5XJ^J7K)$@jv=Nu_&@I
zToRf1B~Q^6ZqBR=@bVYDv+_fkCm!;A(wytJ_gsKMJkt8&oy7L@l0TnQ(mcj_B;S|3
zw4hHG2U5>)U+F_qD!|ET$aCNLPrOuIh)f!e`+`UQE@?^=#3D~|=;vh;cx}{HwN@iC
zb60@*<;WhtZLMU9cwGs|Jji5AB~C?@a9tBleu`evRRhcNU)+c8BDLwW@QigPAn;5e
zx4U^Unl}3oPe9{!#6xZ}-fFS&;E1=@nr-Idbrj*bqOtIzixf5;@|@~EmUv(VxbmpH
zsBW5+mCnkF0^ITP43A2wmj|#4)T+uD#_Pa2(^o<wfh>YlNbox4Rh|TDfjOsX<Mqg^
zJf@s@EYim7Hh1IA%(*?UFB<O7)0(j9qaeR(QEhK>erdpHvRB)Z>*%{GKwFj!7&jiY
z+co%})x}vfjh8Ersc~c5<hz#VvTho1<xx$7i|~2`@^*3GJg>ZBe2AVbZ;!{$VB<~N
ziLe)m8D5{jb;_AOH?!=`fgQ;V?>m9g=a4sMwj?XBBWG+qk&VU!=d-7dF@1!k*@as~
zl+X^`l3nHD$7UeB;z98i_6hn0F7d#DJZ)6S8x~ScV8cxUSiY#%53Nz=9pkaUP)Sl4
z6+J;t6tA)D&`YXv^LSns>Z@txIgju3kY}Hptj^-`F@nQ+73VxQ7l|ML5GxL#%q~t6
z!BuOqS?iw)YDxS|pt{lyxVyZI{8)O>M;C8e?JFd^Bkd^f*0N*L>o%4;zG3aVvVHc`
z>by;!Teqtf_+H>9F9h$W$zoB9U4|?A?h4-ttOJ7`#j{4uTi^5Mzq=jp1m5L=Q&TCv
z<6SDR`mhf4{JY8vIIpy|g09`Gy}Gu}uLTx)flj5aqC7BtTR`@_0(^BASVpuVcRcy<
znZBZMcmb5WYCr+9W7y-i_^x~e{C-$DNM6M`)m8rNN_|k}ak>3zf7prW@z!=F-iFfw
zHVuFgYo{OO3&8Z_W*KT2Lb#utkt6~d?@?8y^9}BvoxA&bLc8Bl))k(R_xlfj(Tfwa
zhnu?e%K_)0i6=L{;<1_Kl;(-WoQ3S=wefxeJ%=F&N1$%g_q(~I0S615a_u}NYdIt>
z2M)Thw(%6;dh^Nm${P;AGiNJ5dAN^>_sV<XCk}czPd-c&24N*WOim|j>He0-Sv+UT
zKY1L2`)!@mz=a1;7r!^1?GFz;cSbY3Ln75x;0@0i@Q@o=#j!edK-zn?@_v`LK6#i4
fcGm_O_}`beZ-3U|uX$zqK6v3XDgS%@p$Gl}jD}Fi

literal 0
HcmV?d00001

diff --git a/res/mai.data.pal b/res/mai.data.pal
new file mode 100644
index 0000000000000000000000000000000000000000..2096178803f669d81cc8fc9e5a09197ebc02564e
GIT binary patch
literal 48
zcmV-00MGvbfB-BF06P!|p#T8P008~~0B|lA!AeP;TS0$*n*U^DzlB(+rM%vzY5mB7
G|NsAHofPT-

literal 0
HcmV?d00001

diff --git a/vdp1/normal_sprite.cpp b/vdp1/normal_sprite.cpp
new file mode 100644
index 0000000..fdc9461
--- /dev/null
+++ b/vdp1/normal_sprite.cpp
@@ -0,0 +1,159 @@
+#include <stdint.h>
+#include "vdp2.h"
+#include "vdp1.h"
+
+extern void * _mai_data_pal_start __asm("_binary_res_mai_data_pal_start"); // only its address is used: start of the rgb24 palette bytes
+extern void * _mai_data_pal_size __asm("_binary_res_mai_data_pal_size"); // only its address is used: the address value is the palette size in bytes
+
+extern void * _mai_data_start __asm("_binary_res_mai_data_start"); // only its address is used: start of the 8-bit indexed pixel data
+extern void * _mai_data_size __asm("_binary_res_mai_data_size"); // only its address is used: the address value is the pixel data size in bytes
+
+inline constexpr uint16_t rgb15(const uint8_t * rgb24) // reads rgb24[0..2] (r, g, b) and returns a packed 15-bit color
+{
+  return ((rgb24[2] >> 3) << 10) // blue:  top 5 bits of byte 2 -> bits 14..10
+       | ((rgb24[1] >> 3) << 5)  // green: top 5 bits of byte 1 -> bits 9..5
+       | ((rgb24[0] >> 3) << 0); // red:   top 5 bits of byte 0 -> bits 4..0
+}
+
+uint32_t color_lookup_table(const uint32_t top) // writes the palette just below `top`; returns the table's byte offset in vdp1.vram
+{
+  const uint32_t buf_size = reinterpret_cast<uint32_t>(&_mai_data_pal_size); // the symbol's address is the size
+  if (buf_size != (0x20 * 3 / 2)) while (1); // halt unless the palette is exactly 48 bytes (16 colors * 3 bytes)
+
+  const uint8_t * buf = reinterpret_cast<uint8_t*>(&_mai_data_pal_start);
+
+  // "The size of a color lookup table is 20H (32) bytes", so place the table
+  // in the 0x20 bytes just below `top` (assume top is already aligned to 0x20)
+  const uint32_t table_address = top - 0x20;
+
+  // "The color lookup table defines the respective color codes of 16 colors in
+  // VRAM as 16-bit data"; table_address is in bytes, so halve it to index u16
+  uint16_t * table = &vdp1.vram.u16[(table_address / 2)];
+
+  uint32_t buf_ix = 0;
+  for (uint32_t i = 0; i < (buf_size / 3); i++) {
+    // The manual's "5.2 Color Lookup Tables" says "If RGB code, MSB = 0"; that
+    // is a typo for "MSB = 1" (the manual itself contradicts "MSB = 0" later).
+
+    table[i] = 1 << 15 | rgb15(&buf[buf_ix]);
+    // _mai_data_pal is rgb24, 3 bytes per color
+    buf_ix += 3;
+  }
+
+  return table_address;
+}
+
+uint32_t character_pattern_table(const uint32_t top) // writes 4-bit pixels just below `top`; returns their byte offset in vdp1.vram
+{
+  const uint32_t buf_size = reinterpret_cast<uint32_t>(&_mai_data_size); // the symbol's address is the size
+  const uint32_t * buf = reinterpret_cast<uint32_t*>(&_mai_data_start);
+
+  // Unlike vdp2 cell format, vdp1 sprites appear to be much more dimensionally
+  // flexible. The data is interpreted as a row-major packed array, where the
+  // row/horizontal stride is equal to the sprite width (as configured in the
+  // draw command). This is identical to how the input palette index data is
+  // structured, so no pixel reordering is needed here: the copy below only
+  // narrows each pixel from 8 bits to 4 bits.
+
+  // Divide `buf_size` by two because this converts (indexed color) 8 bit pixels
+  // to 4 bit pixels. Round up to the nearest 0x20 (a no-op for an 8000 byte
+  // image). NOTE(review): if rounding ever pads, the loop reads past the data.
+  const uint32_t table_size = ((buf_size / 2) + 0x20 - 1) & (-0x20);
+  const uint32_t table_address = top - table_size;
+  uint16_t * table = &vdp1.vram.u16[(table_address / 2)];
+
+  // `table_size` is in bytes; divide by two to get uint16_t indices.
+  uint32_t buf_ix = 0;
+  for (uint32_t table_ix = 0; table_ix < (table_size / 2); table_ix++) { // each pass packs 4 source bytes into one uint16_t
+    uint32_t tmp = buf[buf_ix]; // four 8-bit pixels; NOTE(review): bits 31..24 are treated as the first pixel, which presumes a big-endian load
+    table[table_ix] = (((tmp >> 24) & 0xf) << 12) // low nibble of bits 31..24 -> bits 15..12
+                    | (((tmp >> 16) & 0xf) << 8 ) // low nibble of bits 23..16 -> bits 11..8
+                    | (((tmp >> 8 ) & 0xf) << 4 ) // low nibble of bits 15..8  -> bits 7..4
+                    | (((tmp >> 0 ) & 0xf) << 0 ); // low nibble of bits 7..0   -> bits 3..0
+    buf_ix += 1;
+  }
+
+  return table_address;
+}
+
+void main() // one-time setup: upload the sprite tables, configure VDP2/VDP1, build a 4-entry command list
+{
+  uint32_t color_address, character_address;
+  uint32_t top = (sizeof (union vdp1_vram)); // start at the size of VDP1 VRAM; each table is placed below `top`
+  top = color_address     = color_lookup_table(top);
+  top = character_address = character_pattern_table(top);
+
+  // DISP: Please make sure to change this bit from 0 to 1 during V blank. (NOTE(review): this code does not wait for V blank.)
+  vdp2.reg.TVMD = ( TVMD__DISP | TVMD__LSMD__NON_INTERLACE
+                  | TVMD__VRESO__240 | TVMD__HRESO__NORMAL_320);
+
+  // VDP2 User's Manual:
+  // "When sprite data is in an RGB format, sprite register 0 is selected"
+  // "When the value of a priority number is 0h, it is read as transparent"
+  //
+  // From a VDP2 perspective: in VDP1 16-color lookup table mode, VDP1 is still
+  // sending RGB data to VDP2. The RGB codes written by `color_lookup_table`
+  // therefore use sprite register 0 for VDP2 priority purposes.
+  //
+  // The power-on value of PRISA is zero. Set the priority for sprite register 0
+  // to some number greater than zero, so that the color data is not interpreted
+  // as "transparent".
+  vdp2.reg.PRISA = PRISA__S0PRIN(1); // Sprite register 0 PRIority Number
+
+  /* TVM settings must be performed from the second H-blank IN interrupt after the
+  V-blank IN interrupt to the H-blank IN interrupt immediately after the V-blank
+  OUT interrupt. */
+  // "normal" display resolution, 16 bits per pixel, 512x256 framebuffer
+  vdp1.reg.TVMR = TVMR__TVM__NORMAL;
+
+  // swap framebuffers every 1 cycle; non-interlace
+  vdp1.reg.FBCR = 0;
+
+  // during a framebuffer erase cycle, write the color "black" to each pixel
+  constexpr uint16_t black = 0x0000;
+  vdp1.reg.EWDR = black;
+
+  // the EWLR/EWRR macros use somewhat nontrivial math for the X coordinates
+  // erase upper-left coordinate: (0, 0)
+  vdp1.reg.EWLR = EWLR__16BPP_X1(0) | EWLR__Y1(0);
+
+  // erase lower-right coordinate: (319, 239), matching the 320x240 mode set in TVMD
+  vdp1.reg.EWRR = EWRR__16BPP_X3(319) | EWRR__Y3(239);
+
+  vdp1.vram.cmd[0].CTRL = CTRL__JP__JUMP_NEXT | CTRL__COMM__SYSTEM_CLIP_COORDINATES; // command 0: system clip coordinates
+  vdp1.vram.cmd[0].LINK = 0;
+  vdp1.vram.cmd[0].XC = 319; // same X as the erase lower-right coordinate
+  vdp1.vram.cmd[0].YC = 239; // same Y as the erase lower-right coordinate
+
+  vdp1.vram.cmd[1].CTRL = CTRL__JP__JUMP_NEXT | CTRL__COMM__LOCAL_COORDINATE; // command 1: local coordinate (0, 0)
+  vdp1.vram.cmd[1].LINK = 0;
+  vdp1.vram.cmd[1].XA = 0;
+  vdp1.vram.cmd[1].YA = 0;
+
+  vdp1.vram.cmd[2].CTRL = CTRL__JP__JUMP_NEXT | CTRL__COMM__NORMAL_SPRITE; // command 2: the sprite itself
+  vdp1.vram.cmd[2].LINK = 0;
+  // The "end code" is 0xf, which is being used in the mai sprite palette. If
+  // both transparency and end codes are enabled, it seems there are only 14
+  // usable colors in the 4-bit color mode.
+  vdp1.vram.cmd[2].PMOD = PMOD__ECD | PMOD__COLOR_MODE__LOOKUP_TABLE_16; // NOTE(review): ECD presumably disables end codes so that index 0xf is drawn; confirm
+  // It appears Kronos does not correctly calculate the color address in the
+  // VDP1 debugger. Kronos will report FFFC (the raw COLR value, 7FFE0 >> 3)
+  // when the actual color table address in this example is 7FFE0.
+  vdp1.vram.cmd[2].COLR = color_address >> 3; // non-palettized (rgb15) color data; stored as byte offset / 8
+  vdp1.vram.cmd[2].SRCA = character_address >> 3; // stored as byte offset / 8
+  vdp1.vram.cmd[2].SIZE = SIZE__X(80) | SIZE__Y(100); // 80 * 100 = 8000 pixels, one per byte of res/mai.data
+  vdp1.vram.cmd[2].XA = 100; // NOTE(review): presumably the sprite's upper-left X; confirm against the VDP1 manual
+  vdp1.vram.cmd[2].YA = 100; // NOTE(review): presumably the sprite's upper-left Y; confirm against the VDP1 manual
+
+  vdp1.vram.cmd[3].CTRL = CTRL__END; // command 3: terminates the command list
+
+  // start drawing (execute the command list) on every frame
+  vdp1.reg.PTMR = PTMR__PTM__FRAME_CHANGE;
+}
+
+extern "C" // C linkage: the symbol is emitted as plain `start`, not a mangled C++ name
+void start(void) // NOTE(review): presumably the entry point referenced by the startup code or linker script; confirm
+{
+  main(); // performs the one-time VDP1/VDP2 setup, then returns
+  while (1) {} // never return from start(); spin forever once setup is done
+}
diff --git a/vdp2/nbg0.cpp b/vdp2/nbg0.cpp
index 308b8e8..0701e62 100644
--- a/vdp2/nbg0.cpp
+++ b/vdp2/nbg0.cpp
@@ -20,9 +20,9 @@ extern void * _butterfly_data_size __asm("_binary_res_butterfly_data_size");
 
 inline constexpr uint16_t rgb15(const uint8_t * buf)
 {
-  return ((buf[2] >> 3) << 10)
-       | ((buf[1] >> 3) << 5)
-       | ((buf[0] >> 3) << 0);
+  return ((buf[2] >> 3) << 10) // blue:  top 5 bits of byte 2 -> bits 14..10
+       | ((buf[1] >> 3) << 5)  // green: top 5 bits of byte 1 -> bits 9..5
+       | ((buf[0] >> 3) << 0); // red:   top 5 bits of byte 0 -> bits 4..0
 }
 
 void palette_data()