diff --git a/.gitignore b/.gitignore
index 5e7a2b1..37473f1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,3 +19,5 @@ verbatim/*.svg
 verbatim/*.pdf
 verbatim/output
 images/*.data
+/index*.svg
+diagrams/*-.svg
\ No newline at end of file
diff --git a/build.sh b/build.sh
index 8492761..427b17b 100644
--- a/build.sh
+++ b/build.sh
@@ -15,10 +15,17 @@ echo 'figure.figure { margin-left: 20px; margin-right: 20px;  }' >> index.css
 echo 'pre.verbatim { font-size: 0.9em; }' >> index.css
 sed -i 's|color-scheme: light dark;||g' index.css
 echo 'figcaption.caption { margin-bottom: 1.3em; margin-top: 0.3em; }' >> index.css
+echo '.cmti-10 { font-style: italic; }' >> index.css
 
-sed -i 's/index.css/index2.css/g' index.html
+sed -i 's/˜/~/g' index.html
 
-mv index.css index2.css
+sed -i "s|<p class='noindent'><object class='graphics' data='diagrams/z_operations.svg' name='picture diagrams/z_operations' type='image/svg+xml'></object>|<p class='noindent' style='text-align: center;'><object class='graphics' style='width: 40em;' data='diagrams/z_operations.svg' name='picture diagrams/z_operations' type='image/svg+xml'></object>|g" index.html
+
+sed -i '/height: 2.5em;/d' index.css
+
+sed -i 's/index.css/index3.css/g' index.html
+
+mv index.css index3.css
 
 python replace_video.py index.html
 
diff --git a/diagrams/build.sh b/diagrams/build.sh
new file mode 100644
index 0000000..ff7f228
--- /dev/null
+++ b/diagrams/build.sh
@@ -0,0 +1,3 @@
+dot -Tsvg z_operations.dot > z_operations.svg
+
+#sed -i 's/scale(1 1)/scale(0.75 0.75)/g' z_operations.svg
diff --git a/diagrams/resize_dot_svg.py b/diagrams/resize_dot_svg.py
new file mode 100644
index 0000000..edf8eaf
--- /dev/null
+++ b/diagrams/resize_dot_svg.py
@@ -0,0 +1,62 @@
+"""Scale the viewBox of every <svg ...> opening tag of an SVG file, in place.
+
+Usage: python resize_dot_svg.py FILE.svg
+"""
+import sys
+
+# Factor applied to the viewBox width and height (the x/y origin is kept).
+scale = 0.75
+
+
+def scale_svg(lines):
+    """Yield the pieces of an <svg ...> opening tag with a scaled viewBox.
+
+    `lines` are the source lines that make up the tag.  The width and height
+    of its viewBox attribute are multiplied by `scale`; everything else is
+    yielded unchanged.  A tag without a viewBox is passed through as-is.
+    """
+    svg = "".join(lines)
+    head, marker, rest = svg.partition('viewBox="')
+    if not marker:
+        yield svg
+        return
+    viewbox, _, tail = rest.partition('"')
+    x, y, width, height = map(float, viewbox.split())
+    yield head
+    yield f'viewBox="{x} {y} {width * scale} {height * scale}"'
+    yield tail
+
+
+def transform():
+    """Yield the text of the file named by sys.argv[1], viewBox rescaled."""
+    with open(sys.argv[1]) as f:
+        # Lines of an <svg ...> opening tag, collected until its closing '>'.
+        svg_lines = []
+
+        for line in f:
+            stripped = line.strip()
+            if not svg_lines and not stripped.startswith("<svg"):
+                yield line
+                continue
+            svg_lines.append(line)
+            # Also checked on the "<svg" line itself, so a one-line tag is
+            # complete immediately instead of swallowing the next element.
+            if stripped.endswith(">"):
+                yield from scale_svg(svg_lines)
+                svg_lines = []
+
+        # An unterminated tag at end of file is written back untouched rather
+        # than silently dropped.
+        yield from svg_lines
+
+
+def main():
+    # Read and transform everything before reopening the same path for
+    # writing; opening with 'w' first would truncate the input.
+    lines = list(transform())
+    with open(sys.argv[1], 'w') as f:
+        f.write(''.join(lines))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/diagrams/sin_clamp.pdf b/diagrams/sin_clamp.pdf
new file mode 100644
index 0000000..57006be
Binary files /dev/null and b/diagrams/sin_clamp.pdf differ
diff --git a/diagrams/sin_clamp.tex b/diagrams/sin_clamp.tex
new file mode 100644
index 0000000..6ad7efa
--- /dev/null
+++ b/diagrams/sin_clamp.tex
@@ -0,0 +1,34 @@
+\documentclass[varwidth=13.1cm, border={0.0cm 0.0cm 0.0cm 0.0cm}]{standalone}
+\usepackage{tikz}
+\usepackage[dvipsnames]{xcolor}
+\usepackage{pgfplots}
+\pgfplotsset{compat=1.18}
+\usepackage{amsmath}
+\newcommand{\Clamp}[1]{\operatorname{clamp}#1}
+
+\begin{document}
+
+\begin{tikzpicture}[scale=0.5]
+
+  \draw[very thin,color=gray] (-pi * 3,-pi * 1.2) grid (pi * 3, pi * 1.2);
+  \draw[->] (-3.2*pi,0) -- (3.2*pi,0) node[right] {$x$};
+  \draw[->] (0,-pi * 1.4) -- (0,pi * 1.5) node[above] {$f(x)$};
+
+\draw[thick, color=NavyBlue] plot [domain=-pi * 3:pi * 3, samples=100] (\x, {min(max(\x, -pi), pi)} );
+\draw[thick, color=OrangeRed] plot [domain=-pi * 3:pi * 3, samples=1000] (\x, {sin(min(max(\x, -pi), pi) r)} );
+
+\node[NavyBlue] at (0, -5.4) {$f(x) = \Clamp(x, -\pi, +\pi) $};
+\node[OrangeRed] at (0, -6.6) {$f(x) = \sin(\Clamp(x, -\pi, +\pi)) $};
+
+\draw [dashed, color=ForestGreen] (-2 * pi,-3.8) -- (-2 * pi,3.8) node[above] {$x=-2\pi$};
+\draw [dashed, color=Brown] (2 * pi,-3.8) -- (2 * pi,3.8) node[above] {$x=2\pi$};
+
+\draw [dashed, color=Fuchsia] (-3.0,pi) -- (3.0,pi) ;
+\draw [color=Fuchsia] (0, pi + 0.5) node {$y=\pi$};
+
+\draw [dashed, color=Peach] (-3.0,-pi) -- (3.0,-pi) ;
+\draw [color=Peach] (0, -pi + 0.5) node {$y=-\pi$};
+
+\end{tikzpicture}
+
+\end{document}
diff --git a/diagrams/sin_frac.pdf b/diagrams/sin_frac.pdf
new file mode 100644
index 0000000..46528b3
Binary files /dev/null and b/diagrams/sin_frac.pdf differ
diff --git a/diagrams/sin_frac.tex b/diagrams/sin_frac.tex
new file mode 100644
index 0000000..64101a8
--- /dev/null
+++ b/diagrams/sin_frac.tex
@@ -0,0 +1,38 @@
+\documentclass[varwidth=13.1cm, border={0.0cm 0.0cm 0.0cm 0.0cm}]{standalone}
+\usepackage{tikz}
+\usepackage[dvipsnames]{xcolor}
+\usepackage{pgfplots}
+\pgfplotsset{compat=1.18}
+\usepackage{amsmath}
+\newcommand{\Frac}[1]{\operatorname{frac}#1}
+
+\begin{document}
+
+\begin{tikzpicture}[scale=0.5]
+
+  \draw[very thin,color=gray] (-pi * 3,-pi * 1.2) grid (pi * 3, pi * 1.2);
+  \draw[->] (-3.2*pi,0) -- (3.2*pi,0) node[right] {$x$};
+  \draw[->] (0,-pi * 1.4) -- (0,pi * 1.5) node[above] {$f(x)$};
+
+\foreach \i in {-2, 0, 2}{
+    \pgfmathsetmacro{\start}{(\i - 1) * pi}
+    \pgfmathsetmacro{\end}  {(\i + 1) * pi}
+    \draw[thick, color=NavyBlue] plot [domain=\start:\end, samples=100] (\x, {((\x * 1/(2 * pi) + 0.5) - floor(\x * 1/(2 * pi) + 0.5)) * 2 * pi - pi} );
+}
+\draw[thick, color=OrangeRed] plot [domain=-pi * 3:pi * 3, samples=1000] (\x, {sin((((\x * 1/(2 * pi) + 0.5) - floor(\x * 1/(2 * pi) + 0.5)) * 2 * pi - pi) r)} );
+
+\node[NavyBlue] at (0, -5.4) {$f(x) = \Frac(x \cdot \frac{1}{2\pi}+0.5) \cdot 2\pi - \pi $};
+\node[OrangeRed] at (0, -6.6) {$f(x) = \sin( \Frac(x \cdot \frac{1}{2\pi}+0.5) \cdot 2\pi - \pi ) $};
+
+\draw [dashed, color=ForestGreen] (-2 * pi,-3.8) -- (-2 * pi,3.8) node[above] {$x=-2\pi$};
+\draw [dashed, color=Brown] (2 * pi,-3.8) -- (2 * pi,3.8) node[above] {$x=2\pi$};
+
+\draw [dashed, color=Fuchsia] (-3.0,pi) -- (3.0,pi) ;
+\draw [color=Fuchsia] (0, pi + 0.5) node {$y=\pi$};
+
+\draw [dashed, color=Peach] (-3.0,-pi) -- (3.0,-pi) ;
+\draw [color=Peach] (0, -pi + 0.5) node {$y=-\pi$};
+
+\end{tikzpicture}
+
+\end{document}
diff --git a/diagrams/z_operations.dot b/diagrams/z_operations.dot
new file mode 100644
index 0000000..f8d1e99
--- /dev/null
+++ b/diagrams/z_operations.dot
@@ -0,0 +1,60 @@
+digraph G {
+
+  vertex_shader [label="(from the vertex shader)"]
+
+  subgraph cluster_clipping {
+    label = "clipping"
+    DX_CLIP_SPACE_DEF [label="DX_CLIP_SPACE_DEF
+possibly clip the polygon"]
+  }
+
+  subgraph cluster_perspective {
+    label = "perspective division"
+
+    VTX_Z_FMT [nojustify=true label="VTX_Z_FMT
+(if enabled) divide Z by W"]
+  }
+
+  subgraph cluster_viewport_transformation {
+    label = "viewport transformation"
+
+    VPORT_Z_SCALE
+    VPORT_Z_OFFSET
+  }
+
+  subgraph cluster_geometry_assembly {
+  }
+
+  subgraph cluster_setup_unit {
+    label = "setup unit"
+
+    SU_DEPTH_SCALE
+    SU_DEPTH_OFFSET
+  }
+
+  subgraph cluster_zfunc {
+    label = "ZFUNC"
+    { rank=same
+    depth_test [shape=box label="depth test"]
+    depth_pass [shape=box label="depth pass"]
+    }
+    depth_test -> depth_pass
+  }
+
+  Z_BUFFER [shape=invhouse label="(write the new Z
+value to the Z-buffer)"]
+
+  fragment_shader [label="(to the fragment shader)"]
+
+  vertex_shader -> DX_CLIP_SPACE_DEF
+  DX_CLIP_SPACE_DEF -> VTX_Z_FMT
+  VTX_Z_FMT -> VPORT_Z_SCALE
+  VPORT_Z_SCALE -> VPORT_Z_OFFSET
+  VPORT_Z_OFFSET -> SU_DEPTH_SCALE
+  SU_DEPTH_SCALE -> SU_DEPTH_OFFSET
+  SU_DEPTH_OFFSET -> depth_test
+  depth_test -> Z_BUFFER
+
+  VPORT_Z_OFFSET -> depth_pass
+  depth_pass -> fragment_shader
+}
\ No newline at end of file
diff --git a/diagrams/z_operations.svg b/diagrams/z_operations.svg
new file mode 100644
index 0000000..1da76d6
--- /dev/null
+++ b/diagrams/z_operations.svg
@@ -0,0 +1,173 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 12.2.1 (20241206.2353)
+ -->
+<!-- Title: G Pages: 1 -->
+<svg width="588pt" height="765pt"
+ viewBox="0.00 0.00 588.26 764.75" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 760.75)">
+<title>G</title>
+<polygon fill="white" stroke="none" points="-4,4 -4,-760.75 584.26,-760.75 584.26,4 -4,4"/>
+<g id="clust1" class="cluster">
+<title>cluster_clipping</title>
+<polygon fill="none" stroke="black" points="94.4,-611.4 94.4,-712.75 382.4,-712.75 382.4,-611.4 94.4,-611.4"/>
+<text text-anchor="middle" x="238.4" y="-695.45" font-family="Times,serif" font-size="14.00">clipping</text>
+</g>
+<g id="clust2" class="cluster">
+<title>cluster_perspective</title>
+<polygon fill="none" stroke="black" points="89.4,-502.04 89.4,-603.4 387.4,-603.4 387.4,-502.04 89.4,-502.04"/>
+<text text-anchor="middle" x="238.4" y="-586.1" font-family="Times,serif" font-size="14.00">perspective division</text>
+</g>
+<g id="clust3" class="cluster">
+<title>cluster_viewport_transformation</title>
+<polygon fill="none" stroke="black" points="125.4,-344.79 125.4,-494.04 351.4,-494.04 351.4,-344.79 125.4,-344.79"/>
+<text text-anchor="middle" x="238.4" y="-476.74" font-family="Times,serif" font-size="14.00">viewport transformation</text>
+</g>
+<g id="clust5" class="cluster">
+<title>cluster_setup_unit</title>
+<polygon fill="none" stroke="black" points="101.4,-187.54 101.4,-336.79 347.4,-336.79 347.4,-187.54 101.4,-187.54"/>
+<text text-anchor="middle" x="224.4" y="-319.49" font-family="Times,serif" font-size="14.00">setup unit</text>
+</g>
+<g id="clust6" class="cluster">
+<title>cluster_zfunc</title>
+<polygon fill="none" stroke="black" points="187.4,-102.29 187.4,-179.54 405.4,-179.54 405.4,-102.29 187.4,-102.29"/>
+<text text-anchor="middle" x="296.4" y="-162.24" font-family="Times,serif" font-size="14.00">ZFUNC</text>
+</g>
+<!-- vertex_shader -->
+<g id="node1" class="node">
+<title>vertex_shader</title>
+<ellipse fill="none" stroke="black" cx="238.4" cy="-738.75" rx="134.33" ry="18"/>
+<text text-anchor="middle" x="238.4" y="-734.08" font-family="Times,serif" font-size="14.00">(from the vertex shader)</text>
+</g>
+<!-- DX_CLIP_SPACE_DEF -->
+<g id="node2" class="node">
+<title>DX_CLIP_SPACE_DEF</title>
+<ellipse fill="none" stroke="black" cx="238.4" cy="-649.45" rx="136.47" ry="30.05"/>
+<text text-anchor="middle" x="238.4" y="-653.4" font-family="Times,serif" font-size="14.00">DX_CLIP_SPACE_DEF</text>
+<text text-anchor="middle" x="238.4" y="-636.15" font-family="Times,serif" font-size="14.00">possibly clip the polygon</text>
+</g>
+<!-- vertex_shader&#45;&gt;DX_CLIP_SPACE_DEF -->
+<g id="edge2" class="edge">
+<title>vertex_shader&#45;&gt;DX_CLIP_SPACE_DEF</title>
+<path fill="none" stroke="black" d="M238.4,-720.5C238.4,-712.05 238.4,-701.49 238.4,-691.15"/>
+<polygon fill="black" stroke="black" points="241.9,-691.21 238.4,-681.21 234.9,-691.21 241.9,-691.21"/>
+</g>
+<!-- VTX_Z_FMT -->
+<g id="node3" class="node">
+<title>VTX_Z_FMT</title>
+<ellipse fill="none" stroke="black" cx="238.4" cy="-540.09" rx="141.24" ry="30.05"/>
+<text text-anchor="middle" x="238.4" y="-544.04" font-family="Times,serif" font-size="14.00">VTX_Z_FMT</text>
+<text text-anchor="middle" x="238.4" y="-526.79" font-family="Times,serif" font-size="14.00">(if enabled) divide Z by W</text>
+</g>
+<!-- DX_CLIP_SPACE_DEF&#45;&gt;VTX_Z_FMT -->
+<g id="edge3" class="edge">
+<title>DX_CLIP_SPACE_DEF&#45;&gt;VTX_Z_FMT</title>
+<path fill="none" stroke="black" d="M238.4,-619.11C238.4,-607.63 238.4,-594.27 238.4,-581.88"/>
+<polygon fill="black" stroke="black" points="241.9,-581.91 238.4,-571.91 234.9,-581.91 241.9,-581.91"/>
+</g>
+<!-- VPORT_Z_SCALE -->
+<g id="node4" class="node">
+<title>VPORT_Z_SCALE</title>
+<ellipse fill="none" stroke="black" cx="238.4" cy="-442.79" rx="97.51" ry="18"/>
+<text text-anchor="middle" x="238.4" y="-438.12" font-family="Times,serif" font-size="14.00">VPORT_Z_SCALE</text>
+</g>
+<!-- VTX_Z_FMT&#45;&gt;VPORT_Z_SCALE -->
+<g id="edge4" class="edge">
+<title>VTX_Z_FMT&#45;&gt;VPORT_Z_SCALE</title>
+<path fill="none" stroke="black" d="M238.4,-509.72C238.4,-497.95 238.4,-484.43 238.4,-472.7"/>
+<polygon fill="black" stroke="black" points="241.9,-472.8 238.4,-462.8 234.9,-472.8 241.9,-472.8"/>
+</g>
+<!-- VPORT_Z_OFFSET -->
+<g id="node5" class="node">
+<title>VPORT_Z_OFFSET</title>
+<ellipse fill="none" stroke="black" cx="238.4" cy="-370.79" rx="104.87" ry="18"/>
+<text text-anchor="middle" x="238.4" y="-366.12" font-family="Times,serif" font-size="14.00">VPORT_Z_OFFSET</text>
+</g>
+<!-- VPORT_Z_SCALE&#45;&gt;VPORT_Z_OFFSET -->
+<g id="edge5" class="edge">
+<title>VPORT_Z_SCALE&#45;&gt;VPORT_Z_OFFSET</title>
+<path fill="none" stroke="black" d="M238.4,-424.49C238.4,-417.2 238.4,-408.52 238.4,-400.33"/>
+<polygon fill="black" stroke="black" points="241.9,-400.41 238.4,-390.41 234.9,-400.41 241.9,-400.41"/>
+</g>
+<!-- SU_DEPTH_SCALE -->
+<g id="node6" class="node">
+<title>SU_DEPTH_SCALE</title>
+<ellipse fill="none" stroke="black" cx="227.4" cy="-285.54" rx="107.5" ry="18"/>
+<text text-anchor="middle" x="227.4" y="-280.87" font-family="Times,serif" font-size="14.00">SU_DEPTH_SCALE</text>
+</g>
+<!-- VPORT_Z_OFFSET&#45;&gt;SU_DEPTH_SCALE -->
+<g id="edge6" class="edge">
+<title>VPORT_Z_OFFSET&#45;&gt;SU_DEPTH_SCALE</title>
+<path fill="none" stroke="black" d="M236.12,-352.54C234.69,-341.72 232.81,-327.49 231.16,-315.02"/>
+<polygon fill="black" stroke="black" points="234.65,-314.73 229.87,-305.27 227.72,-315.64 234.65,-314.73"/>
+</g>
+<!-- depth_pass -->
+<g id="node9" class="node">
+<title>depth_pass</title>
+<polygon fill="none" stroke="black" points="397.4,-146.29 303.4,-146.29 303.4,-110.29 397.4,-110.29 397.4,-146.29"/>
+<text text-anchor="middle" x="350.4" y="-123.62" font-family="Times,serif" font-size="14.00">depth pass</text>
+</g>
+<!-- VPORT_Z_OFFSET&#45;&gt;depth_pass -->
+<g id="edge10" class="edge">
+<title>VPORT_Z_OFFSET&#45;&gt;depth_pass</title>
+<path fill="none" stroke="black" d="M321.07,-359.37C332.91,-354.32 343.68,-347.09 351.4,-336.79 390.89,-284.12 373.49,-200.91 360.3,-157.6"/>
+<polygon fill="black" stroke="black" points="363.67,-156.65 357.29,-148.19 357,-158.78 363.67,-156.65"/>
+</g>
+<!-- SU_DEPTH_OFFSET -->
+<g id="node7" class="node">
+<title>SU_DEPTH_OFFSET</title>
+<ellipse fill="none" stroke="black" cx="224.4" cy="-213.54" rx="114.87" ry="18"/>
+<text text-anchor="middle" x="224.4" y="-208.87" font-family="Times,serif" font-size="14.00">SU_DEPTH_OFFSET</text>
+</g>
+<!-- SU_DEPTH_SCALE&#45;&gt;SU_DEPTH_OFFSET -->
+<g id="edge7" class="edge">
+<title>SU_DEPTH_SCALE&#45;&gt;SU_DEPTH_OFFSET</title>
+<path fill="none" stroke="black" d="M226.66,-267.24C226.35,-259.95 225.97,-251.27 225.62,-243.08"/>
+<polygon fill="black" stroke="black" points="229.12,-243 225.2,-233.16 222.13,-243.3 229.12,-243"/>
+</g>
+<!-- depth_test -->
+<g id="node8" class="node">
+<title>depth_test</title>
+<polygon fill="none" stroke="black" points="285.15,-146.29 195.65,-146.29 195.65,-110.29 285.15,-110.29 285.15,-146.29"/>
+<text text-anchor="middle" x="240.4" y="-123.62" font-family="Times,serif" font-size="14.00">depth test</text>
+</g>
+<!-- SU_DEPTH_OFFSET&#45;&gt;depth_test -->
+<g id="edge8" class="edge">
+<title>SU_DEPTH_OFFSET&#45;&gt;depth_test</title>
+<path fill="none" stroke="black" d="M227.72,-195.29C229.8,-184.47 232.53,-170.24 234.93,-157.77"/>
+<polygon fill="black" stroke="black" points="238.35,-158.49 236.8,-148.01 231.48,-157.17 238.35,-158.49"/>
+</g>
+<!-- depth_test&#45;&gt;depth_pass -->
+<g id="edge1" class="edge">
+<title>depth_test&#45;&gt;depth_pass</title>
+<path fill="none" stroke="black" d="M285.52,-128.29C287.52,-128.29 289.53,-128.29 291.53,-128.29"/>
+<polygon fill="black" stroke="black" points="291.49,-131.79 301.49,-128.29 291.49,-124.79 291.49,-131.79"/>
+</g>
+<!-- Z_BUFFER -->
+<g id="node10" class="node">
+<title>Z_BUFFER</title>
+<polygon fill="none" stroke="black" points="0,-25.67 146.4,0 292.8,-25.67 292.66,-67.2 0.14,-67.2 0,-25.67"/>
+<text text-anchor="middle" x="146.4" y="-41.1" font-family="Times,serif" font-size="14.00">(write the new Z</text>
+<text text-anchor="middle" x="146.4" y="-23.85" font-family="Times,serif" font-size="14.00">value to the Z&#45;buffer)</text>
+</g>
+<!-- depth_test&#45;&gt;Z_BUFFER -->
+<g id="edge9" class="edge">
+<title>depth_test&#45;&gt;Z_BUFFER</title>
+<path fill="none" stroke="black" d="M222.28,-110.1C211.86,-100.23 198.36,-87.42 185.63,-75.35"/>
+<polygon fill="black" stroke="black" points="188.19,-72.95 178.52,-68.61 183.37,-78.03 188.19,-72.95"/>
+</g>
+<!-- fragment_shader -->
+<g id="node11" class="node">
+<title>fragment_shader</title>
+<ellipse fill="none" stroke="black" cx="445.4" cy="-37.15" rx="134.86" ry="18"/>
+<text text-anchor="middle" x="445.4" y="-32.47" font-family="Times,serif" font-size="14.00">(to the fragment shader)</text>
+</g>
+<!-- depth_pass&#45;&gt;fragment_shader -->
+<g id="edge11" class="edge">
+<title>depth_pass&#45;&gt;fragment_shader</title>
+<path fill="none" stroke="black" d="M368.72,-110.1C383.04,-96.66 403.15,-77.79 419.18,-62.75"/>
+<polygon fill="black" stroke="black" points="421.14,-65.71 426.04,-56.32 416.35,-60.61 421.14,-65.71"/>
+</g>
+</g>
+</svg>
diff --git a/images/cube_scene.png b/images/cube_scene.png
new file mode 100644
index 0000000..969039d
Binary files /dev/null and b/images/cube_scene.png differ
diff --git a/images/plane_scene.png b/images/plane_scene.png
new file mode 100644
index 0000000..8bec1b4
Binary files /dev/null and b/images/plane_scene.png differ
diff --git a/images/z_buffer_clipped.png b/images/z_buffer_clipped.png
new file mode 100644
index 0000000..7b7a175
Binary files /dev/null and b/images/z_buffer_clipped.png differ
diff --git a/images/z_buffer_cube.png b/images/z_buffer_cube.png
new file mode 100644
index 0000000..385bf90
Binary files /dev/null and b/images/z_buffer_cube.png differ
diff --git a/images/z_buffer_cube_range.png b/images/z_buffer_cube_range.png
new file mode 100644
index 0000000..8ddb642
Binary files /dev/null and b/images/z_buffer_cube_range.png differ
diff --git a/images/z_buffer_cube_range_back.png b/images/z_buffer_cube_range_back.png
new file mode 100644
index 0000000..327cded
Binary files /dev/null and b/images/z_buffer_cube_range_back.png differ
diff --git a/images/z_buffer_gradient.png b/images/z_buffer_gradient.png
new file mode 100644
index 0000000..3fc177d
Binary files /dev/null and b/images/z_buffer_gradient.png differ
diff --git a/images/z_buffer_overflow.png b/images/z_buffer_overflow.png
new file mode 100644
index 0000000..e238e1e
Binary files /dev/null and b/images/z_buffer_overflow.png differ
diff --git a/images/z_buffer_perspective.png b/images/z_buffer_perspective.png
new file mode 100644
index 0000000..d5ba56e
Binary files /dev/null and b/images/z_buffer_perspective.png differ
diff --git a/images/z_buffer_perspective_scale.png b/images/z_buffer_perspective_scale.png
new file mode 100644
index 0000000..93108bb
Binary files /dev/null and b/images/z_buffer_perspective_scale.png differ
diff --git a/index.tex b/index.tex
index ded3477..07f79e1 100644
--- a/index.tex
+++ b/index.tex
@@ -1,5 +1,6 @@
 \documentclass[20pt]{article}
 
+\usepackage{amsmath}
 \usepackage[font=small,labelfont=bf]{caption}
 \usepackage{hyperref}
 \hypersetup{
@@ -15,6 +16,7 @@
 \graphicspath{ {./images/} }
 
 \usepackage{minted}
+\usepackage{nicefrac}
 
 \title{Radeon R500}
 \date{}
@@ -28,9 +30,9 @@
 
 \section{Introduction}
 
-The primary/minimal project goal is "draw a triangle on a Radeon R500 via direct
-memory-mapped hardware register and texture memory accesses". This means no
-\href{https://mesa3d.org/}{Mesa}, no
+The primary/minimal project goal is ``draw a triangle on a Radeon R500 via
+direct memory-mapped hardware register and texture memory accesses''. This means
+no \href{https://mesa3d.org/}{Mesa}, no
 \href{https://github.com/torvalds/linux/tree/v6.12/drivers/gpu/drm/radeon}{radeon}
 kernel module, and certainly no OpenGL or Direct3D.
 
@@ -661,14 +663,45 @@ from scratch. I first implemented the rotation in GLSL:
   \caption*{\texttt{cube\_rotate.vs.glsl}}
 \end{figure}
 
-I verified that the GLSL version worked as expected in OpenGL, then I translated
-the GLSL to R500 vertex shader assembly, as:
+\subsubsection{Remapping shader unit sin/cos operands}
+
+Because this shader program depends on being able to calculate sin and cos, I
+immediately needed to understand how to use the \texttt{ME\_SIN} and
+\texttt{ME\_COS} operations.
+
+The R500 vertex shader ME unit clamps sin/cos operands to the range
+$(-\pi,+\pi)$, as in:
+
+\begin{figure}
+  \href{diagrams/sin_clamp.pdf}{\includegraphics{diagrams/sin_clamp.pdf}}
+\end{figure}
+
+``Remapping'' floating point values from $(-\infty,+\infty)$ to $(-\pi,+\pi)$ is not
+obvious. I was not previously aware of this transformation:
+
+\begin{figure}
+  \href{diagrams/sin_frac.pdf}{\includegraphics{diagrams/sin_frac.pdf}}
+\end{figure}
+
+Or, expressed as R500 vertex shader assembly:
+
+\begin{figure}
+  \href{verbatim/sin_operand_remap.vs.asm}{\includegraphics{verbatim/output/sin_operand_remap.vs.asm.pdf}}
+\end{figure}
+
+\subsubsection{Translation of the GLSL vertex shader to R500 vertex shader assembly}
+
+Having verified that the GLSL version works as expected in OpenGL, and knowing
+how to use the R500 vertex shader sin/cos operations, I then translated the GLSL
+to R500 vertex shader assembly, as:
 
 \begin{figure}
   \href{verbatim/cube_rotate.vs.asm}{\includegraphics{verbatim/output/cube_rotate.vs.asm.pdf}}
   \caption*{\texttt{cube\_rotate.vs.asm}}
 \end{figure}
 
+\subsubsection{Vertex shader assembler/code generator debugging}
+
 However, when I first executed the vertex shader cube rotation demo, I found
 it did not work as expected:
 
@@ -775,8 +808,8 @@ I've written several \href{https://github.com/buhman/scu-dsp-asm}{nice assembler
 for other architectures in the past, but I've never seen any instruction set
 as expressive as R500 fragment shaders.
 
-I attempted to directly reflect this ``multiple tiers of operand argument
-decoding'' in the syntax I invented for fragment shader ALU instructions.
+I attempted to directly represent this ``multiple tiers of operand argument
+decoding'' in my fragment shader ALU instructions syntax.
 
 These instructions are also vector instructions: a total of 24 floating point
 input operands and 8 floating results could be evaluated per instruction.
@@ -902,4 +935,426 @@ except:
 The exponent/mantissa table that shows example 7-bit float values on page 106 of
 \href{doc/R5xx_Acceleration_v1.5.pdf}{R5xx\_Acceleration\_v1.5.pdf} is incorrect.
 
+\section{Progress: 26 Oct 2025}
+
+From 21 Oct 2025 to 26 Oct 2025, I achieved the following (roughly in chronological order):
+
+\begin{itemize}
+\item I \href{https://git.idk.st/bilbo/r500/commit/8594bc4a38f6fcab2ac6e437b46bcf1e0e6d32dd}{rewrote} most of the vertex shader assembler parser/validator, and implemented support for \href{https://git.idk.st/bilbo/r500/commit/f3f1969f4a9b336536f5fb23d246f7103c41e20d}{assembling/disassembling ``dual math'' operations}
+\item I implemented support for \href{https://git.idk.st/bilbo/r500/commit/96d7286e7cd3270b9dca0924d3a046d585d6dc9d}{assembling} and \href{https://git.idk.st/bilbo/r500/commit/27227426eaac265bc3126edd7d017c791640e789}{disassembling} TEX fragment shader instructions
+\item I presented this project (including live demos on real hardware) at
+  a \href{https://itch.io/jam/spoopy-jam-7-heckraiser}{local in-person game jam event}
+\end{itemize}
+
+\subsection{Vertex shader optimization part 1: ``MOV'' elimination}
+
+After talking about it in-person, I decided to try to golf my original
+15-instruction
+\href{https://git.idk.st/bilbo/r500/src/commit/c8ae311e60/drm/cube_rotate.vs.asm}{cube\_rotate.vs.asm} vertex shader.
+
+The first opportunity for optimization is in the first two instructions of:
+
+\begin{figure}
+  \href{verbatim/cube_rotate_const_move.vs.asm}{\includegraphics{verbatim/output/cube_rotate_const_move.vs.asm.pdf}}
+\end{figure}
+
+The \texttt{VE\_ADD} (being used here as a ``MOV'' instruction) is needed
+because there is only a single 128-bit read port into \texttt{const} memory, so
+a multiply-add like this is illegal:
+
+\begin{figure}
+  \href{verbatim/cube_rotate_const_move_illegal.vs.asm}{\includegraphics{verbatim/output/cube_rotate_const_move_illegal.vs.asm.pdf}}
+\end{figure}
+
+I observed that because I never need to reference the last two constants in the
+same instruction that references the first two constants, if I rearrange the
+ordering of the constants to:
+
+\begin{figure}
+  \href{verbatim/cube_rotate_const_move_rearrange.vs.asm}{\includegraphics{verbatim/output/cube_rotate_const_move_rearrange.vs.asm.pdf}}
+\end{figure}
+
+I can then rewrite the multiply-add instructions as:
+
+\begin{figure}
+  \href{verbatim/cube_rotate_const_move_rearrange_mad.vs.asm}{\includegraphics{verbatim/output/cube_rotate_const_move_rearrange_mad.vs.asm.pdf}}
+\end{figure}
+
+\subsection{Vertex shader optimization part 2: ``dual math'' instructions}
+
+I spent an entire day rewriting large portions of the vertex shader assembler to
+add support for ``dual math'' instructions.
+
+The original
+\href{https://git.idk.st/bilbo/r500/src/commit/c8ae311e60/drm/cube_rotate.vs.asm}{cube\_rotate.vs.asm}
+contains this sequence of \texttt{ME\_SIN}/\texttt{ME\_COS} instructions:
+
+\begin{figure}
+  \href{verbatim/cube_rotate_sin_cos.vs.asm}{\includegraphics{verbatim/output/cube_rotate_sin_cos.vs.asm.pdf}}
+\end{figure}
+
+The \texttt{temp[3].x} and \texttt{temp[3].y} results are needed immediately,
+but \texttt{temp[3].z} and \texttt{temp[3].w} are not needed until after the
+first pair of \texttt{VE\_MUL}/\texttt{VE\_MAD} operations.
+
+The dual math instruction mode replaces the 3rd \texttt{VE\_} instruction operand
+with any \texttt{ME\_} operation, so it is only usable with 2-operand
+\texttt{VE\_} instructions like \texttt{VE\_MUL}.
+
+The dual math encoding also has several restrictions (it only has \nicefrac{1}{4}th the
+control word bits compared to a normal \texttt{ME\_} instruction). A notable
+restriction is that it must write to \texttt{alt\_temp}.
+
+Unlike the fancy things that can be done with fragment shader
+operands/sources/swizzles, a single vertex shader operand can also only read
+from a single 128-bit register, so this means to be able to continue to access
+\texttt{temp[3].zw} as a vector, both \texttt{z} and \texttt{w} must now be
+stored in \texttt{alt\_temp}, even if only one of them was written by a ``dual
+math'' instruction.
+
+The change (and my newly-implemented dual math syntax) is:
+
+\begin{figure}
+  \href{verbatim/cube_rotate_dual_math.vs.asm}{\includegraphics{verbatim/output/cube_rotate_dual_math.vs.asm.pdf}}
+\end{figure}
+
+Where the dual math instruction:
+
+\begin{figure}
+  \href{verbatim/cube_rotate_dual_math_single_instruction.vs.asm}{\includegraphics{verbatim/output/cube_rotate_dual_math_single_instruction.vs.asm.pdf}}
+\end{figure}
+
+is encoded by the assembler as a single instruction and is executed by the vertex
+shader unit in a single clock cycle.
+
+The final
+\href{https://git.idk.st/bilbo/r500/src/commit/c8ae311e60/drm/cube_rotate_optimize.vs.asm}{cube\_rotate\_optimize.vs.asm}
+was reduced from 15 instructions to 13 instructions (compared
+to Mesa's R500 vertex shader compiler's 27 instructions).
+
+\section{Progress: 29 Oct 2025}
+
+From 27 Oct 2025 to 29 Oct 2025, I achieved the following (roughly in chronological order):
+
+\begin{itemize}
+\item I implemented support for \href{https://git.idk.st/bilbo/r500/commit/9aecbbfc6f297ea71c72f4c4fba1b8107be95ca1}{``multiple render targets''} in the fragment shader assembler
+\item I wrote a \href{https://git.idk.st/bilbo/r500/src/commit/18b7a593bd/drm/texture_blur_horizontal.fs.asm}{gaussian blur fragment shader}
+\item I made a demo that draws \href{https://git.idk.st/bilbo/r500/src/commit/18b7a593bd/drm/pumpkin_man.c#L963}{multiple 3D ``objects''} where each object's UV coordinates sample a \href{https://git.idk.st/bilbo/r500/src/commit/18b7a593bd/drm/pumpkin_man.c#L1029-L1069}{different} \href{https://git.idk.st/bilbo/r500/src/commit/18b7a593bd/drm/pumpkin_man.c#L314}{texture}
+\item I did several experiments related to R500's Z-buffer implementation
+\end{itemize}
+
+\subsection{Z-buffer experiments}
+\label{sec:z-buffer-experiments}
+Though I produced a ``properly'' Z-buffered 3D cube demo previously, I felt I
+did not fully understand the relationship between Z coordinates, W coordinates,
+viewport transformations, and the actual values that are written to the
+Z-buffer. At some point, I'd like to write fragment shaders that sample the
+Z-buffer, so I feel I need to understand this more rigorously.
+
+For comparison, Sega Dreamcast stores 32-bit floating-point values in the
+``depth accumulation buffer''. This effectively means that any Z coordinates can
+be stored in the depth accumulation buffer without scaling or range
+remapping. I've made several
+\href{https://az1.idk.st/public/20kdm2-demo.mp4}{moderately fancy} Dreamcast
+demos that happily store arbitrary ``view space'' Z values in the depth
+accumulation buffer without any visible depth aliasing/artifacts.
+
+In contrast, the Radeon R500 does not have a 32-bit floating point Z-buffer
+format. Instead, R500 supports (\href{doc/R5xx_Acceleration_v1.5.pdf}{R5xx\_Acceleration\_v1.5.pdf}, page 283,
+\texttt{ZB\_FORMAT}):
+
+\begin{itemize}
+\item 16-bit integer Z
+\item 16-bit floating point
+\item 24-bit integer Z with 8-bit stencil
+\end{itemize}
+
+The third option, with the most bits, clearly ought to give the most
+precision--with the caveat that the Z values that are written to the Z-buffer
+should be scaled to be uniformly distributed across the range of 24-bit integers.
+
+I performed several tests with variations of
+\href{https://git.idk.st/bilbo/r500/src/branch/main/drm/zbuffer_test.c}{zbuffer\_test.c}. The
+general strategy was:
+
+\begin{itemize}
+\item Define some contrived/illustrative 3D scene
+\item Manipulate the scale/range of Z and W values
+\item Observe the state of the Z-buffer after rendering
+\end{itemize}
+
+The first scene I chose was of a tilted plane that is non-coplanar with the view
+space XY plane, as in:
+
+\begin{figure}
+  \href{images/plane_scene.png}{\includegraphics{images/plane_scene.png}}
+  \caption*{Blender screenshot, ``plane scene''}
+\end{figure}
+
+Where the grey plane is the object that is to be rendered, the yellow lines
+represent a ``camera'' from which the plane is to be viewed, and the blue line
+represents the view/clip-space Z axis.
+
+To view the content of the Z buffer, I wrote a
+\href{https://git.idk.st/bilbo/r500/src/commit/18b7a593bd/tools/zbuf_decode.py}{simple script}
+to convert the 24-bit integer Z-buffer to 16-bit
+\href{https://en.wikipedia.org/wiki/Netpbm}{PGM},
+so that it can be easily viewed in an image editor. This tool also shows the
+minimum and maximum values found in the Z-buffer, intended to help verify that
+the entire numeric range of the Z-buffer is being used.
+
+While I expected to see the (orthographic, directly facing the camera) plane
+drawn on the Z-buffer as a smooth gradient such as:
+
+\begin{figure}
+  \href{images/z_buffer_gradient.png}{\includegraphics{images/z_buffer_gradient.png}}
+  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_gradient.png}}
+\end{figure}
+
+Several of my tests displayed numeric aliasing, overflows, underflows, etc.:
+
+\begin{figure}
+  \href{images/z_buffer_overflow.png}{\includegraphics{images/z_buffer_overflow.png}}
+  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_overflow.png}}
+\end{figure}
+
+Of particular interest to me was to verify the behavior of the
+\texttt{DX\_CLIP\_SPACE\_DEF} bit
+(\href{doc/R5xx_Acceleration_v1.5.pdf}{R5xx\_Acceleration\_v1.5.pdf}, page
+255--this is also the only place in the entire manual where ``non-user'' clip
+planes are even defined), and to understand the order of pipeline operations.
+
+I played with moving the plane around, to observe clipping behavior (here the
+lower half of the scene was clipped due to intersecting the Z=+1.0 clip plane):
+
+\begin{figure}
+  \href{images/z_buffer_clipped.png}{\includegraphics{images/z_buffer_clipped.png}}
+  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_clipped.png}\\
+    (also simultaneously showing overflow/underflow artifacts)}
+\end{figure}
+
+Thinking at this point that I nearly understood most of the pieces, I then
+re-enabled XY perspective division:
+
+\begin{figure}
+  \href{images/z_buffer_perspective.png}{\includegraphics{images/z_buffer_perspective.png}}
+  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_perspective.png}}
+\end{figure}
+
+The above image was not quite what I wanted: I noticed the range of the Z buffer
+values was roughly between \texttt{0} and \texttt{8388607}, but what I really
+wanted was \texttt{0} to \texttt{16777215}. Adjusting scale again produced this
+Z-buffer:
+
+\begin{figure}
+  \href{images/z_buffer_perspective_scale.png}{\includegraphics{images/z_buffer_perspective_scale.png}}
+  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_perspective\_scale.png}}
+\end{figure}
+
+Up to this point, I was using \texttt{ZFUNC=GREATER} with a Z-buffer cleared
+with an initial depth of zero, where all Z values are negative numbers.
+
+I decided it might be more intuitive to use a Z-buffer that is cleared with an
+initial depth of one, using \texttt{ZFUNC=LESS} instead where all Z values are
+positive numbers.
+
+With these adjustments, I captured a Z-buffer from the earlier cube demo:
+
+\begin{figure}
+  \href{images/z_buffer_cube.png}{\includegraphics{images/z_buffer_cube.png}}
+  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_cube.png}}
+\end{figure}
+
+This was still not quite ``correct'', because the minimum depth of the cube is
+being drawn as \textasciitilde{}\texttt{2763306} (\textasciitilde{}0.16), but I expected
+something closer to zero.
+
+Adjusting my range/scale arithmetic again produced this image:
+
+\begin{figure}
+  \href{images/z_buffer_cube_range.png}{\includegraphics{images/z_buffer_cube_range.png}}
+  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_cube\_range.png}}
+\end{figure}
+
+The minimum Z value now appears to be closer to zero, but the ``back'' faces of
+the cube (and maximum Z values) are not visible. Without changing any
+scale/range constants, inverting \texttt{ZFUNC} and using a zero-initialized
+Z-buffer produced this image of the back faces of the cube:
+
+\begin{figure}
+  \href{images/z_buffer_cube_range_back.png}{\includegraphics{images/z_buffer_cube_range_back.png}}
+  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_cube\_range\_back.png}}
+\end{figure}
+
+Indeed, the maximum Z value is close to \textasciitilde{}\texttt{16777215}
+(\textasciitilde{}1.0), as intended. I feel at this point I have a better intuition
+for using integer Z-buffers. The pipeline (and relevant registers) appears to be
+something like this:
+
+\begin{figure}
+  \includegraphics{diagrams/z_operations.svg}
+  \caption*{R500 Z transform pipeline (simplified)}
+\end{figure}
+
+Prior to these experiments, I was not aware \texttt{SU\_DEPTH\_SCALE} is the
+thing directly responsible for scaling floating point Z values to the integer Z
+values stored in the depth buffer.
+
+In general, the hardware perspective divide, viewport transform, clipping, and
+setup units are absolutely fascinating.
+
+\subsection{3D perspective}
+
+Despite making many 3D demos in the past, I feel that every time I want to
+``draw something 3D'' on a new platform, I need to re-learn 3D/perspective
+transformations (perhaps because I never truly \textit{learned} anything).
+
+In many OpenGL articles/tutorials/books the
+\href{https://learnopengl.com/Getting-started/Coordinate-Systems}{standard}
+\href{https://ogldev.org/www/tutorial12/tutorial12.html}{formula} for
+\href{https://songho.ca/opengl/gl_projectionmatrix.html}{explaining}
+\href{https://www.scratchapixel.com/lessons/3d-basic-rendering/perspective-and-orthographic-projection-matrix/opengl-perspective-projection-matrix.html}{perspective}
+\href{https://learnwebgl.brown37.net/08_projections/projections_perspective.html}{projection}
+appears to be:
+
+\begin{itemize}
+\item Begin with an overly-academic explanation of perspective in terms of camera optics and trigonometry
+\item Do not implement or demonstrate any of the systems or mathematics
+  described in the preceding pages of explanations; instead abruptly hide all
+  magic behind \texttt{glm::perspective}
+\item Refuse to explain or clarify further
+\item Continue for the next 30 chapters/articles without ever revisiting focal
+  length, view frustums, depth of field, etc. again
+\end{itemize}
+
+It is sufficient to instead rationalize/implement ``perspective'' as:
+
+\begin{quote}
+  Perspective is the division of X and Y coordinates by Z, where the coordinate
+  $(0, 0, 0)$ is the view origin (and the center of the screen/projection).
+\end{quote}
+
+Defining perspective this way also works for OpenGL, with some slight
+adjustment, notably to deal with OpenGL's
+\href{https://registry.khronos.org/OpenGL/specs/gl/glspec20.pdf}{definition} of
+``normalized device coordinates''.
+
+I note that (unlike Dreamcast) one can't actually divide by Z on R500 (nor
+OpenGL), both because the VTE doesn't support this, and because the texture
+unit doesn't support this. Of course, I tried it anyway:
+
+\begin{figure}
+  \includegraphics{videos/cube_warped_textures.png}
+  \caption*{R500 DVI capture, \texttt{texture\_cube\_warping.c} \\
+    (unrelated to this demo, R500 also interestingly has a dedicated ``disable perspective-correct texture mapping'' bit)}
+\end{figure}
+
+Instead, in both cases, the R500 uses the W coordinate for division. This turns
+out to be very convenient, because it means that the ``field of
+view''/perspective scale (W) and the Z-buffer/depth test scale (Z) can be
+adjusted independently.
+
+\subsection{3D clipping}
+
+Here are several examples of improperly scaled Z values, which are being clipped
+by the setup unit:
+
+\begin{figure}
+  \includegraphics{videos/cube_clipped_far.png}
+  \caption*{R500 DVI capture, \texttt{texture\_cube\_clear\_zwrite\_vertex\_shader\_optimize\_zscale.c} \\
+  (``far'' clip plane intersection)}
+\end{figure}
+
+\begin{figure}
+  \includegraphics{videos/cube_clipped_near.png}
+  \caption*{R500 DVI capture, \texttt{texture\_cube\_clear\_zwrite\_vertex\_shader\_optimize\_zscale.c} \\
+  (``near'' clip plane intersection)}
+\end{figure}
+
+\begin{figure}
+  \includegraphics{videos/cube_clipped_near_opengl.png}
+  \caption*{R500 DVI capture, \texttt{texture\_cube\_clear\_zwrite\_vertex\_shader\_optimize\_zscale.c} \\
+  (I am curious to learn under what circumstances the OpenGL designers thought\\ $-w_{c} < z_{c} < w_{c}$ was a good idea)}
+\end{figure}
+
+\section{Progress: 31 Oct 2025}
+
+From 30 Oct 2025 to 31 Oct 2025, I achieved the following (non-chronological):
+
+\begin{itemize}
+\item I implemented a \href{https://git.idk.st/bilbo/r500/src/branch/main/drm/matrix_cubesphere_specular.fs.asm}{diffuse/specular lighting fragment shader} in R500 fragment shader assembly
+\item I made vertex shaders that represent coordinate space transformations
+  using matrix multiplications rather than ad-hoc arithmetic
+\item While writing demos that pass multiple (interpolated) vectors from the
+  vertex shader to the fragment shader, I learned more about \href{https://git.idk.st/bilbo/r500/src/commit/f43ac599f9/drm/matrix_cubesphere_specular_suzanne.cpp#L444-L512}{``rasterizer instructions''}
+\item I made a demo that uses more than one texture for the entire scene
+  (by \href{https://git.idk.st/bilbo/r500/src/commit/f43ac599f9/drm/pumpkin_man.c#L272-L317}{reconfiguring
+  the texture unit for each ``object''})
+\end{itemize}
+
+\subsection{Lighting demo}
+
+\begin{figure}
+  \includegraphics{videos/suzanne.png}
+  \caption*{R500 DVI capture, \texttt{matrix\_cubesphere\_specular\_suzanne.cpp} \\
+  (subdivided Suzanne mesh, 15,744 triangles)}
+\end{figure}
+
+Despite being a ``simple'' lighting demo, a surprising number of things need to
+happen simultaneously before it becomes possible.
+
+Where vertex shaders from previous demos were passed at most a single scalar
+variable for animation/timing, the vertex shader in this demo uses
+\href{https://git.idk.st/bilbo/r500/src/commit/f43ac599f9/drm/matrix_cubesphere_specular_suzanne.cpp#L301-L326}{10 vectors} as
+input:
+
+\begin{itemize}
+\item 4 vectors for a ``local space to clip space'' transformation matrix
+\item 4 vectors for a ``local space to world space'' transformation matrix (used for lighting)
+\item 1 vector for a ``light position'' (in world space coordinates, used for lighting)
+\item 1 vector for a ``view origin'' (in world space coordinates, used for lighting)
+\end{itemize}
+
+Additionally, where previous demos passed at most a single vector from the
+vertex shader to the fragment shader (vertex color or texture coordinates), this
+demo passes
+\href{https://git.idk.st/bilbo/r500/src/commit/f43ac599f9/drm/matrix_cubesphere_specular_suzanne.cpp#L444-L512}{5 vectors}
+from the vertex shader to the fragment shader, all of which are used
+by the lighting calculation:
+
+\begin{itemize}
+\item world space position
+\item world space normal
+\item world space light position
+\item world space view origin
+\item uv space texture coordinates
+\end{itemize}
+
+\subsection{Learn algebra by writing fragment shader assembly}
+
+Prior to today, I did not know about this transformation/equivalence:
+
+\begin{gather*}
+x^{n} \iff 2^{\left( n\cdot\frac{\log(x)}{\log(2)} \right)}
+\end{gather*}
+
+While the R500 fragment shader alpha unit does not have a \texttt{POW} operation,
+it does have \href{https://git.idk.st/bilbo/r500/src/commit/f43ac599f9/drm/matrix_cubesphere_specular.fs.asm#L93-L99}{\texttt{EX2} and \texttt{LN2}}
+operations.
+
+For example, one could implement $a^{32}$ in R500 fragment shader assembly as:
+
+\begin{figure}
+  \href{verbatim/pow_fragment_shader.fs.asm}{\includegraphics{verbatim/output/pow_fragment_shader.fs.asm.pdf}}
+\end{figure}
+
+This ``arbitrary exponents with arbitrary bases'' pattern is used in the
+lighting demo fragment shader as part of the ``specular intensity'' calculation.
+
+This fragment shader unit feature is very cool, because a software
+implementation of a generalized floating-point \texttt{pow} function is
+extremely
+\href{https://git.musl-libc.org/cgit/musl/tree/src/math/powf.c?id=cb5c057c87240a9534f8e0d9b7ff2560082f6218}{computationally expensive}
+otherwise.
+
 \end{document}
diff --git a/resize_svg.py b/resize_svg.py
index 62b9ab2..77e6d77 100644
--- a/resize_svg.py
+++ b/resize_svg.py
@@ -19,4 +19,4 @@ def transform():
 
 lines = list(transform())
 with open(sys.argv[1], 'w') as f:
-    f.write('\n'.join(lines))
+    f.write(''.join(lines))
diff --git a/verbatim/cube_rotate_const_move.vs.asm b/verbatim/cube_rotate_const_move.vs.asm
new file mode 100644
index 0000000..dec95bd
--- /dev/null
+++ b/verbatim/cube_rotate_const_move.vs.asm
@@ -0,0 +1,8 @@
+-- CONST[0] = {0.159155, 0.5, 6.283185, -3.141593}
+-- CONST[1] = {theta1, theta2, 0.2, 0.5}
+
+temp[0].xy   = VE_ADD  const[1].xy__ const[1].00__ ;
+
+temp[0].xy   = VE_MAD   temp[0].xy__   const[0].xx__  const[0].yy__ ;
+temp[0].xy   = VE_FRC   temp[0].xy__ ;
+temp[0].xy   = VE_MAD   temp[0].xy__   const[0].zz__  const[0].ww__ ;
diff --git a/verbatim/cube_rotate_const_move_illegal.vs.asm b/verbatim/cube_rotate_const_move_illegal.vs.asm
new file mode 100644
index 0000000..b4df9d4
--- /dev/null
+++ b/verbatim/cube_rotate_const_move_illegal.vs.asm
@@ -0,0 +1,3 @@
+-- this is an illegal instruction:
+-- const[1] and const[0] can not be read simultaneously
+temp[0].xy   = VE_MAD   const[1].xy__   const[0].xx__  const[0].yy__ ;
diff --git a/verbatim/cube_rotate_const_move_rearrange.vs.asm b/verbatim/cube_rotate_const_move_rearrange.vs.asm
new file mode 100644
index 0000000..4eb3de9
--- /dev/null
+++ b/verbatim/cube_rotate_const_move_rearrange.vs.asm
@@ -0,0 +1,2 @@
+-- CONST[0] = {theta1, theta2, 0.159155, 0.5}
+-- CONST[1] = {6.283185, -3.141593, 0.2, 0.5}
diff --git a/verbatim/cube_rotate_const_move_rearrange_mad.vs.asm b/verbatim/cube_rotate_const_move_rearrange_mad.vs.asm
new file mode 100644
index 0000000..ea23274
--- /dev/null
+++ b/verbatim/cube_rotate_const_move_rearrange_mad.vs.asm
@@ -0,0 +1,7 @@
+-- the VE_ADD instruction is now not necessary/deleted:
+-- temp[0].xy   = VE_ADD  const[1].xy__ const[1].00__ ;
+
+-- const addresses and swizzles changed:
+temp[0].xy   = VE_MAD   const[0].xy__  const[0].zz__  const[0].ww__ ;
+temp[0].xy   = VE_FRC   temp[0].xy__ ;
+temp[0].xy   = VE_MAD   temp[0].xy__   const[1].xx__  const[1].yy__ ;
diff --git a/verbatim/cube_rotate_dual_math.vs.asm b/verbatim/cube_rotate_dual_math.vs.asm
new file mode 100644
index 0000000..5cc8c32
--- /dev/null
+++ b/verbatim/cube_rotate_dual_math.vs.asm
@@ -0,0 +1,14 @@
+temp[3].x     = ME_SIN  temp[0].___x ;
+temp[3].y     = ME_COS  temp[0].___x ;
+alt_temp[3].z = ME_SIN  temp[0].___y ;
+
+-- first rotation
+temp[1].yz    = VE_MUL  input[0]._-zz_  temp[3]._xy_ ,
+alt_temp[3].w = ME_COS  temp[0].y_ ;
+
+temp[1].xyz   = VE_MAD  input[0].xyy_  temp[3].1yx_      temp[1].0yz_ ;
+
+-- second rotation
+temp[2].xz    = VE_MUL  temp[1].-z_z_  alt_temp[3].z_w_ ;
+
+temp[2].xyz   = VE_MAD  temp[1].xyx_   alt_temp[3].w1z_  temp[2].x0z_ ;
diff --git a/verbatim/cube_rotate_dual_math_single_instruction.vs.asm b/verbatim/cube_rotate_dual_math_single_instruction.vs.asm
new file mode 100644
index 0000000..7dedb3c
--- /dev/null
+++ b/verbatim/cube_rotate_dual_math_single_instruction.vs.asm
@@ -0,0 +1,2 @@
+temp[1].yz    = VE_MUL   input[0]._-zz_  temp[3]._xy_ ,
+alt_temp[3].w = ME_COS   temp[0].y_ ;
diff --git a/verbatim/cube_rotate_sin_cos.vs.asm b/verbatim/cube_rotate_sin_cos.vs.asm
new file mode 100644
index 0000000..4cf230a
--- /dev/null
+++ b/verbatim/cube_rotate_sin_cos.vs.asm
@@ -0,0 +1,14 @@
+temp[3].x     = ME_SIN  temp[0].___x ;
+temp[3].y     = ME_COS  temp[0].___x ;
+temp[3].z     = ME_SIN  temp[0].___y ;
+temp[3].w     = ME_COS  temp[0].___y ;
+
+-- first rotation
+temp[1].yz    = VE_MUL  input[0]._-zz_  temp[3]._xy_ ;
+
+temp[1].xyz   = VE_MAD  input[0].xyy_   temp[3].1yx_   temp[1].0yz_ ;
+
+-- second rotation
+temp[2].xz    = VE_MUL  temp[1].-z_z_   temp[3].z_w_ ;
+
+temp[2].xyz   = VE_MAD  temp[1].xyx_    temp[3].w1z_   temp[2].x0z_ ;
diff --git a/verbatim/pow_fragment_shader.fs.asm b/verbatim/pow_fragment_shader.fs.asm
new file mode 100644
index 0000000..c587ea8
--- /dev/null
+++ b/verbatim/pow_fragment_shader.fs.asm
@@ -0,0 +1,12 @@
+-- a = log(a) / log(2)
+src0.a = temp[0] :
+  temp[0].a = LN2 src0.a ;
+
+-- a = a * 32.0 + 0
+src0.a = temp[0] ,
+src1.a = float(96) :  -- 32.0 (or any other constant)
+  temp[0].a = MAD src0.a src1.a src1.0 ;
+
+-- a = 2 ^ a
+src0.a = temp[0] :
+  temp[0].a = EX2 src0.a ;
diff --git a/verbatim/r500_view_clip.c b/verbatim/r500_view_clip.c
new file mode 100644
index 0000000..0a77aae
--- /dev/null
+++ b/verbatim/r500_view_clip.c
@@ -0,0 +1,6 @@
+VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0)
+VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0)
+VAP_VTE_CNTL__VTX_XY_FMT(1)
+VAP_VTE_CNTL__VTX_Z_FMT(0)
+VAP_VTE_CNTL__VTX_W0_FMT(1)
+VAP_CNTL__DX_CLIP_SPACE_DEF(1)
diff --git a/verbatim/sin_operand_remap.vs.asm b/verbatim/sin_operand_remap.vs.asm
new file mode 100644
index 0000000..8ae317c
--- /dev/null
+++ b/verbatim/sin_operand_remap.vs.asm
@@ -0,0 +1,8 @@
+-- CONST[0] = {0.159155, 0.5, 6.283185, -3.141593}
+
+-- t = t * 0.159155 + 0.5
+temp[0].xy   = VE_MAD   temp[0].xy__   const[0].xx__  const[0].yy__ ;
+-- t = frac(t)
+temp[0].xy   = VE_FRC   temp[0].xy__ ;
+-- t = t * 6.283185 + -3.141593
+temp[0].xy   = VE_MAD   temp[0].xy__   const[0].zz__  const[0].ww__ ;
diff --git a/videos/cube_clipped_far.mp4 b/videos/cube_clipped_far.mp4
new file mode 100644
index 0000000..4eebc38
Binary files /dev/null and b/videos/cube_clipped_far.mp4 differ
diff --git a/videos/cube_clipped_far.png b/videos/cube_clipped_far.png
new file mode 100644
index 0000000..51ad3b0
Binary files /dev/null and b/videos/cube_clipped_far.png differ
diff --git a/videos/cube_clipped_near.mp4 b/videos/cube_clipped_near.mp4
new file mode 100644
index 0000000..9abc332
Binary files /dev/null and b/videos/cube_clipped_near.mp4 differ
diff --git a/videos/cube_clipped_near.png b/videos/cube_clipped_near.png
new file mode 100644
index 0000000..91f1c8d
Binary files /dev/null and b/videos/cube_clipped_near.png differ
diff --git a/videos/cube_clipped_near_opengl.mp4 b/videos/cube_clipped_near_opengl.mp4
new file mode 100644
index 0000000..a415571
Binary files /dev/null and b/videos/cube_clipped_near_opengl.mp4 differ
diff --git a/videos/cube_clipped_near_opengl.png b/videos/cube_clipped_near_opengl.png
new file mode 100644
index 0000000..3f3320a
Binary files /dev/null and b/videos/cube_clipped_near_opengl.png differ
diff --git a/videos/cube_warped_textures.mp4 b/videos/cube_warped_textures.mp4
new file mode 100644
index 0000000..0ccc367
Binary files /dev/null and b/videos/cube_warped_textures.mp4 differ
diff --git a/videos/cube_warped_textures.png b/videos/cube_warped_textures.png
new file mode 100644
index 0000000..4b97ea1
Binary files /dev/null and b/videos/cube_warped_textures.png differ
diff --git a/videos/suzanne.mp4 b/videos/suzanne.mp4
new file mode 100644
index 0000000..f97dd1a
Binary files /dev/null and b/videos/suzanne.mp4 differ
diff --git a/videos/suzanne.png b/videos/suzanne.png
new file mode 100644
index 0000000..3184ef3
Binary files /dev/null and b/videos/suzanne.png differ