october 31 update

2025-11-11 18:34:41 -06:00 · 2025-11-11 18:34:41 -06:00 · 2c6e735350
commit 2c6e735350
parent ca4d7fb8ee
42 changed files with 889 additions and 10 deletions
--- a/.gitignore
+++ b/.gitignore
@ -19,3 +19,5 @@ verbatim/*.svg
 verbatim/*.pdf
 verbatim/output
 images/*.data
 /index*.svg
 diagrams/*-.svg
--- a/build.sh
+++ b/build.sh
@ -15,10 +15,17 @@ echo 'figure.figure { margin-left: 20px; margin-right: 20px;  }' >> index.css
 echo 'pre.verbatim { font-size: 0.9em; }' >> index.css
 sed -i 's|color-scheme: light dark;||g' index.css
 echo 'figcaption.caption { margin-bottom: 1.3em; margin-top: 0.3em; }' >> index.css
 echo '.cmti-10 { font-style: italic; }' >> index.css
-sed -i 's/index.css/index2.css/g' index.html
+sed -i 's/˜/~/g' index.html
-mv index.css index2.css
+sed -i "s|<p class='noindent'><object class='graphics' data='diagrams/z_operations.svg' name='picture diagrams/z_operations' type='image/svg+xml'></object>|<p class='noindent' style='text-align: center;'><object class='graphics' style='width: 40em;' data='diagrams/z_operations.svg' name='picture diagrams/z_operations' type='image/svg+xml'></object>|g" index.html
 sed -i '/height: 2.5em;/d' index.css
 sed -i 's/index.css/index3.css/g' index.html
 mv index.css index3.css
 python replace_video.py index.html
--- a/diagrams/build.sh
+++ b/diagrams/build.sh
@ -0,0 +1,3 @@
 # Render z_operations.dot to z_operations.svg with Graphviz `dot`.
 dot -Tsvg z_operations.dot > z_operations.svg
 # Disabled: would rewrite "scale(1 1)" to "scale(0.75 0.75)" in the
 # generated SVG.
 #sed -i 's/scale(1 1)/scale(0.75 0.75)/g' z_operations.svg
--- a/diagrams/resize_dot_svg.py
+++ b/diagrams/resize_dot_svg.py
@ -0,0 +1,31 @@
 # Rewrite the viewBox of the opening <svg> tag of an SVG file, in place.
 # Usage: python resize_dot_svg.py FILE.svg
 import sys

 # Default factor applied to the viewBox width and height.
 scale = 0.75


 def scale_svg(lines, factor=None):
     """Yield the text in ``lines`` with its first viewBox resized.

     ``lines`` are the source lines holding the opening ``<svg ...>`` tag.
     They are joined, the first ``viewBox="..."`` attribute is located, and
     three strings are yielded: the text before the attribute, the rewritten
     attribute, and the text after it.  Only the viewBox width and height are
     multiplied by ``factor`` (the module-level ``scale`` when ``factor`` is
     None); min-x and min-y are kept, but all four numbers are re-emitted as
     Python floats (so ``0.00`` becomes ``0.0``).

     Raises:
         ValueError: if there is no ``viewBox="`` attribute, the attribute is
             not terminated by a double quote, or it does not hold exactly
             four numbers.
     """
     if factor is None:
         factor = scale
     svg = "".join(lines)
     head, marker, rest = svg.partition('viewBox="')
     if not marker:
         raise ValueError("<svg> tag has no viewBox attribute")
     viewbox, quote, tail = rest.partition('"')
     if not quote:
         raise ValueError("unterminated viewBox attribute in <svg> tag")
     x, y, width, height = map(float, viewbox.split())
     yield head
     yield f'viewBox="{x} {y} {width * factor} {height * factor}"'
     yield tail


 def transform(path=None):
     """Yield the contents of the SVG at ``path`` with its viewBox resized.

     ``path`` defaults to ``sys.argv[1]``.  Lines are passed through
     unchanged except for the opening ``<svg`` tag: its lines are buffered
     from the line that starts with ``<svg`` up to and including the first
     line that ends with ``>``, then rewritten by ``scale_svg``.

     The file stays open while the generator is running, so consume it fully
     before opening the same path for writing.
     """
     if path is None:
         path = sys.argv[1]
     pending = []
     with open(path, encoding="utf-8") as f:
         for line in f:
             stripped = line.strip()
             if not pending and not stripped.startswith("<svg"):
                 yield line
                 continue
             pending.append(line)
             # Check the closing ">" on the "<svg" line itself too, so a tag
             # written on a single line is handled immediately.
             if stripped.endswith(">"):
                 yield from scale_svg(pending)
                 pending = []
     # An unterminated tag at EOF is passed through rather than dropped, so
     # rewriting the file in place never loses content.
     yield from pending


 def main(argv=None):
     """Resize the viewBox of the file named on the command line, in place.

     Returns the process exit status: 0 on success, 2 on a usage error.
     """
     argv = sys.argv if argv is None else argv
     if len(argv) != 2:
         print(f"usage: {argv[0]} FILE.svg", file=sys.stderr)
         return 2
     path = argv[1]
     # Build the complete output first; the input is only overwritten once it
     # has been read and transformed without error.
     output = "".join(transform(path))
     with open(path, "w", encoding="utf-8") as f:
         f.write(output)
     return 0


 if __name__ == "__main__":
     sys.exit(main())
--- a/diagrams/sin_clamp.pdf
+++ b/diagrams/sin_clamp.pdf
--- a/diagrams/sin_clamp.tex
+++ b/diagrams/sin_clamp.tex
@ -0,0 +1,34 @@
 \documentclass[varwidth=13.1cm, border={0.0cm 0.0cm 0.0cm 0.0cm}]{standalone}
 \usepackage{tikz}
 \usepackage[dvipsnames]{xcolor}
 \usepackage{pgfplots}
 \pgfplotsset{compat=1.18}
 \usepackage{amsmath}
 \newcommand{\Clamp}[1]{\operatorname{clamp}#1}
 \begin{document}
 \begin{tikzpicture}[scale=0.5]
  \draw[very thin,color=gray] (-pi * 3,-pi * 1.2) grid (pi * 3, pi * 1.2);
  \draw[->] (-3.2*pi,0) -- (3.2*pi,0) node[right] {$x$};
  \draw[->] (0,-pi * 1.4) -- (0,pi * 1.5) node[above] {$f(x)$};
 \draw[thick, color=NavyBlue] plot [domain=-pi * 3:pi * 3, samples=100] (\x, {min(max(\x, -pi), pi)} );
 \draw[thick, color=OrangeRed] plot [domain=-pi * 3:pi * 3, samples=1000] (\x, {sin(min(max(\x, -pi), pi) r)} );
 \node[NavyBlue] at (0, -5.4) {$f(x) = \Clamp(x, -\pi, +\pi) $};
 \node[OrangeRed] at (0, -6.6) {$f(x) = \sin(\Clamp(x, -\pi, +\pi)) $};
 \draw [dashed, color=ForestGreen] (-2 * pi,-3.8) -- (-2 * pi,3.8) node[above] {$x=-2\pi$};
 \draw [dashed, color=Brown] (2 * pi,-3.8) -- (2 * pi,3.8) node[above] {$x=2\pi$};
 \draw [dashed, color=Fuchsia] (-3.0,pi) -- (3.0,pi) ;
 \draw [color=Fuchsia] (0, pi + 0.5) node {$y=\pi$};
 \draw [dashed, color=Peach] (-3.0,-pi) -- (3.0,-pi) ;
 \draw [color=Peach] (0, -pi + 0.5) node {$y=-\pi$};
 \end{tikzpicture}
 \end{document}
--- a/diagrams/sin_frac.pdf
+++ b/diagrams/sin_frac.pdf
--- a/diagrams/sin_frac.tex
+++ b/diagrams/sin_frac.tex
@ -0,0 +1,38 @@
 \documentclass[varwidth=13.1cm, border={0.0cm 0.0cm 0.0cm 0.0cm}]{standalone}
 \usepackage{tikz}
 \usepackage[dvipsnames]{xcolor}
 \usepackage{pgfplots}
 \pgfplotsset{compat=1.18}
 \usepackage{amsmath}
 \newcommand{\Frac}[1]{\operatorname{frac}#1}
 \begin{document}
 \begin{tikzpicture}[scale=0.5]
  \draw[very thin,color=gray] (-pi * 3,-pi * 1.2) grid (pi * 3, pi * 1.2);
  \draw[->] (-3.2*pi,0) -- (3.2*pi,0) node[right] {$x$};
  \draw[->] (0,-pi * 1.4) -- (0,pi * 1.5) node[above] {$f(x)$};
 \foreach \i in {-2, 0, 2}{
    \pgfmathsetmacro{\start}{(\i - 1) * pi}
    \pgfmathsetmacro{\end}  {(\i + 1) * pi}
    \draw[thick, color=NavyBlue] plot [domain=\start:\end, samples=100] (\x, {((\x * 1/(2 * pi) + 0.5) - floor(\x * 1/(2 * pi) + 0.5)) * 2 * pi - pi} );
 }
 \draw[thick, color=OrangeRed] plot [domain=-pi * 3:pi * 3, samples=1000] (\x, {sin((((\x * 1/(2 * pi) + 0.5) - floor(\x * 1/(2 * pi) + 0.5)) * 2 * pi - pi) r)} );
 \node[NavyBlue] at (0, -5.4) {$f(x) = \Frac(x \cdot \frac{1}{2\pi}+0.5) \cdot 2\pi - \pi $};
 \node[OrangeRed] at (0, -6.6) {$f(x) = \sin( \Frac(x \cdot \frac{1}{2\pi}+0.5) \cdot 2\pi - \pi ) $};
 \draw [dashed, color=ForestGreen] (-2 * pi,-3.8) -- (-2 * pi,3.8) node[above] {$x=-2\pi$};
 \draw [dashed, color=Brown] (2 * pi,-3.8) -- (2 * pi,3.8) node[above] {$x=2\pi$};
 \draw [dashed, color=Fuchsia] (-3.0,pi) -- (3.0,pi) ;
 \draw [color=Fuchsia] (0, pi + 0.5) node {$y=\pi$};
 \draw [dashed, color=Peach] (-3.0,-pi) -- (3.0,-pi) ;
 \draw [color=Peach] (0, -pi + 0.5) node {$y=-\pi$};
 \end{tikzpicture}
 \end{document}
--- a/diagrams/z_operations.dot
+++ b/diagrams/z_operations.dot
@ -0,0 +1,60 @@
 digraph G {
  vertex_shader [label="(from the vertex shader)"]
  subgraph cluster_clipping {
    label = "clipping"
    DX_CLIP_SPACE_DEF [label="DX_CLIP_SPACE_DEF
 possibly clip the polygon"]
  }
  subgraph cluster_perspective {
    label = "perspective division"
    VTX_Z_FMT [nojustify=true label="VTX_Z_FMT
 (if enabled) divide Z by W"]
  }
  subgraph cluster_viewport_transformation {
    label = "viewport transformation"
    VPORT_Z_SCALE
    VPORT_Z_OFFSET
  }
  subgraph cluster_geometry_assembly {
  }
  subgraph cluster_setup_unit {
    label = "setup unit"
    SU_DEPTH_SCALE
    SU_DEPTH_OFFSET
  }
  subgraph cluster_zfunc {
    label = "ZFUNC"
    { rank=same
    depth_test [shape=box label="depth test"]
    depth_pass [shape=box label="depth pass"]
    }
    depth_test -> depth_pass
  }
  Z_BUFFER [shape=invhouse label="(write the new Z
 value to the Z-buffer)"]
  fragment_shader [label="(to the fragment shader)"]
  vertex_shader -> DX_CLIP_SPACE_DEF
  DX_CLIP_SPACE_DEF -> VTX_Z_FMT
  VTX_Z_FMT -> VPORT_Z_SCALE
  VPORT_Z_SCALE -> VPORT_Z_OFFSET
  VPORT_Z_OFFSET -> SU_DEPTH_SCALE
  SU_DEPTH_SCALE -> SU_DEPTH_OFFSET
  SU_DEPTH_OFFSET -> depth_test
  depth_test -> Z_BUFFER
  VPORT_Z_OFFSET -> depth_pass
  depth_pass -> fragment_shader
 }
--- a/diagrams/z_operations.svg
+++ b/diagrams/z_operations.svg
@ -0,0 +1,173 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
 "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
 <!-- Generated by graphviz version 12.2.1 (20241206.2353)
 -->
 <!-- Title: G Pages: 1 -->
 <svg width="588pt" height="765pt"
 viewBox="0.00 0.00 588.26 764.75" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 760.75)">
 <title>G</title>
 <polygon fill="white" stroke="none" points="-4,4 -4,-760.75 584.26,-760.75 584.26,4 -4,4"/>
 <g id="clust1" class="cluster">
 <title>cluster_clipping</title>
 <polygon fill="none" stroke="black" points="94.4,-611.4 94.4,-712.75 382.4,-712.75 382.4,-611.4 94.4,-611.4"/>
 <text text-anchor="middle" x="238.4" y="-695.45" font-family="Times,serif" font-size="14.00">clipping</text>
 </g>
 <g id="clust2" class="cluster">
 <title>cluster_perspective</title>
 <polygon fill="none" stroke="black" points="89.4,-502.04 89.4,-603.4 387.4,-603.4 387.4,-502.04 89.4,-502.04"/>
 <text text-anchor="middle" x="238.4" y="-586.1" font-family="Times,serif" font-size="14.00">perspective division</text>
 </g>
 <g id="clust3" class="cluster">
 <title>cluster_viewport_transformation</title>
 <polygon fill="none" stroke="black" points="125.4,-344.79 125.4,-494.04 351.4,-494.04 351.4,-344.79 125.4,-344.79"/>
 <text text-anchor="middle" x="238.4" y="-476.74" font-family="Times,serif" font-size="14.00">viewport transformation</text>
 </g>
 <g id="clust5" class="cluster">
 <title>cluster_setup_unit</title>
 <polygon fill="none" stroke="black" points="101.4,-187.54 101.4,-336.79 347.4,-336.79 347.4,-187.54 101.4,-187.54"/>
 <text text-anchor="middle" x="224.4" y="-319.49" font-family="Times,serif" font-size="14.00">setup unit</text>
 </g>
 <g id="clust6" class="cluster">
 <title>cluster_zfunc</title>
 <polygon fill="none" stroke="black" points="187.4,-102.29 187.4,-179.54 405.4,-179.54 405.4,-102.29 187.4,-102.29"/>
 <text text-anchor="middle" x="296.4" y="-162.24" font-family="Times,serif" font-size="14.00">ZFUNC</text>
 </g>
 <!-- vertex_shader -->
 <g id="node1" class="node">
 <title>vertex_shader</title>
 <ellipse fill="none" stroke="black" cx="238.4" cy="-738.75" rx="134.33" ry="18"/>
 <text text-anchor="middle" x="238.4" y="-734.08" font-family="Times,serif" font-size="14.00">(from the vertex shader)</text>
 </g>
 <!-- DX_CLIP_SPACE_DEF -->
 <g id="node2" class="node">
 <title>DX_CLIP_SPACE_DEF</title>
 <ellipse fill="none" stroke="black" cx="238.4" cy="-649.45" rx="136.47" ry="30.05"/>
 <text text-anchor="middle" x="238.4" y="-653.4" font-family="Times,serif" font-size="14.00">DX_CLIP_SPACE_DEF</text>
 <text text-anchor="middle" x="238.4" y="-636.15" font-family="Times,serif" font-size="14.00">possibly clip the polygon</text>
 </g>
 <!-- vertex_shader&#45;&gt;DX_CLIP_SPACE_DEF -->
 <g id="edge2" class="edge">
 <title>vertex_shader&#45;&gt;DX_CLIP_SPACE_DEF</title>
 <path fill="none" stroke="black" d="M238.4,-720.5C238.4,-712.05 238.4,-701.49 238.4,-691.15"/>
 <polygon fill="black" stroke="black" points="241.9,-691.21 238.4,-681.21 234.9,-691.21 241.9,-691.21"/>
 </g>
 <!-- VTX_Z_FMT -->
 <g id="node3" class="node">
 <title>VTX_Z_FMT</title>
 <ellipse fill="none" stroke="black" cx="238.4" cy="-540.09" rx="141.24" ry="30.05"/>
 <text text-anchor="middle" x="238.4" y="-544.04" font-family="Times,serif" font-size="14.00">VTX_Z_FMT</text>
 <text text-anchor="middle" x="238.4" y="-526.79" font-family="Times,serif" font-size="14.00">(if enabled) divide Z by W</text>
 </g>
 <!-- DX_CLIP_SPACE_DEF&#45;&gt;VTX_Z_FMT -->
 <g id="edge3" class="edge">
 <title>DX_CLIP_SPACE_DEF&#45;&gt;VTX_Z_FMT</title>
 <path fill="none" stroke="black" d="M238.4,-619.11C238.4,-607.63 238.4,-594.27 238.4,-581.88"/>
 <polygon fill="black" stroke="black" points="241.9,-581.91 238.4,-571.91 234.9,-581.91 241.9,-581.91"/>
 </g>
 <!-- VPORT_Z_SCALE -->
 <g id="node4" class="node">
 <title>VPORT_Z_SCALE</title>
 <ellipse fill="none" stroke="black" cx="238.4" cy="-442.79" rx="97.51" ry="18"/>
 <text text-anchor="middle" x="238.4" y="-438.12" font-family="Times,serif" font-size="14.00">VPORT_Z_SCALE</text>
 </g>
 <!-- VTX_Z_FMT&#45;&gt;VPORT_Z_SCALE -->
 <g id="edge4" class="edge">
 <title>VTX_Z_FMT&#45;&gt;VPORT_Z_SCALE</title>
 <path fill="none" stroke="black" d="M238.4,-509.72C238.4,-497.95 238.4,-484.43 238.4,-472.7"/>
 <polygon fill="black" stroke="black" points="241.9,-472.8 238.4,-462.8 234.9,-472.8 241.9,-472.8"/>
 </g>
 <!-- VPORT_Z_OFFSET -->
 <g id="node5" class="node">
 <title>VPORT_Z_OFFSET</title>
 <ellipse fill="none" stroke="black" cx="238.4" cy="-370.79" rx="104.87" ry="18"/>
 <text text-anchor="middle" x="238.4" y="-366.12" font-family="Times,serif" font-size="14.00">VPORT_Z_OFFSET</text>
 </g>
 <!-- VPORT_Z_SCALE&#45;&gt;VPORT_Z_OFFSET -->
 <g id="edge5" class="edge">
 <title>VPORT_Z_SCALE&#45;&gt;VPORT_Z_OFFSET</title>
 <path fill="none" stroke="black" d="M238.4,-424.49C238.4,-417.2 238.4,-408.52 238.4,-400.33"/>
 <polygon fill="black" stroke="black" points="241.9,-400.41 238.4,-390.41 234.9,-400.41 241.9,-400.41"/>
 </g>
 <!-- SU_DEPTH_SCALE -->
 <g id="node6" class="node">
 <title>SU_DEPTH_SCALE</title>
 <ellipse fill="none" stroke="black" cx="227.4" cy="-285.54" rx="107.5" ry="18"/>
 <text text-anchor="middle" x="227.4" y="-280.87" font-family="Times,serif" font-size="14.00">SU_DEPTH_SCALE</text>
 </g>
 <!-- VPORT_Z_OFFSET&#45;&gt;SU_DEPTH_SCALE -->
 <g id="edge6" class="edge">
 <title>VPORT_Z_OFFSET&#45;&gt;SU_DEPTH_SCALE</title>
 <path fill="none" stroke="black" d="M236.12,-352.54C234.69,-341.72 232.81,-327.49 231.16,-315.02"/>
 <polygon fill="black" stroke="black" points="234.65,-314.73 229.87,-305.27 227.72,-315.64 234.65,-314.73"/>
 </g>
 <!-- depth_pass -->
 <g id="node9" class="node">
 <title>depth_pass</title>
 <polygon fill="none" stroke="black" points="397.4,-146.29 303.4,-146.29 303.4,-110.29 397.4,-110.29 397.4,-146.29"/>
 <text text-anchor="middle" x="350.4" y="-123.62" font-family="Times,serif" font-size="14.00">depth pass</text>
 </g>
 <!-- VPORT_Z_OFFSET&#45;&gt;depth_pass -->
 <g id="edge10" class="edge">
 <title>VPORT_Z_OFFSET&#45;&gt;depth_pass</title>
 <path fill="none" stroke="black" d="M321.07,-359.37C332.91,-354.32 343.68,-347.09 351.4,-336.79 390.89,-284.12 373.49,-200.91 360.3,-157.6"/>
 <polygon fill="black" stroke="black" points="363.67,-156.65 357.29,-148.19 357,-158.78 363.67,-156.65"/>
 </g>
 <!-- SU_DEPTH_OFFSET -->
 <g id="node7" class="node">
 <title>SU_DEPTH_OFFSET</title>
 <ellipse fill="none" stroke="black" cx="224.4" cy="-213.54" rx="114.87" ry="18"/>
 <text text-anchor="middle" x="224.4" y="-208.87" font-family="Times,serif" font-size="14.00">SU_DEPTH_OFFSET</text>
 </g>
 <!-- SU_DEPTH_SCALE&#45;&gt;SU_DEPTH_OFFSET -->
 <g id="edge7" class="edge">
 <title>SU_DEPTH_SCALE&#45;&gt;SU_DEPTH_OFFSET</title>
 <path fill="none" stroke="black" d="M226.66,-267.24C226.35,-259.95 225.97,-251.27 225.62,-243.08"/>
 <polygon fill="black" stroke="black" points="229.12,-243 225.2,-233.16 222.13,-243.3 229.12,-243"/>
 </g>
 <!-- depth_test -->
 <g id="node8" class="node">
 <title>depth_test</title>
 <polygon fill="none" stroke="black" points="285.15,-146.29 195.65,-146.29 195.65,-110.29 285.15,-110.29 285.15,-146.29"/>
 <text text-anchor="middle" x="240.4" y="-123.62" font-family="Times,serif" font-size="14.00">depth test</text>
 </g>
 <!-- SU_DEPTH_OFFSET&#45;&gt;depth_test -->
 <g id="edge8" class="edge">
 <title>SU_DEPTH_OFFSET&#45;&gt;depth_test</title>
 <path fill="none" stroke="black" d="M227.72,-195.29C229.8,-184.47 232.53,-170.24 234.93,-157.77"/>
 <polygon fill="black" stroke="black" points="238.35,-158.49 236.8,-148.01 231.48,-157.17 238.35,-158.49"/>
 </g>
 <!-- depth_test&#45;&gt;depth_pass -->
 <g id="edge1" class="edge">
 <title>depth_test&#45;&gt;depth_pass</title>
 <path fill="none" stroke="black" d="M285.52,-128.29C287.52,-128.29 289.53,-128.29 291.53,-128.29"/>
 <polygon fill="black" stroke="black" points="291.49,-131.79 301.49,-128.29 291.49,-124.79 291.49,-131.79"/>
 </g>
 <!-- Z_BUFFER -->
 <g id="node10" class="node">
 <title>Z_BUFFER</title>
 <polygon fill="none" stroke="black" points="0,-25.67 146.4,0 292.8,-25.67 292.66,-67.2 0.14,-67.2 0,-25.67"/>
 <text text-anchor="middle" x="146.4" y="-41.1" font-family="Times,serif" font-size="14.00">(write the new Z</text>
 <text text-anchor="middle" x="146.4" y="-23.85" font-family="Times,serif" font-size="14.00">value to the Z&#45;buffer)</text>
 </g>
 <!-- depth_test&#45;&gt;Z_BUFFER -->
 <g id="edge9" class="edge">
 <title>depth_test&#45;&gt;Z_BUFFER</title>
 <path fill="none" stroke="black" d="M222.28,-110.1C211.86,-100.23 198.36,-87.42 185.63,-75.35"/>
 <polygon fill="black" stroke="black" points="188.19,-72.95 178.52,-68.61 183.37,-78.03 188.19,-72.95"/>
 </g>
 <!-- fragment_shader -->
 <g id="node11" class="node">
 <title>fragment_shader</title>
 <ellipse fill="none" stroke="black" cx="445.4" cy="-37.15" rx="134.86" ry="18"/>
 <text text-anchor="middle" x="445.4" y="-32.47" font-family="Times,serif" font-size="14.00">(to the fragment shader)</text>
 </g>
 <!-- depth_pass&#45;&gt;fragment_shader -->
 <g id="edge11" class="edge">
 <title>depth_pass&#45;&gt;fragment_shader</title>
 <path fill="none" stroke="black" d="M368.72,-110.1C383.04,-96.66 403.15,-77.79 419.18,-62.75"/>
 <polygon fill="black" stroke="black" points="421.14,-65.71 426.04,-56.32 416.35,-60.61 421.14,-65.71"/>
 </g>
 </g>
 </svg>
--- a/images/cube_scene.png
+++ b/images/cube_scene.png
--- a/images/plane_scene.png
+++ b/images/plane_scene.png
--- a/images/z_buffer_clipped.png
+++ b/images/z_buffer_clipped.png
--- a/images/z_buffer_cube.png
+++ b/images/z_buffer_cube.png
--- a/images/z_buffer_cube_range.png
+++ b/images/z_buffer_cube_range.png
--- a/images/z_buffer_cube_range_back.png
+++ b/images/z_buffer_cube_range_back.png
--- a/images/z_buffer_gradient.png
+++ b/images/z_buffer_gradient.png
--- a/images/z_buffer_overflow.png
+++ b/images/z_buffer_overflow.png
--- a/images/z_buffer_perspective.png
+++ b/images/z_buffer_perspective.png
--- a/images/z_buffer_perspective_scale.png
+++ b/images/z_buffer_perspective_scale.png
--- a/index.tex
+++ b/index.tex
@ -1,5 +1,6 @@
 \documentclass[20pt]{article}
 \usepackage{amsmath}
 \usepackage[font=small,labelfont=bf]{caption}
 \usepackage{hyperref}
 \hypersetup{
@ -15,6 +16,7 @@
 \graphicspath{ {./images/} }
 \usepackage{minted}
 \usepackage{nicefrac}
 \title{Radeon R500}
 \date{}
@ -28,9 +30,9 @@
 \section{Introduction}
-The primary/minimal project goal is "draw a triangle on a Radeon R500 via direct
+The primary/minimal project goal is ``draw a triangle on a Radeon R500 via
-memory-mapped hardware register and texture memory accesses". This means no
+direct memory-mapped hardware register and texture memory accesses''. This means
-\href{https://mesa3d.org/}{Mesa}, no
+no \href{https://mesa3d.org/}{Mesa}, no
 \href{https://github.com/torvalds/linux/tree/v6.12/drivers/gpu/drm/radeon}{radeon}
 kernel module, and certainly no OpenGL or Direct3D.
@ -661,14 +663,45 @@ from scratch. I first implemented the rotation in GLSL:
  \caption*{\texttt{cube\_rotate.vs.glsl}}
 \end{figure}
-I verified that the GLSL version worked as expected in OpenGL, then I translated
+\subsubsection{Remapping shader unit sin/cos operands}
-the GLSL to R500 vertex shader assembly, as:
+
 Because this shader program depends on being able to calculate sin and cos, this
 meant I immediately needed to understand how to use the \texttt{ME\_SIN} and
 \texttt{ME\_COS} operations.
 The R500 vertex shader ME unit clamps sin/cos operands to the range
 $(-\pi,+\pi)$, as in:
 \begin{figure}
  \href{diagrams/sin_clamp.pdf}{\includegraphics{diagrams/sin_clamp.pdf}}
 \end{figure}
 ``Remapping'' floating point values from $(-\infty,+\infty)$ to $(-\pi,+\pi)$ is not
 obvious. I was not previously aware of this transformation:
 \begin{figure}
  \href{diagrams/sin_frac.pdf}{\includegraphics{diagrams/sin_frac.pdf}}
 \end{figure}
 Or, expressed as R500 vertex shader assembly:
 \begin{figure}
  \href{verbatim/sin_operand_remap.vs.asm}{\includegraphics{verbatim/output/sin_operand_remap.vs.asm.pdf}}
 \end{figure}
 \subsubsection{Translation of the GLSL vertex shader to R500 vertex shader assembly}
 Having verified that the GLSL version works as expected in OpenGL, and knowing
 how to use the R500 vertex shader sin/cos operations, I then translated the GLSL
 to R500 vertex shader assembly, as:
 \begin{figure}
  \href{verbatim/cube_rotate.vs.asm}{\includegraphics{verbatim/output/cube_rotate.vs.asm.pdf}}
  \caption*{\texttt{cube\_rotate.vs.asm}}
 \end{figure}
 \subsubsection{Vertex shader assembler/code generator debugging}
 However, when I first executed the vertex shader cube rotation demo, I found
 it did not work as expected:
@ -775,8 +808,8 @@ I've written several \href{https://github.com/buhman/scu-dsp-asm}{nice assembler
 for other architectures in the past, but I've never seen any instruction set
 as expressive as R500 fragment shaders.
-I attempted to directly reflect this ``multiple tiers of operand argument
+I attempted to directly represent this ``multiple tiers of operand argument
-decoding'' in the syntax I invented for fragment shader ALU instructions.
+decoding'' in my fragment shader ALU instruction syntax.
 These instructions are also vector instructions: a total of 24 floating point
 input operands and 8 floating results could be evaluated per instruction.
@ -902,4 +935,426 @@ except:
 The exponent/mantissa table that shows example 7-bit float values on page 106 of
 \href{doc/R5xx_Acceleration_v1.5.pdf}{R5xx\_Acceleration\_v1.5.pdf} is incorrect.
 \section{Progress: 26 Oct 2025}
 From 21 Oct 2025 to 26 Oct 2025, I achieved the following (roughly in chronological order):
 \begin{itemize}
 \item I \href{https://git.idk.st/bilbo/r500/commit/8594bc4a38f6fcab2ac6e437b46bcf1e0e6d32dd}{rewrote} most of the vertex shader assembler parser/validator, and implemented support for \href{https://git.idk.st/bilbo/r500/commit/f3f1969f4a9b336536f5fb23d246f7103c41e20d}{assembling/disassembling ``dual math'' operations}
 \item I implemented support for \href{https://git.idk.st/bilbo/r500/commit/96d7286e7cd3270b9dca0924d3a046d585d6dc9d}{assembling} and \href{https://git.idk.st/bilbo/r500/commit/27227426eaac265bc3126edd7d017c791640e789}{disassembling} TEX fragment shader instructions
 \item I presented this project (including live demos on real hardware) at
  a \href{https://itch.io/jam/spoopy-jam-7-heckraiser}{local in-person game jam event}
 \end{itemize}
 \subsection{Vertex shader optimization part 1: ``MOV'' elimination}
 After talking about it in-person, I decided to try to golf my original
 15-instruction
 \href{https://git.idk.st/bilbo/r500/src/commit/c8ae311e60/drm/cube_rotate.vs.asm}{cube\_rotate.vs.asm} vertex shader.
 The first opportunity for optimization is in the first two instructions of:
 \begin{figure}
  \href{verbatim/cube_rotate_const_move.vs.asm}{\includegraphics{verbatim/output/cube_rotate_const_move.vs.asm.pdf}}
 \end{figure}
 The \texttt{VE\_ADD} (being used here as a ``MOV'' instruction) is needed
 because there is only a single 128-bit read port into \texttt{const} memory, so
 a multiply-add like this is illegal:
 \begin{figure}
  \href{verbatim/cube_rotate_const_move_illegal.vs.asm}{\includegraphics{verbatim/output/cube_rotate_const_move_illegal.vs.asm.pdf}}
 \end{figure}
 I observed that because I never need to reference the last two constants in the
 same instruction that references the first two constants, if I rearrange the
 ordering of the constants to:
 \begin{figure}
  \href{verbatim/cube_rotate_const_move_rearrange.vs.asm}{\includegraphics{verbatim/output/cube_rotate_const_move_rearrange.vs.asm.pdf}}
 \end{figure}
 I can then rewrite the multiply-add instructions as:
 \begin{figure}
  \href{verbatim/cube_rotate_const_move_rearrange_mad.vs.asm}{\includegraphics{verbatim/output/cube_rotate_const_move_rearrange_mad.vs.asm.pdf}}
 \end{figure}
 \subsection{Vertex shader optimization part 2: ``dual math'' instructions}
 I spent an entire day rewriting large portions of the vertex shader assembler to
 add support for ``dual math'' instructions.
 The original
 \href{https://git.idk.st/bilbo/r500/src/commit/c8ae311e60/drm/cube_rotate.vs.asm}{cube\_rotate.vs.asm}
 contains this sequence of \texttt{ME\_SIN}/\texttt{ME\_COS} instructions:
 \begin{figure}
  \href{verbatim/cube_rotate_sin_cos.vs.asm}{\includegraphics{verbatim/output/cube_rotate_sin_cos.vs.asm.pdf}}
 \end{figure}
 The \texttt{temp[3].x} and \texttt{temp[3].y} results are needed immediately,
 but \texttt{temp[3].z} and \texttt{temp[3].w} are not needed until after the
 first pair of \texttt{VE\_MUL}/\texttt{VE\_MAD} operations.
 The dual math instruction mode replaces the 3rd \texttt{VE\_} instruction operand
 with any \texttt{ME\_} operation, so it is only usable with 2-operand
 \texttt{VE\_} instructions like \texttt{VE\_MUL}.
 The dual math encoding also has several restrictions (it only has \nicefrac{1}{4}th the
 control word bits compared to a normal \texttt{ME\_} instruction). A notable
 restriction is that it must write to \texttt{alt\_temp}.
 Unlike the fancy things that can be done with fragment shader
 operands/sources/swizzles, a single vertex shader operand can also only read
 from a single 128-bit register, so this means to be able to continue to access
 \texttt{temp[3].zw} as a vector, both \texttt{z} and \texttt{w} must now be
 stored in \texttt{alt\_temp}, even if only one of them was written by a ``dual
 math'' instruction.
 The change (and my newly-implemented dual math syntax) is:
 \begin{figure}
  \href{verbatim/cube_rotate_dual_math.vs.asm}{\includegraphics{verbatim/output/cube_rotate_dual_math.vs.asm.pdf}}
 \end{figure}
 Where the dual math instruction:
 \begin{figure}
  \href{verbatim/cube_rotate_dual_math_single_instruction.vs.asm}{\includegraphics{verbatim/output/cube_rotate_dual_math_single_instruction.vs.asm.pdf}}
 \end{figure}
 Is encoded by the assembler as a single instruction and is executed by the vertex
 shader unit in a single clock cycle.
 The final
 \href{https://git.idk.st/bilbo/r500/src/commit/c8ae311e60/drm/cube_rotate_optimize.vs.asm}{cube\_rotate\_optimize.vs.asm}
 was reduced from 15 instructions to 13 instructions (compared
 to Mesa's R500 vertex shader compiler's 27 instructions).
 \section{Progress: 29 Oct 2025}
 From 27 Oct 2025 to 29 Oct 2025, I achieved the following (roughly in chronological order):
 \begin{itemize}
 \item I implemented support for \href{https://git.idk.st/bilbo/r500/commit/9aecbbfc6f297ea71c72f4c4fba1b8107be95ca1}{``multiple render targets''} in the fragment shader assembler
 \item I wrote a \href{https://git.idk.st/bilbo/r500/src/commit/18b7a593bd/drm/texture_blur_horizontal.fs.asm}{gaussian blur fragment shader}
 \item I made a demo that draws \href{https://git.idk.st/bilbo/r500/src/commit/18b7a593bd/drm/pumpkin_man.c#L963}{multiple 3D ``objects''} where each object's UV coordinates sample a \href{https://git.idk.st/bilbo/r500/src/commit/18b7a593bd/drm/pumpkin_man.c#L1029-L1069}{different} \href{https://git.idk.st/bilbo/r500/src/commit/18b7a593bd/drm/pumpkin_man.c#L314}{texture}
 \item I did several experiments related to R500's Z-buffer implementation
 \end{itemize}
 \subsection{Z-buffer experiments}
 \label{sec:z-buffer-experiments}
 Though I produced a ``properly'' Z-buffered 3D cube demo previously, I felt I
 did not fully understand the relationship between Z coordinates, W coordinates,
 viewport transformations, and the actual values that are written to the
 Z-buffer. At some point, I'd like to write fragment shaders that sample the
 Z-buffer, so I feel I need to understand this more rigorously.
 For comparison, Sega Dreamcast stores 32-bit floating-point values in the
 ``depth accumulation buffer''. This effectively means that any Z coordinates can
 be stored in the depth accumulation buffer without scaling or range
 remapping. I've made several
 \href{https://az1.idk.st/public/20kdm2-demo.mp4}{moderately fancy} Dreamcast
 demos that happily store arbitrary ``view space'' Z values in the depth
 accumulation buffer without any visible depth aliasing/artifacts.
 In contrast, the Radeon R500 does not have a 32-bit floating point Z-buffer
 format. Instead, R500 supports (\href{doc/R5xx_Acceleration_v1.5.pdf}{R5xx\_Acceleration\_v1.5.pdf}, page 283,
 \texttt{ZB\_FORMAT}):
 \begin{itemize}
 \item 16-bit integer Z
 \item 16-bit floating point
 \item 24-bit integer Z with 8-bit stencil
 \end{itemize}
 The third option, with the most bits, clearly ought to give the most
 precision--with the caveat that the Z values that are written to the Z-buffer
 should be scaled to be uniformly distributed across the range of 24-bit integers.
 I performed several tests with variations of
 \href{https://git.idk.st/bilbo/r500/src/branch/main/drm/zbuffer_test.c}{zbuffer\_test.c}. The
 general strategy was:
 \begin{itemize}
 \item Define some contrived/illustrative 3D scene
 \item Manipulate the scale/range of Z and W values
 \item Observe the state of the Z-buffer after rendering
 \end{itemize}
 The first scene I chose was of a tilted plane that is non-coplanar with the view
 space XY plane, as in:
 \begin{figure}
  \href{images/plane_scene.png}{\includegraphics{images/plane_scene.png}}
  \caption*{Blender screenshot, ``plane scene''}
 \end{figure}
 Where the grey plane is the object that is to be rendered, the yellow lines
 represent a ``camera'' from which the plane is to be viewed, and the blue line
 represents the view/clip-space Z axis.
 To view the content of the Z buffer, I wrote a
 \href{https://git.idk.st/bilbo/r500/src/commit/18b7a593bd/tools/zbuf_decode.py}{simple script}
 to convert the 24-bit integer Z-buffer to 16-bit
 \href{https://en.wikipedia.org/wiki/Netpbm}{PGM},
 so that it can be easily viewed in an image editor. This tool also shows the
 minimum and maximum values found in the Z-buffer, intended to help verify that
 the entire numeric range of the Z-buffer is being used.
 While I expected to see the (orthographic, directly facing the camera) plane
 drawn on the Z-buffer as a smooth gradient such as:
 \begin{figure}
  \href{images/z_buffer_gradient.png}{\includegraphics{images/z_buffer_gradient.png}}
  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_gradient.png}}
 \end{figure}
 Several of my tests displayed numeric aliasing, overflows, underflows, etc.:
 \begin{figure}
  \href{images/z_buffer_overflow.png}{\includegraphics{images/z_buffer_overflow.png}}
  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_overflow.png}}
 \end{figure}
 Of particular interest to me was to verify the behavior of the
 \texttt{DX\_CLIP\_SPACE\_DEF} bit
 (\href{doc/R5xx_Acceleration_v1.5.pdf}{R5xx\_Acceleration\_v1.5.pdf}, page
 255--this is also the only place in the entire manual where ``non-user'' clip
 planes are even defined), and to understand the order of pipeline operations.
 I played with moving the plane around, to observe clipping behavior (here the
 lower half of the scene was clipped due to intersecting the Z=+1.0 clip plane):
 \begin{figure}
  \href{images/z_buffer_clipped.png}{\includegraphics{images/z_buffer_clipped.png}}
  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_clipped.png}\\
    (also simultaneously showing overflow/underflow artifacts)}
 \end{figure}
 Thinking at this point that I nearly understood most of the pieces, I then
 re-enabled XY perspective division:
 \begin{figure}
  \href{images/z_buffer_perspective.png}{\includegraphics{images/z_buffer_perspective.png}}
  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_perspective.png}}
 \end{figure}
 The above image was not quite what I wanted: I noticed the range of the Z buffer
 values was roughly between \texttt{0} and \texttt{8388607}, but what I really
 wanted was \texttt{0} to \texttt{16777215}. Adjusting scale again produced this
 Z-buffer:
 \begin{figure}
  \href{images/z_buffer_perspective_scale.png}{\includegraphics{images/z_buffer_perspective_scale.png}}
  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_perspective\_scale.png}}
 \end{figure}
 Up to this point, I was using \texttt{ZFUNC=GREATER} with a Z-buffer cleared
 with an initial depth of zero, where all Z values are negative numbers.
 I decided it might be more intuitive to use a Z-buffer that is cleared with an
 initial depth of one, using \texttt{ZFUNC=LESS} instead where all Z values are
 positive numbers.
 With these adjustments, I captured a Z-buffer from the earlier cube demo:
 \begin{figure}
  \href{images/z_buffer_cube.png}{\includegraphics{images/z_buffer_cube.png}}
  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_cube.png}}
 \end{figure}
 This was still not quite ``correct'', because the minimum depth of the cube is
 being drawn as \textasciitilde{}\texttt{2763306} (\textasciitilde{}0.16), but I expected
 something closer to zero.
 Adjusting my range/scale arithmetic again produced this image:
 \begin{figure}
  \href{images/z_buffer_cube_range.png}{\includegraphics{images/z_buffer_cube_range.png}}
  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_cube\_range.png}}
 \end{figure}
 The minimum Z value now appears to be closer to zero, but the ``back'' faces of
 the cube (and maximum Z values) are not visible. Without changing any
 scale/range constants, inverting \texttt{ZFUNC} and using a zero-initialized
 Z-buffer produced this image of the back faces of the cube:
 \begin{figure}
  \href{images/z_buffer_cube_range_back.png}{\includegraphics{images/z_buffer_cube_range_back.png}}
  \caption*{R500 framebuffer capture, \texttt{z\_buffer\_cube\_range\_back.png}}
 \end{figure}
 Indeed, the maximum Z value is close to \textasciitilde{}\texttt{16777215}
 (\textasciitilde{}1.0), as intended. I feel at this point I have a better intuition
 for using integer Z-buffers. The pipeline (and relevant registers) appears to be
 something like this:
 \begin{figure}
  \includegraphics{diagrams/z_operations.svg}
  \caption*{R500 Z transform pipeline (simplified)}
 \end{figure}
 Prior to these experiments, I was not aware \texttt{SU\_DEPTH\_SCALE} is the
 thing directly responsible for scaling floating point Z values to the integer Z
 values stored in the depth buffer.
 In general, the hardware perspective divide, viewport transform, clipping, and
 setup units are absolutely fascinating.
 \subsection{3D perspective}
 Despite making many 3D demos in the past, I feel that every time I want to
 ``draw something 3D'' on a new platform, I need to re-relearn 3D/perspective
 transformations (perhaps because I never truly \textit{learned} anything).
 In many OpenGL articles/tutorials/books the
 \href{https://learnopengl.com/Getting-started/Coordinate-Systems}{standard}
 \href{https://ogldev.org/www/tutorial12/tutorial12.html}{formula} for
 \href{https://songho.ca/opengl/gl_projectionmatrix.html}{explaining}
 \href{https://www.scratchapixel.com/lessons/3d-basic-rendering/perspective-and-orthographic-projection-matrix/opengl-perspective-projection-matrix.html}{perspective}
 \href{https://learnwebgl.brown37.net/08_projections/projections_perspective.html}{projection}
 appears to be:
 \begin{itemize}
 \item Begin with an overly-academic explanation of perspective in terms of camera optics and trigonometry
 \item Do not implement or demonstrate any of the systems or mathematics
  described in the preceding pages of explanations; instead abruptly hide all
  magic behind \texttt{glm::perspective}
 \item Refuse to explain or clarify further
 \item Continue for the next 30 chapters/articles without ever revisiting focal
  length, view frustums, depth of field, etc.\ again
 \end{itemize}
 It is sufficient to instead rationalize/implement ``perspective'' as:
 \begin{quote}
  Perspective is the division of X and Y coordinates by Z, where the coordinate
  $(0, 0, 0)$ is the view origin (and the center of the screen/projection).
 \end{quote}
 Defining perspective this way also works for OpenGL, with some slight
 adjustment, notably to deal with OpenGL's
 \href{https://registry.khronos.org/OpenGL/specs/gl/glspec20.pdf}{definition} of
 ``normalized device coordinates''.
 I note that (unlike Dreamcast) one can't actually divide by Z on R500 (nor
 OpenGL), both because the VTE doesn't support this, and because the texture
 unit doesn't support this. Of course, I tried it anyway:
 \begin{figure}
  \includegraphics{videos/cube_warped_textures.png}
  \caption*{R500 DVI capture, \texttt{texture\_cube\_warping.c} \\
    (unrelated to this demo, R500 also interestingly has a dedicated ``disable perspective-correct texture mapping'' bit)}
 \end{figure}
 Instead, in both cases, the R500 uses the W coordinate for division. This turns
 out to be very convenient, because it means that the ``field of
 view''/perspective scale (W) and the Z-buffer/depth test scale (Z) can be
 adjusted independently.
 \subsection{3D clipping}
 Here are several examples of improperly scaled Z values, which are being clipped
 by the setup unit:
 \begin{figure}
  \includegraphics{videos/cube_clipped_far.png}
  \caption*{R500 DVI capture, \texttt{texture\_cube\_clear\_zwrite\_vertex\_shader\_optimize\_zscale.c} \\
  (``far'' clip plane intersection)}
 \end{figure}
 \begin{figure}
  \includegraphics{videos/cube_clipped_near.png}
  \caption*{R500 DVI capture, \texttt{texture\_cube\_clear\_zwrite\_vertex\_shader\_optimize\_zscale.c} \\
  (``near'' clip plane intersection)}
 \end{figure}
 \begin{figure}
  \includegraphics{videos/cube_clipped_near_opengl.png}
  \caption*{R500 DVI capture, \texttt{texture\_cube\_clear\_zwrite\_vertex\_shader\_optimize\_zscale.c} \\
  (I am curious to learn under what circumstances the OpenGL designers thought\\ $-w_{c} < z_{c} < w_{c}$ was a good idea)}
 \end{figure}
 \section{Progress: 31 Oct 2025}
 From 30 Oct 2025 to 31 Oct 2025, I achieved the following (non-chronological):
 \begin{itemize}
 \item I implemented a \href{https://git.idk.st/bilbo/r500/src/branch/main/drm/matrix_cubesphere_specular.fs.asm}{diffuse/specular lighting fragment shader} in R500 fragment shader assembly
 \item I made vertex shaders that represent coordinate space transformations
  using matrix multiplications rather than ad-hoc arithmetic
 \item While writing demos that pass multiple (interpolated) vectors from the
  vertex shader to the fragment shader, I learned more about \href{https://git.idk.st/bilbo/r500/src/commit/f43ac599f9/drm/matrix_cubesphere_specular_suzanne.cpp#L444-L512}{``rasterizer instructions''}
 \item I made a demo that uses more than one texture for the entire scene
  (by \href{https://git.idk.st/bilbo/r500/src/commit/f43ac599f9/drm/pumpkin_man.c#L272-L317}{reconfiguring
  the texture unit for each ``object''})
 \end{itemize}
 \subsection{Lighting demo}
 \begin{figure}
  \includegraphics{videos/suzanne.png}
  \caption*{R500 DVI capture, \texttt{matrix\_cubesphere\_specular\_suzanne.cpp} \\
  (subdivided Suzanne mesh, 15,744 triangles)}
 \end{figure}
 Despite being a ``simple'' lighting demo, a surprising number of things need to
 happen simultaneously before it becomes possible.
 Where vertex shaders from previous demos were passed at most a single scalar
 variable for animation/timing, the vertex shader in this demo uses
 \href{https://git.idk.st/bilbo/r500/src/commit/f43ac599f9/drm/matrix_cubesphere_specular_suzanne.cpp#L301-L326}{10 vectors} as
 input:
 \begin{itemize}
 \item 4 vectors for a ``local space to clip space'' transformation matrix
 \item 4 vectors for a ``local space to world space'' transformation matrix (used for lighting)
 \item 1 vector for a ``light position'' (in world space coordinates, used for lighting)
 \item 1 vector for a ``view origin'' (in world space coordinates, used for lighting)
 \end{itemize}
 Additionally, where previous demos passed at most a single vector from the
 vertex shader to the fragment shader (vertex color or texture coordinates), this
 demo passes
 \href{https://git.idk.st/bilbo/r500/src/commit/f43ac599f9/drm/matrix_cubesphere_specular_suzanne.cpp#L444-L512}{5 vectors}
 from the vertex shader to the fragment shader, all of which are used
 by the lighting calculation:
 \begin{itemize}
 \item world space position
 \item world space normal
 \item world space light position
 \item world space view origin
 \item uv space texture coordinates
 \end{itemize}
 \subsection{Learn algebra by writing fragment shader assembly}
 Prior to today, I did not know about this transformation/equivalence:
 \begin{gather*}
 x^{n} = 2^{\left( n\cdot\frac{\log(x)}{\log(2)} \right)}
 \end{gather*}
 While the R500 fragment shader alpha unit does not have a \texttt{POW} operation,
 it does have \href{https://git.idk.st/bilbo/r500/src/commit/f43ac599f9/drm/matrix_cubesphere_specular.fs.asm#L93-L99}{\texttt{EX2} and \texttt{LN2}}
 operations.
 For example, one could implement $a^{32}$ in R500 fragment shader assembly as:
 \begin{figure}
  \href{verbatim/pow_fragment_shader.fs.asm}{\includegraphics{verbatim/output/pow_fragment_shader.fs.asm.pdf}}
 \end{figure}
 This ``arbitrary exponents with arbitrary bases'' pattern is used in the
 lighting demo fragment shader as part of the ``specular intensity'' calculation.
 This fragment shader unit feature is very cool, because a software
 implementation of a generalized floating-point \texttt{pow} function is
 extremely
 \href{https://git.musl-libc.org/cgit/musl/tree/src/math/powf.c?id=cb5c057c87240a9534f8e0d9b7ff2560082f6218}{computationally expensive}
 otherwise.
 \end{document}
--- a/resize_svg.py
+++ b/resize_svg.py
@ -19,4 +19,4 @@ def transform():
 lines = list(transform())
 with open(sys.argv[1], 'w') as f:
-    f.write('\n'.join(lines))
+    f.write(''.join(lines))
--- a/verbatim/cube_rotate_const_move.vs.asm
+++ b/verbatim/cube_rotate_const_move.vs.asm
@ -0,0 +1,8 @@
 -- CONST[0] = {0.159155, 0.5, 6.283185, -3.141593}
 -- CONST[1] = {theta1, theta2, 0.2, 0.5}
 temp[0].xy   = VE_ADD  const[1].xy__ const[1].00__ ;
 temp[0].xy   = VE_MAD   temp[0].xy__   const[0].xx__  const[0].yy__ ;
 temp[0].xy   = VE_FRC   temp[0].xy__ ;
 temp[0].xy   = VE_MAD   temp[0].xy__   const[0].zz__  const[0].ww__ ;
--- a/verbatim/cube_rotate_const_move_illegal.vs.asm
+++ b/verbatim/cube_rotate_const_move_illegal.vs.asm
@ -0,0 +1,3 @@
 -- this is an illegal instruction:
 -- const[1] and const[0] can not be read simultaneously
 temp[0].xy   = VE_MAD   const[1].xy__   const[0].xx__  const[0].yy__ ;
--- a/verbatim/cube_rotate_const_move_rearrange.vs.asm
+++ b/verbatim/cube_rotate_const_move_rearrange.vs.asm
@ -0,0 +1,2 @@
 -- CONST[0] = {theta1, theta2, 0.159155, 0.5}
 -- CONST[1] = {6.283185, -3.141593, 0.2, 0.5}
--- a/verbatim/cube_rotate_const_move_rearrange_mad.vs.asm
+++ b/verbatim/cube_rotate_const_move_rearrange_mad.vs.asm
@ -0,0 +1,7 @@
 -- the VE_ADD instruction is now not necessary/deleted:
 -- temp[0].xy   = VE_ADD  const[1].xy__ const[1].00__ ;
 -- const addresses and swizzles changed:
 temp[0].xy   = VE_MAD   const[0].xy__  const[0].zz__  const[0].ww__ ;
 temp[0].xy   = VE_FRC   temp[0].xy__ ;
 temp[0].xy   = VE_MAD   temp[0].xy__   const[1].xx__  const[1].yy__ ;
--- a/verbatim/cube_rotate_dual_math.vs.asm
+++ b/verbatim/cube_rotate_dual_math.vs.asm
@ -0,0 +1,14 @@
 temp[3].x     = ME_SIN  temp[0].___x ;
 temp[3].y     = ME_COS  temp[0].___x ;
 alt_temp[3].z = ME_SIN  temp[0].___y ;
 -- first rotation
 temp[1].yz    = VE_MUL  input[0]._-zz_  temp[3]._xy_ ,
 alt_temp[3].w = ME_COS  temp[0].y_ ;
 temp[1].xyz   = VE_MAD  input[0].xyy_  temp[3].1yx_      temp[1].0yz_ ;
 -- second rotation
 temp[2].xz    = VE_MUL  temp[1].-z_z_  alt_temp[3].z_w_ ;
 temp[2].xyz   = VE_MAD  temp[1].xyx_   alt_temp[3].w1z_  temp[2].x0z_ ;
--- a/verbatim/cube_rotate_dual_math_single_instruction.vs.asm
+++ b/verbatim/cube_rotate_dual_math_single_instruction.vs.asm
@ -0,0 +1,2 @@
 temp[1].yz    = VE_MUL   input[0]._-zz_  temp[3]._xy_ ,
 alt_temp[3].w = ME_COS   temp[0].y_ ;
--- a/verbatim/cube_rotate_sin_cos.vs.asm
+++ b/verbatim/cube_rotate_sin_cos.vs.asm
@ -0,0 +1,14 @@
 temp[3].x     = ME_SIN  temp[0].___x ;
 temp[3].y     = ME_COS  temp[0].___x ;
 temp[3].z     = ME_SIN  temp[0].___y ;
 temp[3].w     = ME_COS  temp[0].___y ;
 -- first rotation
 temp[1].yz    = VE_MUL  input[0]._-zz_  temp[3]._xy_ ;
 temp[1].xyz   = VE_MAD  input[0].xyy_   temp[3].1yx_   temp[1].0yz_ ;
 -- second rotation
 temp[2].xz    = VE_MUL  temp[1].-z_z_   temp[3].z_w_ ;
 temp[2].xyz   = VE_MAD  temp[1].xyx_    temp[3].w1z_   temp[2].x0z_ ;
--- a/verbatim/pow_fragment_shader.fs.asm
+++ b/verbatim/pow_fragment_shader.fs.asm
@ -0,0 +1,12 @@
 -- a = log(a) / log(2)
 src0.a = temp[0] :
  temp[0].a = LN2 src0.a ;
 -- a = a * 32.0 + 0
 src0.a = temp[0] ,
 src1.a = float(96) :  -- 32.0 (or any other constant)
  temp[0].a = MAD src0.a src1.a src1.0 ;
 -- a = 2 ^ a
 src0.a = temp[0] :
  temp[0].a = EX2 src0.a ;
--- a/verbatim/r500_view_clip.c
+++ b/verbatim/r500_view_clip.c
@ -0,0 +1,6 @@
 VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0)
 VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0)
 VAP_VTE_CNTL__VTX_XY_FMT(1)
 VAP_VTE_CNTL__VTX_Z_FMT(0)
 VAP_VTE_CNTL__VTX_W0_FMT(1)
 VAP_CNTL__DX_CLIP_SPACE_DEF(1)
--- a/verbatim/sin_operand_remap.vs.asm
+++ b/verbatim/sin_operand_remap.vs.asm
@ -0,0 +1,8 @@
 -- CONST[0] = {0.159155, 0.5, 6.283185, -3.141593}
 -- t = t * 0.159155 + 0.5
 temp[0].xy   = VE_MAD   temp[0].xy__   const[0].xx__  const[0].yy__ ;
 -- t = frac(t)
 temp[0].xy   = VE_FRC   temp[0].xy__ ;
 -- t = t * 6.283185 + -3.141593
 temp[0].xy   = VE_MAD   temp[0].xy__   const[0].zz__  const[0].ww__ ;
--- a/videos/cube_clipped_far.mp4
+++ b/videos/cube_clipped_far.mp4
--- a/videos/cube_clipped_far.png
+++ b/videos/cube_clipped_far.png
--- a/videos/cube_clipped_near.mp4
+++ b/videos/cube_clipped_near.mp4
--- a/videos/cube_clipped_near.png
+++ b/videos/cube_clipped_near.png
--- a/videos/cube_clipped_near_opengl.mp4
+++ b/videos/cube_clipped_near_opengl.mp4
--- a/videos/cube_clipped_near_opengl.png
+++ b/videos/cube_clipped_near_opengl.png
--- a/videos/cube_warped_textures.mp4
+++ b/videos/cube_warped_textures.mp4
--- a/videos/cube_warped_textures.png
+++ b/videos/cube_warped_textures.png
--- a/videos/suzanne.mp4
+++ b/videos/suzanne.mp4
--- a/videos/suzanne.png
+++ b/videos/suzanne.png
		`@ -0,0 +1,3 @@`
							`dot -Tsvg z_operations.dot > z_operations.svg`

							`#sed -i 's/scale(1 1)/scale(0.75 0.75)/g' z_operations.svg`
		`@ -0,0 +1,2 @@`
							`-- CONST[0] = {theta1, theta2, 0.159155, 0.5}`
							`-- CONST[1] = {6.283185, -3.141593, 0.2, 0.5}`
		`@ -0,0 +1,2 @@`
							`temp[1].yz = VE_MUL input[0]._-zz_ temp[3]._xy_ ,`
							`alt_temp[3].w = ME_COS temp[0].y_ ;`