october 31 update
2
.gitignore
vendored
@ -19,3 +19,5 @@ verbatim/*.svg
|
|||||||
verbatim/*.pdf
|
verbatim/*.pdf
|
||||||
verbatim/output
|
verbatim/output
|
||||||
images/*.data
|
images/*.data
|
||||||
|
/index*.svg
|
||||||
|
diagrams/*-.svg
|
||||||
11
build.sh
@ -15,10 +15,17 @@ echo 'figure.figure { margin-left: 20px; margin-right: 20px; }' >> index.css
|
|||||||
echo 'pre.verbatim { font-size: 0.9em; }' >> index.css
|
echo 'pre.verbatim { font-size: 0.9em; }' >> index.css
|
||||||
sed -i 's|color-scheme: light dark;||g' index.css
|
sed -i 's|color-scheme: light dark;||g' index.css
|
||||||
echo 'figcaption.caption { margin-bottom: 1.3em; margin-top: 0.3em; }' >> index.css
|
echo 'figcaption.caption { margin-bottom: 1.3em; margin-top: 0.3em; }' >> index.css
|
||||||
|
echo '.cmti-10 { font-style: italic; }' >> index.css
|
||||||
|
|
||||||
sed -i 's/index.css/index2.css/g' index.html
|
sed -i 's/˜/~/g' index.html
|
||||||
|
|
||||||
mv index.css index2.css
|
sed -i "s|<p class='noindent'><object class='graphics' data='diagrams/z_operations.svg' name='picture diagrams/z_operations' type='image/svg+xml'></object>|<p class='noindent' style='text-align: center;'><object class='graphics' style='width: 40em;' data='diagrams/z_operations.svg' name='picture diagrams/z_operations' type='image/svg+xml'></object>|g" index.html
|
||||||
|
|
||||||
|
sed -i '/height: 2.5em;/d' index.css
|
||||||
|
|
||||||
|
sed -i 's/index.css/index3.css/g' index.html
|
||||||
|
|
||||||
|
mv index.css index3.css
|
||||||
|
|
||||||
python replace_video.py index.html
|
python replace_video.py index.html
|
||||||
|
|
||||||
|
|||||||
3
diagrams/build.sh
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
dot -Tsvg z_operations.dot > z_operations.svg
|
||||||
|
|
||||||
|
#sed -i 's/scale(1 1)/scale(0.75 0.75)/g' z_operations.svg
|
||||||
31
diagrams/resize_dot_svg.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
import sys
|
||||||
|
|
||||||
|
# Factor applied to the width and height of the SVG viewBox.
scale = 0.75


def scale_svg(lines):
    """Yield the text of an ``<svg ...>`` opening tag with its viewBox resized.

    ``lines`` is the list of source lines that make up the opening tag.  They
    are joined, the ``viewBox="x y width height"`` attribute is located, and
    the text is yielded back in three pieces: everything before the attribute,
    the rewritten attribute (width and height multiplied by the module-level
    ``scale``; x and y unchanged), and everything after it.

    Raises:
        ValueError: if no double-quoted ``viewBox`` attribute is present, it is
            unterminated, or it does not hold exactly four numbers.
    """
    svg = "".join(lines)

    head, marker, rest = svg.partition('viewBox="')
    if not marker:
        raise ValueError("no viewBox attribute found in <svg> tag")

    viewbox, quote, tail = rest.partition('"')
    if not quote:
        raise ValueError("unterminated viewBox attribute in <svg> tag")

    # SVG allows the four numbers to be separated by whitespace and/or commas.
    x, y, width, height = map(float, viewbox.replace(",", " ").split())

    yield head
    yield f'viewBox="{x} {y} {width * scale} {height * scale}"'
    yield tail
|
||||||
|
|
||||||
|
def transform():
    """Yield the contents of the file named by ``sys.argv[1]``, piece by piece.

    Lines outside the ``<svg ...>`` opening tag are yielded unchanged.  The
    lines that make up the opening tag (from the line starting with ``<svg``
    through the first line ending with ``>``) are collected and passed to
    ``scale_svg``, whose output is yielded in their place.
    """
    with open(sys.argv[1]) as f:
        svg_lines = []

        for line in f:
            stripped = line.strip()
            if svg_lines or stripped.startswith("<svg"):
                svg_lines.append(line)
                # Check the closing ">" on every buffered line, including the
                # "<svg" line itself, so a tag written on a single line is
                # handled as soon as it is seen.
                if stripped.endswith(">"):
                    yield from scale_svg(svg_lines)
                    svg_lines = []
            else:
                yield line

        # The tag never closed before end of file: emit what was buffered
        # unchanged rather than silently dropping it.
        yield from svg_lines
|
||||||
|
|
||||||
|
# Consume the whole generator before reopening the file for writing: mode 'w'
# truncates the file, and transform() reads lazily from that same path.
lines = [*transform()]
with open(sys.argv[1], 'w') as f:
    f.writelines(lines)
|
||||||
BIN
diagrams/sin_clamp.pdf
Normal file
34
diagrams/sin_clamp.tex
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
\documentclass[varwidth=13.1cm, border={0.0cm 0.0cm 0.0cm 0.0cm}]{standalone}
|
||||||
|
\usepackage{tikz}
|
||||||
|
\usepackage[dvipsnames]{xcolor}
|
||||||
|
\usepackage{pgfplots}
|
||||||
|
\pgfplotsset{compat=1.18}
|
||||||
|
\usepackage{amsmath}
|
||||||
|
\newcommand{\Clamp}[1]{\operatorname{clamp}#1}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\begin{tikzpicture}[scale=0.5]
|
||||||
|
|
||||||
|
\draw[very thin,color=gray] (-pi * 3,-pi * 1.2) grid (pi * 3, pi * 1.2);
|
||||||
|
\draw[->] (-3.2*pi,0) -- (3.2*pi,0) node[right] {$x$};
|
||||||
|
\draw[->] (0,-pi * 1.4) -- (0,pi * 1.5) node[above] {$f(x)$};
|
||||||
|
|
||||||
|
\draw[thick, color=NavyBlue] plot [domain=-pi * 3:pi * 3, samples=100] (\x, {min(max(\x, -pi), pi)} );
|
||||||
|
\draw[thick, color=OrangeRed] plot [domain=-pi * 3:pi * 3, samples=1000] (\x, {sin(min(max(\x, -pi), pi) r)} );
|
||||||
|
|
||||||
|
\node[NavyBlue] at (0, -5.4) {$f(x) = \Clamp(x, -\pi, +\pi) $};
|
||||||
|
\node[OrangeRed] at (0, -6.6) {$f(x) = \sin(\Clamp(x, -\pi, +\pi)) $};
|
||||||
|
|
||||||
|
\draw [dashed, color=ForestGreen] (-2 * pi,-3.8) -- (-2 * pi,3.8) node[above] {$x=-2\pi$};
|
||||||
|
\draw [dashed, color=Brown] (2 * pi,-3.8) -- (2 * pi,3.8) node[above] {$x=2\pi$};
|
||||||
|
|
||||||
|
\draw [dashed, color=Fuchsia] (-3.0,pi) -- (3.0,pi) ;
|
||||||
|
\draw [color=Fuchsia] (0, pi + 0.5) node {$y=\pi$};
|
||||||
|
|
||||||
|
\draw [dashed, color=Peach] (-3.0,-pi) -- (3.0,-pi) ;
|
||||||
|
\draw [color=Peach] (0, -pi + 0.5) node {$y=-\pi$};
|
||||||
|
|
||||||
|
\end{tikzpicture}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
BIN
diagrams/sin_frac.pdf
Normal file
38
diagrams/sin_frac.tex
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
\documentclass[varwidth=13.1cm, border={0.0cm 0.0cm 0.0cm 0.0cm}]{standalone}
|
||||||
|
\usepackage{tikz}
|
||||||
|
\usepackage[dvipsnames]{xcolor}
|
||||||
|
\usepackage{pgfplots}
|
||||||
|
\pgfplotsset{compat=1.18}
|
||||||
|
\usepackage{amsmath}
|
||||||
|
\newcommand{\Frac}[1]{\operatorname{frac}#1}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\begin{tikzpicture}[scale=0.5]
|
||||||
|
|
||||||
|
\draw[very thin,color=gray] (-pi * 3,-pi * 1.2) grid (pi * 3, pi * 1.2);
|
||||||
|
\draw[->] (-3.2*pi,0) -- (3.2*pi,0) node[right] {$x$};
|
||||||
|
\draw[->] (0,-pi * 1.4) -- (0,pi * 1.5) node[above] {$f(x)$};
|
||||||
|
|
||||||
|
\foreach \i in {-2, 0, 2}{
|
||||||
|
\pgfmathsetmacro{\start}{(\i - 1) * pi}
|
||||||
|
\pgfmathsetmacro{\end} {(\i + 1) * pi}
|
||||||
|
\draw[thick, color=NavyBlue] plot [domain=\start:\end, samples=100] (\x, {((\x * 1/(2 * pi) + 0.5) - floor(\x * 1/(2 * pi) + 0.5)) * 2 * pi - pi} );
|
||||||
|
}
|
||||||
|
\draw[thick, color=OrangeRed] plot [domain=-pi * 3:pi * 3, samples=1000] (\x, {sin((((\x * 1/(2 * pi) + 0.5) - floor(\x * 1/(2 * pi) + 0.5)) * 2 * pi - pi) r)} );
|
||||||
|
|
||||||
|
\node[NavyBlue] at (0, -5.4) {$f(x) = \Frac(x \cdot \frac{1}{2\pi}+0.5) \cdot 2\pi - \pi $};
|
||||||
|
\node[OrangeRed] at (0, -6.6) {$f(x) = \sin( \Frac(x \cdot \frac{1}{2\pi}+0.5) \cdot 2\pi - \pi ) $};
|
||||||
|
|
||||||
|
\draw [dashed, color=ForestGreen] (-2 * pi,-3.8) -- (-2 * pi,3.8) node[above] {$x=-2\pi$};
|
||||||
|
\draw [dashed, color=Brown] (2 * pi,-3.8) -- (2 * pi,3.8) node[above] {$x=2\pi$};
|
||||||
|
|
||||||
|
\draw [dashed, color=Fuchsia] (-3.0,pi) -- (3.0,pi) ;
|
||||||
|
\draw [color=Fuchsia] (0, pi + 0.5) node {$y=\pi$};
|
||||||
|
|
||||||
|
\draw [dashed, color=Peach] (-3.0,-pi) -- (3.0,-pi) ;
|
||||||
|
\draw [color=Peach] (0, -pi + 0.5) node {$y=-\pi$};
|
||||||
|
|
||||||
|
\end{tikzpicture}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
60
diagrams/z_operations.dot
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
digraph G {
|
||||||
|
|
||||||
|
vertex_shader [label="(from the vertex shader)"]
|
||||||
|
|
||||||
|
subgraph cluster_clipping {
|
||||||
|
label = "clipping"
|
||||||
|
DX_CLIP_SPACE_DEF [label="DX_CLIP_SPACE_DEF
|
||||||
|
possibly clip the polygon"]
|
||||||
|
}
|
||||||
|
|
||||||
|
subgraph cluster_perspective {
|
||||||
|
label = "perspective division"
|
||||||
|
|
||||||
|
VTX_Z_FMT [nojustify=true label="VTX_Z_FMT
|
||||||
|
(if enabled) divide Z by W"]
|
||||||
|
}
|
||||||
|
|
||||||
|
subgraph cluster_viewport_transformation {
|
||||||
|
label = "viewport transformation"
|
||||||
|
|
||||||
|
VPORT_Z_SCALE
|
||||||
|
VPORT_Z_OFFSET
|
||||||
|
}
|
||||||
|
|
||||||
|
subgraph cluster_geometry_assembly {
|
||||||
|
}
|
||||||
|
|
||||||
|
subgraph cluster_setup_unit {
|
||||||
|
label = "setup unit"
|
||||||
|
|
||||||
|
SU_DEPTH_SCALE
|
||||||
|
SU_DEPTH_OFFSET
|
||||||
|
}
|
||||||
|
|
||||||
|
subgraph cluster_zfunc {
|
||||||
|
label = "ZFUNC"
|
||||||
|
{ rank=same
|
||||||
|
depth_test [shape=box label="depth test"]
|
||||||
|
depth_pass [shape=box label="depth pass"]
|
||||||
|
}
|
||||||
|
depth_test -> depth_pass
|
||||||
|
}
|
||||||
|
|
||||||
|
Z_BUFFER [shape=invhouse label="(write the new Z
|
||||||
|
value to the Z-buffer)"]
|
||||||
|
|
||||||
|
fragment_shader [label="(to the fragment shader)"]
|
||||||
|
|
||||||
|
vertex_shader -> DX_CLIP_SPACE_DEF
|
||||||
|
DX_CLIP_SPACE_DEF -> VTX_Z_FMT
|
||||||
|
VTX_Z_FMT -> VPORT_Z_SCALE
|
||||||
|
VPORT_Z_SCALE -> VPORT_Z_OFFSET
|
||||||
|
VPORT_Z_OFFSET -> SU_DEPTH_SCALE
|
||||||
|
SU_DEPTH_SCALE -> SU_DEPTH_OFFSET
|
||||||
|
SU_DEPTH_OFFSET -> depth_test
|
||||||
|
depth_test -> Z_BUFFER
|
||||||
|
|
||||||
|
VPORT_Z_OFFSET -> depth_pass
|
||||||
|
depth_pass -> fragment_shader
|
||||||
|
}
|
||||||
173
diagrams/z_operations.svg
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
|
||||||
|
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||||
|
<!-- Generated by graphviz version 12.2.1 (20241206.2353)
|
||||||
|
-->
|
||||||
|
<!-- Title: G Pages: 1 -->
|
||||||
|
<svg width="588pt" height="765pt"
|
||||||
|
viewBox="0.00 0.00 588.26 764.75" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||||
|
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 760.75)">
|
||||||
|
<title>G</title>
|
||||||
|
<polygon fill="white" stroke="none" points="-4,4 -4,-760.75 584.26,-760.75 584.26,4 -4,4"/>
|
||||||
|
<g id="clust1" class="cluster">
|
||||||
|
<title>cluster_clipping</title>
|
||||||
|
<polygon fill="none" stroke="black" points="94.4,-611.4 94.4,-712.75 382.4,-712.75 382.4,-611.4 94.4,-611.4"/>
|
||||||
|
<text text-anchor="middle" x="238.4" y="-695.45" font-family="Times,serif" font-size="14.00">clipping</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust2" class="cluster">
|
||||||
|
<title>cluster_perspective</title>
|
||||||
|
<polygon fill="none" stroke="black" points="89.4,-502.04 89.4,-603.4 387.4,-603.4 387.4,-502.04 89.4,-502.04"/>
|
||||||
|
<text text-anchor="middle" x="238.4" y="-586.1" font-family="Times,serif" font-size="14.00">perspective division</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust3" class="cluster">
|
||||||
|
<title>cluster_viewport_transformation</title>
|
||||||
|
<polygon fill="none" stroke="black" points="125.4,-344.79 125.4,-494.04 351.4,-494.04 351.4,-344.79 125.4,-344.79"/>
|
||||||
|
<text text-anchor="middle" x="238.4" y="-476.74" font-family="Times,serif" font-size="14.00">viewport transformation</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust5" class="cluster">
|
||||||
|
<title>cluster_setup_unit</title>
|
||||||
|
<polygon fill="none" stroke="black" points="101.4,-187.54 101.4,-336.79 347.4,-336.79 347.4,-187.54 101.4,-187.54"/>
|
||||||
|
<text text-anchor="middle" x="224.4" y="-319.49" font-family="Times,serif" font-size="14.00">setup unit</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust6" class="cluster">
|
||||||
|
<title>cluster_zfunc</title>
|
||||||
|
<polygon fill="none" stroke="black" points="187.4,-102.29 187.4,-179.54 405.4,-179.54 405.4,-102.29 187.4,-102.29"/>
|
||||||
|
<text text-anchor="middle" x="296.4" y="-162.24" font-family="Times,serif" font-size="14.00">ZFUNC</text>
|
||||||
|
</g>
|
||||||
|
<!-- vertex_shader -->
|
||||||
|
<g id="node1" class="node">
|
||||||
|
<title>vertex_shader</title>
|
||||||
|
<ellipse fill="none" stroke="black" cx="238.4" cy="-738.75" rx="134.33" ry="18"/>
|
||||||
|
<text text-anchor="middle" x="238.4" y="-734.08" font-family="Times,serif" font-size="14.00">(from the vertex shader)</text>
|
||||||
|
</g>
|
||||||
|
<!-- DX_CLIP_SPACE_DEF -->
|
||||||
|
<g id="node2" class="node">
|
||||||
|
<title>DX_CLIP_SPACE_DEF</title>
|
||||||
|
<ellipse fill="none" stroke="black" cx="238.4" cy="-649.45" rx="136.47" ry="30.05"/>
|
||||||
|
<text text-anchor="middle" x="238.4" y="-653.4" font-family="Times,serif" font-size="14.00">DX_CLIP_SPACE_DEF</text>
|
||||||
|
<text text-anchor="middle" x="238.4" y="-636.15" font-family="Times,serif" font-size="14.00">possibly clip the polygon</text>
|
||||||
|
</g>
|
||||||
|
<!-- vertex_shader->DX_CLIP_SPACE_DEF -->
|
||||||
|
<g id="edge2" class="edge">
|
||||||
|
<title>vertex_shader->DX_CLIP_SPACE_DEF</title>
|
||||||
|
<path fill="none" stroke="black" d="M238.4,-720.5C238.4,-712.05 238.4,-701.49 238.4,-691.15"/>
|
||||||
|
<polygon fill="black" stroke="black" points="241.9,-691.21 238.4,-681.21 234.9,-691.21 241.9,-691.21"/>
|
||||||
|
</g>
|
||||||
|
<!-- VTX_Z_FMT -->
|
||||||
|
<g id="node3" class="node">
|
||||||
|
<title>VTX_Z_FMT</title>
|
||||||
|
<ellipse fill="none" stroke="black" cx="238.4" cy="-540.09" rx="141.24" ry="30.05"/>
|
||||||
|
<text text-anchor="middle" x="238.4" y="-544.04" font-family="Times,serif" font-size="14.00">VTX_Z_FMT</text>
|
||||||
|
<text text-anchor="middle" x="238.4" y="-526.79" font-family="Times,serif" font-size="14.00">(if enabled) divide Z by W</text>
|
||||||
|
</g>
|
||||||
|
<!-- DX_CLIP_SPACE_DEF->VTX_Z_FMT -->
|
||||||
|
<g id="edge3" class="edge">
|
||||||
|
<title>DX_CLIP_SPACE_DEF->VTX_Z_FMT</title>
|
||||||
|
<path fill="none" stroke="black" d="M238.4,-619.11C238.4,-607.63 238.4,-594.27 238.4,-581.88"/>
|
||||||
|
<polygon fill="black" stroke="black" points="241.9,-581.91 238.4,-571.91 234.9,-581.91 241.9,-581.91"/>
|
||||||
|
</g>
|
||||||
|
<!-- VPORT_Z_SCALE -->
|
||||||
|
<g id="node4" class="node">
|
||||||
|
<title>VPORT_Z_SCALE</title>
|
||||||
|
<ellipse fill="none" stroke="black" cx="238.4" cy="-442.79" rx="97.51" ry="18"/>
|
||||||
|
<text text-anchor="middle" x="238.4" y="-438.12" font-family="Times,serif" font-size="14.00">VPORT_Z_SCALE</text>
|
||||||
|
</g>
|
||||||
|
<!-- VTX_Z_FMT->VPORT_Z_SCALE -->
|
||||||
|
<g id="edge4" class="edge">
|
||||||
|
<title>VTX_Z_FMT->VPORT_Z_SCALE</title>
|
||||||
|
<path fill="none" stroke="black" d="M238.4,-509.72C238.4,-497.95 238.4,-484.43 238.4,-472.7"/>
|
||||||
|
<polygon fill="black" stroke="black" points="241.9,-472.8 238.4,-462.8 234.9,-472.8 241.9,-472.8"/>
|
||||||
|
</g>
|
||||||
|
<!-- VPORT_Z_OFFSET -->
|
||||||
|
<g id="node5" class="node">
|
||||||
|
<title>VPORT_Z_OFFSET</title>
|
||||||
|
<ellipse fill="none" stroke="black" cx="238.4" cy="-370.79" rx="104.87" ry="18"/>
|
||||||
|
<text text-anchor="middle" x="238.4" y="-366.12" font-family="Times,serif" font-size="14.00">VPORT_Z_OFFSET</text>
|
||||||
|
</g>
|
||||||
|
<!-- VPORT_Z_SCALE->VPORT_Z_OFFSET -->
|
||||||
|
<g id="edge5" class="edge">
|
||||||
|
<title>VPORT_Z_SCALE->VPORT_Z_OFFSET</title>
|
||||||
|
<path fill="none" stroke="black" d="M238.4,-424.49C238.4,-417.2 238.4,-408.52 238.4,-400.33"/>
|
||||||
|
<polygon fill="black" stroke="black" points="241.9,-400.41 238.4,-390.41 234.9,-400.41 241.9,-400.41"/>
|
||||||
|
</g>
|
||||||
|
<!-- SU_DEPTH_SCALE -->
|
||||||
|
<g id="node6" class="node">
|
||||||
|
<title>SU_DEPTH_SCALE</title>
|
||||||
|
<ellipse fill="none" stroke="black" cx="227.4" cy="-285.54" rx="107.5" ry="18"/>
|
||||||
|
<text text-anchor="middle" x="227.4" y="-280.87" font-family="Times,serif" font-size="14.00">SU_DEPTH_SCALE</text>
|
||||||
|
</g>
|
||||||
|
<!-- VPORT_Z_OFFSET->SU_DEPTH_SCALE -->
|
||||||
|
<g id="edge6" class="edge">
|
||||||
|
<title>VPORT_Z_OFFSET->SU_DEPTH_SCALE</title>
|
||||||
|
<path fill="none" stroke="black" d="M236.12,-352.54C234.69,-341.72 232.81,-327.49 231.16,-315.02"/>
|
||||||
|
<polygon fill="black" stroke="black" points="234.65,-314.73 229.87,-305.27 227.72,-315.64 234.65,-314.73"/>
|
||||||
|
</g>
|
||||||
|
<!-- depth_pass -->
|
||||||
|
<g id="node9" class="node">
|
||||||
|
<title>depth_pass</title>
|
||||||
|
<polygon fill="none" stroke="black" points="397.4,-146.29 303.4,-146.29 303.4,-110.29 397.4,-110.29 397.4,-146.29"/>
|
||||||
|
<text text-anchor="middle" x="350.4" y="-123.62" font-family="Times,serif" font-size="14.00">depth pass</text>
|
||||||
|
</g>
|
||||||
|
<!-- VPORT_Z_OFFSET->depth_pass -->
|
||||||
|
<g id="edge10" class="edge">
|
||||||
|
<title>VPORT_Z_OFFSET->depth_pass</title>
|
||||||
|
<path fill="none" stroke="black" d="M321.07,-359.37C332.91,-354.32 343.68,-347.09 351.4,-336.79 390.89,-284.12 373.49,-200.91 360.3,-157.6"/>
|
||||||
|
<polygon fill="black" stroke="black" points="363.67,-156.65 357.29,-148.19 357,-158.78 363.67,-156.65"/>
|
||||||
|
</g>
|
||||||
|
<!-- SU_DEPTH_OFFSET -->
|
||||||
|
<g id="node7" class="node">
|
||||||
|
<title>SU_DEPTH_OFFSET</title>
|
||||||
|
<ellipse fill="none" stroke="black" cx="224.4" cy="-213.54" rx="114.87" ry="18"/>
|
||||||
|
<text text-anchor="middle" x="224.4" y="-208.87" font-family="Times,serif" font-size="14.00">SU_DEPTH_OFFSET</text>
|
||||||
|
</g>
|
||||||
|
<!-- SU_DEPTH_SCALE->SU_DEPTH_OFFSET -->
|
||||||
|
<g id="edge7" class="edge">
|
||||||
|
<title>SU_DEPTH_SCALE->SU_DEPTH_OFFSET</title>
|
||||||
|
<path fill="none" stroke="black" d="M226.66,-267.24C226.35,-259.95 225.97,-251.27 225.62,-243.08"/>
|
||||||
|
<polygon fill="black" stroke="black" points="229.12,-243 225.2,-233.16 222.13,-243.3 229.12,-243"/>
|
||||||
|
</g>
|
||||||
|
<!-- depth_test -->
|
||||||
|
<g id="node8" class="node">
|
||||||
|
<title>depth_test</title>
|
||||||
|
<polygon fill="none" stroke="black" points="285.15,-146.29 195.65,-146.29 195.65,-110.29 285.15,-110.29 285.15,-146.29"/>
|
||||||
|
<text text-anchor="middle" x="240.4" y="-123.62" font-family="Times,serif" font-size="14.00">depth test</text>
|
||||||
|
</g>
|
||||||
|
<!-- SU_DEPTH_OFFSET->depth_test -->
|
||||||
|
<g id="edge8" class="edge">
|
||||||
|
<title>SU_DEPTH_OFFSET->depth_test</title>
|
||||||
|
<path fill="none" stroke="black" d="M227.72,-195.29C229.8,-184.47 232.53,-170.24 234.93,-157.77"/>
|
||||||
|
<polygon fill="black" stroke="black" points="238.35,-158.49 236.8,-148.01 231.48,-157.17 238.35,-158.49"/>
|
||||||
|
</g>
|
||||||
|
<!-- depth_test->depth_pass -->
|
||||||
|
<g id="edge1" class="edge">
|
||||||
|
<title>depth_test->depth_pass</title>
|
||||||
|
<path fill="none" stroke="black" d="M285.52,-128.29C287.52,-128.29 289.53,-128.29 291.53,-128.29"/>
|
||||||
|
<polygon fill="black" stroke="black" points="291.49,-131.79 301.49,-128.29 291.49,-124.79 291.49,-131.79"/>
|
||||||
|
</g>
|
||||||
|
<!-- Z_BUFFER -->
|
||||||
|
<g id="node10" class="node">
|
||||||
|
<title>Z_BUFFER</title>
|
||||||
|
<polygon fill="none" stroke="black" points="0,-25.67 146.4,0 292.8,-25.67 292.66,-67.2 0.14,-67.2 0,-25.67"/>
|
||||||
|
<text text-anchor="middle" x="146.4" y="-41.1" font-family="Times,serif" font-size="14.00">(write the new Z</text>
|
||||||
|
<text text-anchor="middle" x="146.4" y="-23.85" font-family="Times,serif" font-size="14.00">value to the Z-buffer)</text>
|
||||||
|
</g>
|
||||||
|
<!-- depth_test->Z_BUFFER -->
|
||||||
|
<g id="edge9" class="edge">
|
||||||
|
<title>depth_test->Z_BUFFER</title>
|
||||||
|
<path fill="none" stroke="black" d="M222.28,-110.1C211.86,-100.23 198.36,-87.42 185.63,-75.35"/>
|
||||||
|
<polygon fill="black" stroke="black" points="188.19,-72.95 178.52,-68.61 183.37,-78.03 188.19,-72.95"/>
|
||||||
|
</g>
|
||||||
|
<!-- fragment_shader -->
|
||||||
|
<g id="node11" class="node">
|
||||||
|
<title>fragment_shader</title>
|
||||||
|
<ellipse fill="none" stroke="black" cx="445.4" cy="-37.15" rx="134.86" ry="18"/>
|
||||||
|
<text text-anchor="middle" x="445.4" y="-32.47" font-family="Times,serif" font-size="14.00">(to the fragment shader)</text>
|
||||||
|
</g>
|
||||||
|
<!-- depth_pass->fragment_shader -->
|
||||||
|
<g id="edge11" class="edge">
|
||||||
|
<title>depth_pass->fragment_shader</title>
|
||||||
|
<path fill="none" stroke="black" d="M368.72,-110.1C383.04,-96.66 403.15,-77.79 419.18,-62.75"/>
|
||||||
|
<polygon fill="black" stroke="black" points="421.14,-65.71 426.04,-56.32 416.35,-60.61 421.14,-65.71"/>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 9.1 KiB |
BIN
images/cube_scene.png
Normal file
|
After Width: | Height: | Size: 79 KiB |
BIN
images/plane_scene.png
Normal file
|
After Width: | Height: | Size: 338 KiB |
BIN
images/z_buffer_clipped.png
Normal file
|
After Width: | Height: | Size: 9.8 KiB |
BIN
images/z_buffer_cube.png
Normal file
|
After Width: | Height: | Size: 65 KiB |
BIN
images/z_buffer_cube_range.png
Normal file
|
After Width: | Height: | Size: 39 KiB |
BIN
images/z_buffer_cube_range_back.png
Normal file
|
After Width: | Height: | Size: 29 KiB |
BIN
images/z_buffer_gradient.png
Normal file
|
After Width: | Height: | Size: 8.5 KiB |
BIN
images/z_buffer_overflow.png
Normal file
|
After Width: | Height: | Size: 14 KiB |
BIN
images/z_buffer_perspective.png
Normal file
|
After Width: | Height: | Size: 13 KiB |
BIN
images/z_buffer_perspective_scale.png
Normal file
|
After Width: | Height: | Size: 13 KiB |
469
index.tex
@ -1,5 +1,6 @@
|
|||||||
\documentclass[20pt]{article}
|
\documentclass[20pt]{article}
|
||||||
|
|
||||||
|
\usepackage{amsmath}
|
||||||
\usepackage[font=small,labelfont=bf]{caption}
|
\usepackage[font=small,labelfont=bf]{caption}
|
||||||
\usepackage{hyperref}
|
\usepackage{hyperref}
|
||||||
\hypersetup{
|
\hypersetup{
|
||||||
@ -15,6 +16,7 @@
|
|||||||
\graphicspath{ {./images/} }
|
\graphicspath{ {./images/} }
|
||||||
|
|
||||||
\usepackage{minted}
|
\usepackage{minted}
|
||||||
|
\usepackage{nicefrac}
|
||||||
|
|
||||||
\title{Radeon R500}
|
\title{Radeon R500}
|
||||||
\date{}
|
\date{}
|
||||||
@ -28,9 +30,9 @@
|
|||||||
|
|
||||||
\section{Introduction}
|
\section{Introduction}
|
||||||
|
|
||||||
The primary/minimal project goal is "draw a triangle on a Radeon R500 via direct
|
The primary/minimal project goal is ``draw a triangle on a Radeon R500 via
|
||||||
memory-mapped hardware register and texture memory accesses". This means no
|
direct memory-mapped hardware register and texture memory accesses''. This means
|
||||||
\href{https://mesa3d.org/}{Mesa}, no
|
no \href{https://mesa3d.org/}{Mesa}, no
|
||||||
\href{https://github.com/torvalds/linux/tree/v6.12/drivers/gpu/drm/radeon}{radeon}
|
\href{https://github.com/torvalds/linux/tree/v6.12/drivers/gpu/drm/radeon}{radeon}
|
||||||
kernel module, and certainly no OpenGL or Direct3D.
|
kernel module, and certainly no OpenGL or Direct3D.
|
||||||
|
|
||||||
@ -661,14 +663,45 @@ from scratch. I first implemented the rotation in GLSL:
|
|||||||
\caption*{\texttt{cube\_rotate.vs.glsl}}
|
\caption*{\texttt{cube\_rotate.vs.glsl}}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
I verified that the GLSL version worked as expected in OpenGL, then I translated
|
\subsubsection{Remapping shader unit sin/cos operands}
|
||||||
the GLSL to R500 vertex shader assembly, as:
|
|
||||||
|
Because this shader program depends on being able to calculate sin and cos, this
|
||||||
|
meant I immediately needed to understand how to use the \texttt{ME\_SIN} and
|
||||||
|
\texttt{ME\_COS} operations.
|
||||||
|
|
||||||
|
The R500 vertex shader ME unit clamps sin/cos operands to the range
|
||||||
|
$(-\pi,+\pi)$, as in:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{diagrams/sin_clamp.pdf}{\includegraphics{diagrams/sin_clamp.pdf}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
``Remapping'' floating point values from $(-\infty,+\infty)$ to $(-\pi,+\pi)$ is not
|
||||||
|
obvious. I was not previously aware of this transformation:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{diagrams/sin_frac.pdf}{\includegraphics{diagrams/sin_frac.pdf}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Or, expressed as R500 vertex shader assembly:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{verbatim/sin_operand_remap.vs.asm}{\includegraphics{verbatim/output/sin_operand_remap.vs.asm.pdf}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\subsubsection{Translation of the GLSL vertex shader to R500 vertex shader assembly}
|
||||||
|
|
||||||
|
Having verified that the GLSL version works as expected in OpenGL, and knowing
|
||||||
|
how to use the R500 vertex shader sin/cos operations, I then translated the GLSL
|
||||||
|
to R500 vertex shader assembly, as:
|
||||||
|
|
||||||
\begin{figure}
|
\begin{figure}
|
||||||
\href{verbatim/cube_rotate.vs.asm}{\includegraphics{verbatim/output/cube_rotate.vs.asm.pdf}}
|
\href{verbatim/cube_rotate.vs.asm}{\includegraphics{verbatim/output/cube_rotate.vs.asm.pdf}}
|
||||||
\caption*{\texttt{cube\_rotate.vs.asm}}
|
\caption*{\texttt{cube\_rotate.vs.asm}}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
|
\subsubsection{Vertex shader assembler/code generator debugging}
|
||||||
|
|
||||||
However, when I first executed the vertex shader cube rotation demo, I found
|
However, when I first executed the vertex shader cube rotation demo, I found
|
||||||
it did not work as expected:
|
it did not work as expected:
|
||||||
|
|
||||||
@ -775,8 +808,8 @@ I've written several \href{https://github.com/buhman/scu-dsp-asm}{nice assembler
|
|||||||
for other architectures in the past, but I've never seen any instruction set
|
for other architectures in the past, but I've never seen any instruction set
|
||||||
as expressive as R500 fragment shaders.
|
as expressive as R500 fragment shaders.
|
||||||
|
|
||||||
I attempted to directly reflect this ``multiple tiers of operand argument
|
I attempted to directly represent this ``multiple tiers of operand argument
|
||||||
decoding'' in the syntax I invented for fragment shader ALU instructions.
|
decoding'' in my fragment shader ALU instruction syntax.
|
||||||
|
|
||||||
These instructions are also vector instructions: a total of 24 floating point
|
These instructions are also vector instructions: a total of 24 floating point
|
||||||
input operands and 8 floating results could be evaluated per instruction.
|
input operands and 8 floating results could be evaluated per instruction.
|
||||||
@ -902,4 +935,426 @@ except:
|
|||||||
The exponent/mantissa table that shows example 7-bit float values on page 106 of
|
The exponent/mantissa table that shows example 7-bit float values on page 106 of
|
||||||
\href{doc/R5xx_Acceleration_v1.5.pdf}{R5xx\_Acceleration\_v1.5.pdf} is incorrect.
|
\href{doc/R5xx_Acceleration_v1.5.pdf}{R5xx\_Acceleration\_v1.5.pdf} is incorrect.
|
||||||
|
|
||||||
|
\section{Progress: 26 Oct 2025}
|
||||||
|
|
||||||
|
From 21 Oct 2025 to 26 Oct 2025, I achieved the following (roughly in chronological order):
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item I \href{https://git.idk.st/bilbo/r500/commit/8594bc4a38f6fcab2ac6e437b46bcf1e0e6d32dd}{rewrote} most of the vertex shader assembler parser/validator, and implemented support for \href{https://git.idk.st/bilbo/r500/commit/f3f1969f4a9b336536f5fb23d246f7103c41e20d}{assembling/disassembling ``dual math'' operations}
|
||||||
|
\item I implemented support for \href{https://git.idk.st/bilbo/r500/commit/96d7286e7cd3270b9dca0924d3a046d585d6dc9d}{assembling} and \href{https://git.idk.st/bilbo/r500/commit/27227426eaac265bc3126edd7d017c791640e789}{disassembling} TEX fragment shader instructions
|
||||||
|
\item I presented this project (including live demos on real hardware) at
|
||||||
|
a \href{https://itch.io/jam/spoopy-jam-7-heckraiser}{local in-person game jam event}
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\subsection{Vertex shader optimization part 1: ``MOV'' elimination}
|
||||||
|
|
||||||
|
After talking about it in person, I decided to try to golf my original
|
||||||
|
15-instruction
|
||||||
|
\href{https://git.idk.st/bilbo/r500/src/commit/c8ae311e60/drm/cube_rotate.vs.asm}{cube\_rotate.vs.asm} vertex shader.
|
||||||
|
|
||||||
|
The first opportunity for optimization is in the first two instructions of:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{verbatim/cube_rotate_const_move.vs.asm}{\includegraphics{verbatim/output/cube_rotate_const_move.vs.asm.pdf}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
The \texttt{VE\_ADD} (being used here as a ``MOV'' instruction) is needed
|
||||||
|
because there is only a single 128-bit read port into \texttt{const} memory, so
|
||||||
|
a multiply-add like this is illegal:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{verbatim/cube_rotate_const_move_illegal.vs.asm}{\includegraphics{verbatim/output/cube_rotate_const_move_illegal.vs.asm.pdf}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
I observed that because I never need to reference the last two constants in the
|
||||||
|
same instruction that references the first two constants, if I rearrange the
|
||||||
|
ordering of the constants to:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{verbatim/cube_rotate_const_move_rearrange.vs.asm}{\includegraphics{verbatim/output/cube_rotate_const_move_rearrange.vs.asm.pdf}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
I can then rewrite the multiply-add instructions as:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{verbatim/cube_rotate_const_move_rearrange_mad.vs.asm}{\includegraphics{verbatim/output/cube_rotate_const_move_rearrange_mad.vs.asm.pdf}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\subsection{Vertex shader optimization part 2: ``dual math'' instructions}
|
||||||
|
|
||||||
|
I spent an entire day rewriting large portions of the vertex shader assembler to
|
||||||
|
add support for ``dual math'' instructions.
|
||||||
|
|
||||||
|
The original
|
||||||
|
\href{https://git.idk.st/bilbo/r500/src/commit/c8ae311e60/drm/cube_rotate.vs.asm}{cube\_rotate.vs.asm}
|
||||||
|
contains this sequence of \texttt{ME\_SIN}/\texttt{ME\_COS} instructions:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{verbatim/cube_rotate_sin_cos.vs.asm}{\includegraphics{verbatim/output/cube_rotate_sin_cos.vs.asm.pdf}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
The \texttt{temp[3].x} and \texttt{temp[3].y} results are needed immediately,
|
||||||
|
but \texttt{temp[3].z} and \texttt{temp[3].w} are not needed until after the
|
||||||
|
first pair of \texttt{VE\_MUL}/\texttt{VE\_MAD} operations.
|
||||||
|
|
||||||
|
The dual math instruction mode replaces the 3rd \texttt{VE\_} instruction operand
|
||||||
|
with any \texttt{ME\_} operation, so it is only usable with 2-operand
|
||||||
|
\texttt{VE\_} instructions like \texttt{VE\_MUL}.
|
||||||
|
|
||||||
|
The dual math encoding also has several restrictions (it only has \nicefrac{1}{4}th the
|
||||||
|
control word bits compared to a normal \texttt{ME\_} instruction). A notable
|
||||||
|
restriction is that it must write to \texttt{alt\_temp}.
|
||||||
|
|
||||||
|
Unlike the fancy things that can be done with fragment shader
|
||||||
|
operands/sources/swizzles, a single vertex shader operand can also only read
|
||||||
|
from a single 128-bit register, so this means to be able to continue to access
|
||||||
|
\texttt{temp[3].zw} as a vector, both \texttt{z} and \texttt{w} must now be
|
||||||
|
stored in \texttt{alt\_temp}, even if only one of them was written by a ``dual
|
||||||
|
math'' instruction.
|
||||||
|
|
||||||
|
The change (and my newly-implemented dual math syntax) is:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{verbatim/cube_rotate_dual_math.vs.asm}{\includegraphics{verbatim/output/cube_rotate_dual_math.vs.asm.pdf}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Where the dual math instruction:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{verbatim/cube_rotate_dual_math_single_instruction.vs.asm}{\includegraphics{verbatim/output/cube_rotate_dual_math_single_instruction.vs.asm.pdf}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Is encoded by the assembler as a single instruction and is executed by the vertex
|
||||||
|
shader unit in a single clock cycle.
|
||||||
|
|
||||||
|
The final
|
||||||
|
\href{https://git.idk.st/bilbo/r500/src/commit/c8ae311e60/drm/cube_rotate_optimize.vs.asm}{cube\_rotate\_optimize.vs.asm}
|
||||||
|
was reduced from 15 instructions to 13 instructions (compared
|
||||||
|
to Mesa's R500 vertex shader compiler's 27 instructions).
|
||||||
|
|
||||||
|
\section{Progress: 29 Oct 2025}
|
||||||
|
|
||||||
|
From 27 Oct 2025 to 29 Oct 2025, I achieved the following (roughly in chronological order):
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item I implemented support for \href{https://git.idk.st/bilbo/r500/commit/9aecbbfc6f297ea71c72f4c4fba1b8107be95ca1}{``multiple render targets''} in the fragment shader assembler
|
||||||
|
\item I wrote a \href{https://git.idk.st/bilbo/r500/src/commit/18b7a593bd/drm/texture_blur_horizontal.fs.asm}{gaussian blur fragment shader}
|
||||||
|
\item I made a demo that draws \href{https://git.idk.st/bilbo/r500/src/commit/18b7a593bd/drm/pumpkin_man.c#L963}{multiple 3D ``objects''} where each object's UV coordinates sample a \href{https://git.idk.st/bilbo/r500/src/commit/18b7a593bd/drm/pumpkin_man.c#L1029-L1069}{different} \href{https://git.idk.st/bilbo/r500/src/commit/18b7a593bd/drm/pumpkin_man.c#L314}{texture}
|
||||||
|
\item I did several experiments related to R500's Z-buffer implementation
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\subsection{Z-buffer experiments}
|
||||||
|
\label{sec:z-buffer-experiments}
|
||||||
|
Though I produced a ``properly'' Z-buffered 3D cube demo previously, I felt I
|
||||||
|
did not fully understand the relationship between Z coordinates, W coordinates,
|
||||||
|
viewport transformations, and the actual values that are written to the
|
||||||
|
Z-buffer. At some point, I'd like to write fragment shaders that sample the
|
||||||
|
Z-buffer, so I feel I need to understand this more rigorously.
|
||||||
|
|
||||||
|
For comparison, Sega Dreamcast stores 32-bit floating-point values in the
|
||||||
|
``depth accumulation buffer''. This effectively means that any Z coordinates can
|
||||||
|
be stored in the depth accumulation buffer without scaling or range
|
||||||
|
remapping. I've made several
|
||||||
|
\href{https://az1.idk.st/public/20kdm2-demo.mp4}{moderately fancy} Dreamcast
|
||||||
|
demos that happily store arbitrary ``view space'' Z values in the depth
|
||||||
|
accumulation buffer without any visible depth aliasing/artifacts.
|
||||||
|
|
||||||
|
In contrast, the Radeon R500 does not have a 32-bit floating point Z-buffer
|
||||||
|
format. Instead, R500 supports (\href{doc/R5xx_Acceleration_v1.5.pdf}{R5xx\_Acceleration\_v1.5.pdf}, page 283,
|
||||||
|
\texttt{ZB\_FORMAT}):
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item 16-bit integer Z
|
||||||
|
\item 16-bit floating point
|
||||||
|
\item 24-bit integer Z with 8-bit stencil
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
The third option, with the most bits, clearly ought to give the most
|
||||||
|
precision--with the caveat that the Z values that are written to the Z-buffer
|
||||||
|
should be scaled to be uniformly distributed across the range of 24-bit integers.
|
||||||
|
|
||||||
|
I performed several tests with variations of
|
||||||
|
\href{https://git.idk.st/bilbo/r500/src/branch/main/drm/zbuffer_test.c}{zbuffer\_test.c}. The
|
||||||
|
general strategy was:
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item Define some contrived/illustrative 3D scene
|
||||||
|
\item Manipulate the scale/range of Z and W values
|
||||||
|
\item Observe the state of the Z-buffer after rendering
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
The first scene I chose was of a tilted plane that is non-coplanar with the view
|
||||||
|
space XY plane, as in:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{images/plane_scene.png}{\includegraphics{images/plane_scene.png}}
|
||||||
|
\caption*{Blender screenshot, ``plane scene''}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Where the grey plane is the object that is to be rendered, the yellow lines
|
||||||
|
represent a ``camera'' from which the plane is to be viewed, and the blue line
|
||||||
|
represents the view/clip-space Z axis.
|
||||||
|
|
||||||
|
To view the content of the Z buffer, I wrote a
|
||||||
|
\href{https://git.idk.st/bilbo/r500/src/commit/18b7a593bd/tools/zbuf_decode.py}{simple script}
|
||||||
|
to convert the 24-bit integer Z-buffer to 16-bit
|
||||||
|
\href{https://en.wikipedia.org/wiki/Netpbm}{PGM},
|
||||||
|
so that it can be easily viewed in an image editor. This tool also shows the
|
||||||
|
minimum and maximum values found in the Z-buffer, intended to help verify that
|
||||||
|
the entire numeric range of the Z-buffer is being used.
|
||||||
|
|
||||||
|
While I expected to see the (orthographic, directly facing the camera) plane
|
||||||
|
drawn on the Z-buffer as a smooth gradient such as:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{images/z_buffer_gradient.png}{\includegraphics{images/z_buffer_gradient.png}}
|
||||||
|
\caption*{R500 framebuffer capture, \texttt{z\_buffer\_gradient.png}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Several of my tests displayed numeric aliasing, overflows, underflows, etc.:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{images/z_buffer_overflow.png}{\includegraphics{images/z_buffer_overflow.png}}
|
||||||
|
\caption*{R500 framebuffer capture, \texttt{z\_buffer\_overflow.png}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Of particular interest to me was to verify the behavior of the
|
||||||
|
\texttt{DX\_CLIP\_SPACE\_DEF} bit
|
||||||
|
(\href{doc/R5xx_Acceleration_v1.5.pdf}{R5xx\_Acceleration\_v1.5.pdf}, page
|
||||||
|
255--this is also the only place in the entire manual where ``non-user'' clip
|
||||||
|
planes are even defined), and to understand the order of pipeline operations.
|
||||||
|
|
||||||
|
I played with moving the plane around, to observe clipping behavior (here the
|
||||||
|
lower half of the scene was clipped due to intersecting the Z=+1.0 clip plane):
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{images/z_buffer_clipped.png}{\includegraphics{images/z_buffer_clipped.png}}
|
||||||
|
\caption*{R500 framebuffer capture, \texttt{z\_buffer\_clipped.png}\\
|
||||||
|
(also simultaneously showing overflow/underflow artifacts)}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Thinking at this point that I nearly understood most of the pieces, I then
|
||||||
|
re-enabled XY perspective division:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{images/z_buffer_perspective.png}{\includegraphics{images/z_buffer_perspective.png}}
|
||||||
|
\caption*{R500 framebuffer capture, \texttt{z\_buffer\_perspective.png}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
The above image was not quite what I wanted: I noticed the range of the Z buffer
|
||||||
|
values were roughly between \texttt{0} and \texttt{8388607}, but what I really
|
||||||
|
wanted was \texttt{0} to \texttt{16777215}. Adjusting scale again produced this
|
||||||
|
Z-buffer:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{images/z_buffer_perspective_scale.png}{\includegraphics{images/z_buffer_perspective_scale.png}}
|
||||||
|
\caption*{R500 framebuffer capture, \texttt{z\_buffer\_perspective\_scale.png}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Up to this point, I was using \texttt{ZFUNC=GREATER} with a Z-buffer cleared
|
||||||
|
with an initial depth of zero, where all Z values are negative numbers.
|
||||||
|
|
||||||
|
I decided it might be more intuitive to use a Z-buffer that is cleared with an
|
||||||
|
initial depth of one, using \texttt{ZFUNC=LESS} instead where all Z values are
|
||||||
|
positive numbers.
|
||||||
|
|
||||||
|
With these adjustments, I captured a Z-buffer from the earlier cube demo:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{images/z_buffer_cube.png}{\includegraphics{images/z_buffer_cube.png}}
|
||||||
|
\caption*{R500 framebuffer capture, \texttt{z\_buffer\_cube.png}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
This was still not quite ``correct'', because the minimum depth of the cube is
|
||||||
|
being drawn as \textasciitilde{}\texttt{2763306} (\textasciitilde{}0.16), but I expected
|
||||||
|
something closer to zero.
|
||||||
|
|
||||||
|
Adjusting my range/scale arithmetic again produced this image:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{images/z_buffer_cube_range.png}{\includegraphics{images/z_buffer_cube_range.png}}
|
||||||
|
\caption*{R500 framebuffer capture, \texttt{z\_buffer\_cube\_range.png}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
The minimum Z value now appears to be closer to zero, but the ``back'' faces of
|
||||||
|
the cube (and maximum Z values) are not visible. Without changing any
|
||||||
|
scale/range constants, inverting \texttt{ZFUNC} and using a zero-initialized
|
||||||
|
Z-buffer produced this image of the back faces of the cube:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{images/z_buffer_cube_range_back.png}{\includegraphics{images/z_buffer_cube_range_back.png}}
|
||||||
|
\caption*{R500 framebuffer capture, \texttt{z\_buffer\_cube\_range\_back.png}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Indeed, the maximum Z value is close to \textasciitilde{}\texttt{16777215}
|
||||||
|
(\textasciitilde{}1.0), as intended. I feel at this point I have a better intuition
|
||||||
|
for using integer Z-buffers. The pipeline (and relevant registers) appears to be
|
||||||
|
something like this:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\includegraphics{diagrams/z_operations.svg}
|
||||||
|
\caption*{R500 Z transform pipeline (simplified)}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Prior to these experiments, I was not aware \texttt{SU\_DEPTH\_SCALE} is the
|
||||||
|
thing directly responsible for scaling floating point Z values to the integer Z
|
||||||
|
values stored in the depth buffer.
|
||||||
|
|
||||||
|
In general, the hardware perspective divide, viewport transform, clipping, and
|
||||||
|
setup units are absolutely fascinating.
|
||||||
|
|
||||||
|
\subsection{3D perspective}
|
||||||
|
|
||||||
|
Despite making many 3D demos in the past, I feel that every time I want to
|
||||||
|
``draw something 3D'' on a new platform, I need to re-relearn 3D/perspective
|
||||||
|
transformations (perhaps because I never truly \textit{learned} anything).
|
||||||
|
|
||||||
|
In many OpenGL articles/tutorials/books the
|
||||||
|
\href{https://learnopengl.com/Getting-started/Coordinate-Systems}{standard}
|
||||||
|
\href{https://ogldev.org/www/tutorial12/tutorial12.html}{formula} for
|
||||||
|
\href{https://songho.ca/opengl/gl_projectionmatrix.html}{explaining}
|
||||||
|
\href{https://www.scratchapixel.com/lessons/3d-basic-rendering/perspective-and-orthographic-projection-matrix/opengl-perspective-projection-matrix.html}{perspective}
|
||||||
|
\href{https://learnwebgl.brown37.net/08_projections/projections_perspective.html}{projection}
|
||||||
|
appears to be:
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item Begin with an overly-academic explanation of perspective in terms of camera optics and trigonometry
|
||||||
|
\item Do not implement or demonstrate any of the systems or mathematics
|
||||||
|
described in the preceding pages of explanations; instead abruptly hide all
|
||||||
|
magic behind \texttt{glm::perspective}
|
||||||
|
\item Refuse to explain or clarify further
|
||||||
|
\item Continue for the next 30 chapters/articles without ever revisiting focal
|
||||||
|
length, view frustums, depth of field, etc. again
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
It is sufficient to instead rationalize/implement ``perspective'' as:
|
||||||
|
|
||||||
|
\begin{quote}
|
||||||
|
Perspective is the division of X and Y coordinates by Z, where the coordinate
|
||||||
|
$(0, 0, 0)$ is the view origin (and the center of the screen/projection).
|
||||||
|
\end{quote}
|
||||||
|
|
||||||
|
Defining perspective this way also works for OpenGL, with some slight
|
||||||
|
adjustment, notably to deal with OpenGL's
|
||||||
|
\href{https://registry.khronos.org/OpenGL/specs/gl/glspec20.pdf}{definition} of
|
||||||
|
``normalized device coordinates''.
|
||||||
|
|
||||||
|
I note that (unlike Dreamcast) one can't actually divide by Z on R500 (nor
|
||||||
|
OpenGL), both because the VTE doesn't support this, and because the texture
|
||||||
|
unit doesn't support this. Of course, I tried it anyway:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\includegraphics{videos/cube_warped_textures.png}
|
||||||
|
\caption*{R500 DVI capture, \texttt{texture\_cube\_warping.c} \\
|
||||||
|
(unrelated to this demo, R500 also interestingly has a dedicated ``disable perspective-correct texture mapping'' bit)}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Instead, in both cases, the R500 uses the W coordinate for division. This turns
|
||||||
|
out to be very convenient, because it means that the ``field of
|
||||||
|
view''/perspective scale (W) and the Z-buffer/depth test scale (Z) can be
|
||||||
|
adjusted independently.
|
||||||
|
|
||||||
|
\subsection{3D clipping}
|
||||||
|
|
||||||
|
Here are several examples of improperly scaled Z values, which are being clipped
|
||||||
|
by the setup unit:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\includegraphics{videos/cube_clipped_far.png}
|
||||||
|
\caption*{R500 DVI capture, \texttt{texture\_cube\_clear\_zwrite\_vertex\_shader\_optimize\_zscale.c} \\
|
||||||
|
(``far'' clip plane intersection)}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\includegraphics{videos/cube_clipped_near.png}
|
||||||
|
\caption*{R500 DVI capture, \texttt{texture\_cube\_clear\_zwrite\_vertex\_shader\_optimize\_zscale.c} \\
|
||||||
|
(``near'' clip plane intersection)}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\includegraphics{videos/cube_clipped_near_opengl.png}
|
||||||
|
\caption*{R500 DVI capture, \texttt{texture\_cube\_clear\_zwrite\_vertex\_shader\_optimize\_zscale.c} \\
|
||||||
|
(I am curious to learn under what circumstances the OpenGL designers thought\\ $-w_{c} < z_{c} < w_{c}$ was a good idea)}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\section{Progress: 31 Oct 2025}
|
||||||
|
|
||||||
|
From 30 Oct 2025 to 31 Oct 2025, I achieved the following (non-chronological):
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item I implemented a \href{https://git.idk.st/bilbo/r500/src/branch/main/drm/matrix_cubesphere_specular.fs.asm}{diffuse/specular lighting fragment shader} in R500 fragment shader assembly
|
||||||
|
\item I made vertex shaders that represent coordinate space transformations
|
||||||
|
using matrix multiplications rather than ad-hoc arithmetic
|
||||||
|
\item While writing demos that pass multiple (interpolated) vectors from the
|
||||||
|
vertex shader to the fragment shader, I learned more about \href{https://git.idk.st/bilbo/r500/src/commit/f43ac599f9/drm/matrix_cubesphere_specular_suzanne.cpp#L444-L512}{``rasterizer instructions''}
|
||||||
|
\item I made a demo that uses more than one texture for the entire scene
|
||||||
|
(by \href{https://git.idk.st/bilbo/r500/src/commit/f43ac599f9/drm/pumpkin_man.c#L272-L317}{reconfiguring
|
||||||
|
the texture unit for each ``object''})
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\subsection{Lighting demo}
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\includegraphics{videos/suzanne.png}
|
||||||
|
\caption*{R500 DVI capture, \texttt{matrix\_cubesphere\_specular\_suzanne.cpp} \\
|
||||||
|
(subdivided Suzanne mesh, 15,744 triangles)}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Despite being a ``simple'' lighting demo, a surprising number of things need to
|
||||||
|
happen simultaneously before it becomes possible.
|
||||||
|
|
||||||
|
Where vertex shaders from previous demos were passed at most a single scalar
|
||||||
|
variable for animation/timing, the vertex shader in this demo uses
|
||||||
|
\href{https://git.idk.st/bilbo/r500/src/commit/f43ac599f9/drm/matrix_cubesphere_specular_suzanne.cpp#L301-L326}{10 vectors} as
|
||||||
|
input:
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item 4 vectors for a ``local space to clip space'' transformation matrix
|
||||||
|
\item 4 vectors for a ``local space to world space'' transformation matrix (used for lighting)
|
||||||
|
\item 1 vector for a ``light position'' (in world space coordinates, used for lighting)
|
||||||
|
\item 1 vector for a ``view origin'' (in world space coordinates, used for lighting)
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
Additionally, where previous demos passed at most a single vector from the
|
||||||
|
vertex shader to the fragment shader (vertex color or texture coordinates), this
|
||||||
|
demo passes
|
||||||
|
\href{https://git.idk.st/bilbo/r500/src/commit/f43ac599f9/drm/matrix_cubesphere_specular_suzanne.cpp#L444-L512}{5 vectors}
|
||||||
|
from the vertex shader to the fragment shader, all of which are used
|
||||||
|
by the lighting calculation:
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item world space position
|
||||||
|
\item world space normal
|
||||||
|
\item world space light position
|
||||||
|
\item world space view origin
|
||||||
|
\item uv space texture coordinates
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\subsection{Learn algebra by writing fragment shader assembly}
|
||||||
|
|
||||||
|
Prior to today, I did not know about this transformation/equivalence:
|
||||||
|
|
||||||
|
\begin{gather*}
|
||||||
|
x^{n} \iff 2^{\left( n\cdot\frac{\log(x)}{\log(2)} \right)}
|
||||||
|
\end{gather*}
|
||||||
|
|
||||||
|
While the R500 fragment shader alpha unit does not have a \texttt{POW} operation,
|
||||||
|
it does have \href{https://git.idk.st/bilbo/r500/src/commit/f43ac599f9/drm/matrix_cubesphere_specular.fs.asm#L93-L99}{\texttt{EX2} and \texttt{LN2}}
|
||||||
|
operations.
|
||||||
|
|
||||||
|
For example, one could implement $a^{32}$ in R500 fragment shader assembly as:
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\href{verbatim/pow_fragment_shader.fs.asm}{\includegraphics{verbatim/output/pow_fragment_shader.fs.asm.pdf}}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
This ``arbitrary exponents with arbitrary bases'' pattern is used in the
|
||||||
|
lighting demo fragment shader as part of the ``specular intensity'' calculation.
|
||||||
|
|
||||||
|
This fragment shader unit feature is very cool, because a software
|
||||||
|
implementation of a generalized floating-point \texttt{pow} function is
|
||||||
|
extremely
|
||||||
|
\href{https://git.musl-libc.org/cgit/musl/tree/src/math/powf.c?id=cb5c057c87240a9534f8e0d9b7ff2560082f6218}{computationally expensive}
|
||||||
|
otherwise.
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
|||||||
@ -19,4 +19,4 @@ def transform():
|
|||||||
|
|
||||||
lines = list(transform())
|
lines = list(transform())
|
||||||
with open(sys.argv[1], 'w') as f:
|
with open(sys.argv[1], 'w') as f:
|
||||||
f.write('\n'.join(lines))
|
f.write(''.join(lines))
|
||||||
|
|||||||
8
verbatim/cube_rotate_const_move.vs.asm
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
-- CONST[0] = {0.159155, 0.5, 6.283185, -3.141593}
|
||||||
|
-- CONST[1] = {theta1, theta2, 0.2, 0.5}
|
||||||
|
|
||||||
|
temp[0].xy = VE_ADD const[1].xy__ const[1].00__ ;
|
||||||
|
|
||||||
|
temp[0].xy = VE_MAD temp[0].xy__ const[0].xx__ const[0].yy__ ;
|
||||||
|
temp[0].xy = VE_FRC temp[0].xy__ ;
|
||||||
|
temp[0].xy = VE_MAD temp[0].xy__ const[0].zz__ const[0].ww__ ;
|
||||||
3
verbatim/cube_rotate_const_move_illegal.vs.asm
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
-- this is an illegal instruction:
|
||||||
|
-- const[1] and const[0] can not be read simultaneously
|
||||||
|
temp[0].xy = VE_MAD const[1].xy__ const[0].xx__ const[0].yy__ ;
|
||||||
2
verbatim/cube_rotate_const_move_rearrange.vs.asm
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
-- CONST[0] = {theta1, theta2, 0.159155, 0.5}
|
||||||
|
-- CONST[1] = {6.283185, -3.141593, 0.2, 0.5}
|
||||||
7
verbatim/cube_rotate_const_move_rearrange_mad.vs.asm
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
-- the VE_ADD instruction is now not necessary/deleted:
|
||||||
|
-- temp[0].xy = VE_ADD const[1].xy__ const[1].00__ ;
|
||||||
|
|
||||||
|
-- const addresses and swizzles changed:
|
||||||
|
temp[0].xy = VE_MAD const[0].xy__ const[0].zz__ const[0].ww__ ;
|
||||||
|
temp[0].xy = VE_FRC temp[0].xy__ ;
|
||||||
|
temp[0].xy = VE_MAD temp[0].xy__ const[1].xx__ const[1].yy__ ;
|
||||||
14
verbatim/cube_rotate_dual_math.vs.asm
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
temp[3].x = ME_SIN temp[0].___x ;
|
||||||
|
temp[3].y = ME_COS temp[0].___x ;
|
||||||
|
alt_temp[3].z = ME_SIN temp[0].___y ;
|
||||||
|
|
||||||
|
-- first rotation
|
||||||
|
temp[1].yz = VE_MUL input[0]._-zz_ temp[3]._xy_ ,
|
||||||
|
alt_temp[3].w = ME_COS temp[0].y_ ;
|
||||||
|
|
||||||
|
temp[1].xyz = VE_MAD input[0].xyy_ temp[3].1yx_ temp[1].0yz_ ;
|
||||||
|
|
||||||
|
-- second rotation
|
||||||
|
temp[2].xz = VE_MUL temp[1].-z_z_ alt_temp[3].z_w_ ;
|
||||||
|
|
||||||
|
temp[2].xyz = VE_MAD temp[1].xyx_ alt_temp[3].w1z_ temp[2].x0z_ ;
|
||||||
2
verbatim/cube_rotate_dual_math_single_instruction.vs.asm
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
temp[1].yz = VE_MUL input[0]._-zz_ temp[3]._xy_ ,
|
||||||
|
alt_temp[3].w = ME_COS temp[0].y_ ;
|
||||||
14
verbatim/cube_rotate_sin_cos.vs.asm
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
temp[3].x = ME_SIN temp[0].___x ;
|
||||||
|
temp[3].y = ME_COS temp[0].___x ;
|
||||||
|
temp[3].z = ME_SIN temp[0].___y ;
|
||||||
|
temp[3].w = ME_COS temp[0].___y ;
|
||||||
|
|
||||||
|
-- first rotation
|
||||||
|
temp[1].yz = VE_MUL input[0]._-zz_ temp[3]._xy_ ;
|
||||||
|
|
||||||
|
temp[1].xyz = VE_MAD input[0].xyy_ temp[3].1yx_ temp[1].0yz_ ;
|
||||||
|
|
||||||
|
-- second rotation
|
||||||
|
temp[2].xz = VE_MUL temp[1].-z_z_ temp[3].z_w_ ;
|
||||||
|
|
||||||
|
temp[2].xyz = VE_MAD temp[1].xyx_ temp[3].w1z_ temp[2].x0z_ ;
|
||||||
12
verbatim/pow_fragment_shader.fs.asm
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
-- a = log(a) / log(2)
|
||||||
|
src0.a = temp[0] :
|
||||||
|
temp[0].a = LN2 src0.a ;
|
||||||
|
|
||||||
|
-- a = a * 32.0 + 0
|
||||||
|
src0.a = temp[0] ,
|
||||||
|
src1.a = float(96) : -- 32.0 (or any other constant)
|
||||||
|
temp[0].a = MAD src0.a src1.a src1.0 ;
|
||||||
|
|
||||||
|
-- a = 2 ^ a
|
||||||
|
src0.a = temp[0] :
|
||||||
|
temp[0].a = EX2 src0.a ;
|
||||||
6
verbatim/r500_view_clip.c
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0)
|
||||||
|
VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0)
|
||||||
|
VAP_VTE_CNTL__VTX_XY_FMT(1)
|
||||||
|
VAP_VTE_CNTL__VTX_Z_FMT(0)
|
||||||
|
VAP_VTE_CNTL__VTX_W0_FMT(1)
|
||||||
|
VAP_CNTL__DX_CLIP_SPACE_DEF(1)
|
||||||
8
verbatim/sin_operand_remap.vs.asm
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
-- CONST[0] = {0.159155, 0.5, 6.283185, -3.141593}
|
||||||
|
|
||||||
|
-- t = t * 0.159155 + 0.5
|
||||||
|
temp[0].xy = VE_MAD temp[0].xy__ const[0].xx__ const[0].yy__ ;
|
||||||
|
-- t = frac(t)
|
||||||
|
temp[0].xy = VE_FRC temp[0].xy__ ;
|
||||||
|
-- t = t * 6.283185 + -3.141593
|
||||||
|
temp[0].xy = VE_MAD temp[0].xy__ const[0].zz__ const[0].ww__ ;
|
||||||
BIN
videos/cube_clipped_far.mp4
Normal file
BIN
videos/cube_clipped_far.png
Normal file
|
After Width: | Height: | Size: 127 KiB |
BIN
videos/cube_clipped_near.mp4
Normal file
BIN
videos/cube_clipped_near.png
Normal file
|
After Width: | Height: | Size: 122 KiB |
BIN
videos/cube_clipped_near_opengl.mp4
Normal file
BIN
videos/cube_clipped_near_opengl.png
Normal file
|
After Width: | Height: | Size: 142 KiB |
BIN
videos/cube_warped_textures.mp4
Normal file
BIN
videos/cube_warped_textures.png
Normal file
|
After Width: | Height: | Size: 212 KiB |
BIN
videos/suzanne.mp4
Normal file
BIN
videos/suzanne.png
Normal file
|
After Width: | Height: | Size: 77 KiB |