november 11 update

This commit is contained in:
Zack Buhman 2025-11-13 10:56:28 -06:00
parent 2c6e735350
commit 6bb7722011
38 changed files with 2915 additions and 74 deletions

View File

@ -1,6 +1,8 @@
set -eux set -eux
mkdir -p verbatim/output
rm -f verbatim/output/*.svg rm -f verbatim/output/*.svg
(cd verbatim; make -j8)
make4ht --shell-escape index.tex "pic-m,pic-equation,svg" make4ht --shell-escape index.tex "pic-m,pic-equation,svg"
@ -17,15 +19,18 @@ sed -i 's|color-scheme: light dark;||g' index.css
echo 'figcaption.caption { margin-bottom: 1.3em; margin-top: 0.3em; }' >> index.css echo 'figcaption.caption { margin-bottom: 1.3em; margin-top: 0.3em; }' >> index.css
echo '.cmti-10 { font-style: italic; }' >> index.css echo '.cmti-10 { font-style: italic; }' >> index.css
echo '[data="diagrams/complete_particle_data_flow.svg"] { margin-left: -2.5em; }' >> index.css
echo '[href="diagrams/simplified_particle_data_flow.svg"] { width: 100%; text-align: center; display: inline-block; }' >> index.css
sed -i 's/˜/~/g' index.html sed -i 's/˜/~/g' index.html
sed -i "s|<p class='noindent'><object class='graphics' data='diagrams/z_operations.svg' name='picture diagrams/z_operations' type='image/svg+xml'></object>|<p class='noindent' style='text-align: center;'><object class='graphics' style='width: 40em;' data='diagrams/z_operations.svg' name='picture diagrams/z_operations' type='image/svg+xml'></object>|g" index.html sed -i "s|<p class='noindent'><object class='graphics' data='diagrams/z_operations.svg' name='picture diagrams/z_operations' type='image/svg+xml'></object>|<p class='noindent' style='text-align: center;'><object class='graphics' style='width: 40em;' data='diagrams/z_operations.svg' name='picture diagrams/z_operations' type='image/svg+xml'></object>|g" index.html
sed -i '/height: 2.5em;/d' index.css sed -i '/height: 2.5em;/d' index.css
sed -i 's/index.css/index3.css/g' index.html sed -i 's/index.css/index4.css/g' index.html
mv index.css index3.css mv index.css index4.css
python replace_video.py index.html python replace_video.py index.html

BIN
diagrams/c4_32_fp.pdf Normal file

Binary file not shown.

32
diagrams/c4_32_fp.tex Normal file
View File

@ -0,0 +1,32 @@
% Strip diagram of four equal cells labelled C3..C0 (left to right). Each cell
% is 8 units wide and is split into 32 subdivisions by tick marks; the numbers
% above the strip read 128, 96, 64, 32, 0 from the left edge to the right edge.
\documentclass{standalone}
% Load xcolor (with its options) before tikz: tikz loads xcolor on its own, and
% requesting extra options once it is already loaded can raise an option-clash
% error.
\usepackage[dvipsnames]{xcolor}
\usepackage{tikz}
\begin{document}
\begin{tikzpicture}[scale=1]
  % Outer frame: 32 units wide, 1.2 units tall.
  \draw[ultra thick] (0,0) rectangle (32,-1.2);
  % Full-height dividers between neighbouring cells (x = 8, 16, 24).
  \foreach \j in {1,...,3} {
    \draw[ultra thick] (\j * 8,0) -- (\j * 8,-1.2);
  }
  % Tick marks hanging from the top edge of every cell.
  \foreach \j in {0,...,3} {
    % Minor ticks every 1/4 unit: 31 ticks give 32 subdivisions per cell.
    \foreach \i in {1,...,31} {
      \draw[very thick] (\i / 4 + \j * 8,0) -- (\i / 4 + \j * 8,-0.15);
    }
    % Longer, heavier ticks every 2 units, i.e. every 8th subdivision.
    \foreach \i in {1,...,3} {
      \draw[ultra thick] (\i * 8 / 4 + \j * 8,0) -- (\i * 8 / 4 + \j * 8,-0.25);
    }
  }
  \foreach \j in {0,...,3} {
    % Cell name centred in the cell: C3 for the leftmost cell down to C0.
    \node[font=\huge\bfseries] at (8 * \j + 4.0,-0.7) {C\pgfmathparse{int(3 - \j)}\pgfmathresult};
    % Offset printed just left of the cell's right edge: 96, 64, 32, 0.
    \node[font=\large\bfseries] at (8 * \j + 7.9,0.25) {\pgfmathparse{int((3 - \j) * 32)}\pgfmathresult};
  }
  % Offset at the far left edge of the strip: 128.
  \node[font=\large\bfseries] at (8 * 0 + 0.0,0.25) {\pgfmathparse{int((4 - 0) * 32)}\pgfmathresult};
\end{tikzpicture}
\end{document}

BIN
diagrams/c4_8.pdf Normal file

Binary file not shown.

33
diagrams/c4_8.tex Normal file
View File

@ -0,0 +1,33 @@
% Strip diagram of four equal cells labelled C3..C0 (left to right). Each cell
% is 8 units wide with one tick per unit; the numbers above the strip read
% 32, 24, 16, 8, 0 from the left edge to the right edge.
\documentclass{standalone}
% Load xcolor (with its options) before tikz: tikz loads xcolor on its own, and
% requesting extra options once it is already loaded can raise an option-clash
% error.
\usepackage[dvipsnames]{xcolor}
\usepackage{tikz}
\begin{document}
\begin{tikzpicture}[scale=1]
  % Outer frame: 32 units wide, 1.2 units tall.
  \draw[ultra thick] (0,0) rectangle (32,-1.2);
  % Full-height dividers between neighbouring cells (x = 8, 16, 24).
  \foreach \j in {1,...,3} {
    \draw[ultra thick] (\j * 8,0) -- (\j * 8,-1.2);
  }
  % Seven ticks per cell, one per unit, splitting each cell into 8 slots.
  \foreach \j in {0,...,3} {
    \foreach \i in {1,...,7} {
      \draw[very thick] (\i + \j * 8,0) -- (\i + \j * 8,-0.2);
    }
  }
  \foreach \j in {0,...,3} {
    % Cell name centred in the cell: C3 for the leftmost cell down to C0.
    \node[font=\huge\bfseries] at (8 * \j + 4.0,-0.7) {C\pgfmathparse{int(3 - \j)}\pgfmathresult};
    % Offset centred over the last slot of the cell: 24, 16, 8, 0.
    \node[font=\large\bfseries] at (8 * \j + 7.5,0.25) {\pgfmathparse{int((3 - \j) * 8)}\pgfmathresult};
  }
  % Offset at the far left edge of the strip: 32.
  \node[font=\large\bfseries] at (8 * 0 + 0.0,0.25) {\pgfmathparse{int((4 - 0) * 8)}\pgfmathresult};
  % Disabled per-cell colour captions; these are the only users of the
  % dvipsnames colour names in this file.
  %\node[font=\huge\bfseries,text=MidnightBlue ] at ( 0 + 4.5,-1.5) {blue };
  %\node[font=\huge\bfseries,text=OliveGreen ] at ( 8 + 4.5,-1.5) {green};
  %\node[font=\huge\bfseries,text=BrickRed ] at (16 + 4.5,-1.5) {red };
  %\node[font=\huge\bfseries,text=black ] at (24 + 4.5,-1.5) {alpha};
\end{tikzpicture}
\end{document}

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 32 KiB

View File

@ -0,0 +1,66 @@
// Simplified particle data flow: a read-side state table feeds an
// "arbitrary computation" node, which feeds a write-side state table.
// Three bold headings (read buffer / fragment shader / write buffer) sit
// above the three columns.
digraph
{
// Alternative spacing, kept for reference:
//graph [ranksep=1 nodesep=1]
graph [nodesep=0.5, margin=0]
fontname="Computer Modern,Arial,sans-serif"
node [fontname="Helvetica,Arial,sans-serif"]
edge [fontname="Helvetica,Arial,sans-serif"]
layout=dot
labelloc = "t"
// Column headings: borderless text nodes with bold HTML labels.
node [shape=plaintext]
read [label=<<b>read buffer</b>> height=0]
write [label=<<b>write buffer</b>> height=0]
shader [label=<<b>fragment shader</b>> height=0]
// Read-side table: a wide "state" cell with the a/b cells on its right.
// Cell "a" is highlighted (chartreuse2) and exposes port "src".
particle_stateA [label=<
<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0" CELLPADDING="4">
<TR>
<TD COLSPAN="2" ROWSPAN="2">state </TD>
<TD BGCOLOR="chartreuse2" PORT="src">a</TD>
</TR>
<TR>
<TD BGCOLOR="azure3">b</TD>
</TR>
</TABLE>>]
// Write-side table: mirror image of the read side, with the a/b cells on the
// left. Here cell "b" is highlighted (chartreuse2) and exposes port "dst".
particle_stateB [label=<
<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0" CELLPADDING="4">
<TR>
<TD BGCOLOR="azure3">a</TD>
<TD COLSPAN="2" ROWSPAN="2">state</TD>
</TR>
<TR>
<TD BGCOLOR="chartreuse2" PORT="dst">b</TD>
</TR>
</TABLE>>]
// Nodes declared from here on are drawn as ellipses.
node [shape=ellipse]
simulation [label="arbitrary\ncomputation"]
// Every edge declared below, up to the subgraph, is invisible: these edges
// are not drawn and only serve to arrange the nodes.
edge [style=invis]
// Keep the three headings on one rank, ordered read -> shader -> write.
rank=same {read -> shader -> write}
// Tie each heading to the node of its column.
read -> particle_stateA
shader -> simulation
write -> particle_stateB
subgraph program {
// NOTE: this dashed default is replaced by the solid default a few lines
// below before any edge is declared, so no dashed edge is produced.
edge [weight=1000 style=dashed]
// uncomment to hide the grid
//edge [style=invis]
//
// vertex_buffer_copy * 4
edge [weight=1000 style=solid]
// Visible data-flow edges, attached to the table ports and to the west/east
// sides of the computation node, all kept on one rank.
rank=same {
particle_stateA:src -> simulation:w
simulation:e -> particle_stateB:dst
}
}
}

View File

@ -0,0 +1,332 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Generated by graphviz version 12.2.1 (20241206.2353)
-->
<!-- Pages: 1 -->
<svg
width="378.63464pt"
height="91.165062pt"
viewBox="0 0 379.01428 91.344521"
version="1.1"
id="svg15"
sodipodi:docname="simplified_particle_data_flow.svg"
inkscape:version="1.4 (e7c3feb100, 2024-10-09)"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<defs
id="defs15" />
<sodipodi:namedview
id="namedview15"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:showpageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
inkscape:deskcolor="#d1d1d1"
inkscape:document-units="pt"
inkscape:zoom="2.469657"
inkscape:cx="252.26175"
inkscape:cy="36.847222"
inkscape:window-width="2048"
inkscape:window-height="1124"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="1"
inkscape:current-layer="node1-2" />
<g
id="graph0"
class="graph"
transform="translate(3.816959,87.344522)">
<g
id="node2"
class="node"
transform="translate(0,32.254074)">
<title
id="title7">write</title>
<text
text-anchor="start"
x="289.12"
y="-102.95"
font-family="Helvetica, Arial, sans-serif"
font-weight="bold"
font-size="14px"
id="text8">write buffer</text>
</g>
<polygon
fill="#ffffff"
stroke="none"
points="375.38,4 -4,4 -4,-123.25 375.38,-123.25 "
id="polygon1"
transform="matrix(1,0,0,0.71783514,0,1.1286594)" />
<!-- read -->
<g
id="node1"
class="node"
transform="translate(-3.0428371,27.081251)">
<title
id="title1">read</title>
<text
text-anchor="start"
x="48.986347"
y="-102.95"
font-family="Helvetica, Arial, sans-serif"
font-weight="bold"
font-size="14px"
id="text1"
style="line-height:0"><tspan
style="line-height:1.25;text-align:center;text-anchor:middle"
sodipodi:role="line"
id="tspan23"
x="48.986347"
y="-102.95">read buffer</tspan><tspan
sodipodi:role="line"
x="48.986347"
y="-90.287735"
id="tspan17"
style="font-size:10.0197px;line-height:1;text-align:center;text-anchor:middle">(texture sample)</tspan></text>
<text
xml:space="preserve"
style="font-size:10.0197px;line-height:1.25;font-family:Monospace;-inkscape-font-specification:Monospace;text-align:center;text-anchor:middle;stroke-width:0.751476"
x="44.38129"
y="-87.754082"
id="text18"><tspan
sodipodi:role="line"
id="tspan18"
style="stroke-width:0.751476"
x="47.387688"
y="-87.754082"> </tspan></text>
</g>
<!-- shader -->
<g
id="node3"
class="node"
transform="translate(0,32.254074)">
<title
id="title2">shader</title>
<text
text-anchor="start"
x="131"
y="-102.95"
font-family="Helvetica, Arial, sans-serif"
font-weight="bold"
font-size="14px"
id="text2">fragment shader</text>
</g>
<!-- read&#45;&gt;shader -->
<!-- particle_stateA -->
<g
id="node4"
class="node">
<title
id="title3">particle_stateA</title>
<polygon
fill="none"
stroke="#000000"
points="59,-55.5 59,-4 16,-4 16,-55.5 "
id="polygon3" />
<text
text-anchor="start"
x="21"
y="-24.32"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text3">state</text>
<polygon
fill="#76ee00"
stroke="none"
points="76.5,-55.5 76.5,-29.75 59,-29.75 59,-55.5 "
id="polygon4" />
<polygon
fill="none"
stroke="#000000"
points="76.5,-55.5 76.5,-29.75 59,-29.75 59,-55.5 "
id="polygon5" />
<text
text-anchor="start"
x="64"
y="-37.200001"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text5">a</text>
<polygon
fill="#c1cdcd"
stroke="none"
points="76.5,-29.75 76.5,-4 59,-4 59,-29.75 "
id="polygon6" />
<polygon
fill="none"
stroke="#000000"
points="76.5,-29.75 76.5,-4 59,-4 59,-29.75 "
id="polygon7" />
<text
text-anchor="start"
x="64"
y="-11.45"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text7">b</text>
</g>
<!-- read&#45;&gt;particle_stateA -->
<!-- write -->
<!-- particle_stateB -->
<g
id="node5"
class="node">
<title
id="title8">particle_stateB</title>
<polygon
fill="#c1cdcd"
stroke="none"
points="309.38,-55.5 309.38,-29.75 291.88,-29.75 291.88,-55.5 "
id="polygon8" />
<polygon
fill="none"
stroke="#000000"
points="309.38,-55.5 309.38,-29.75 291.88,-29.75 291.88,-55.5 "
id="polygon9" />
<text
text-anchor="start"
x="296.88"
y="-37.200001"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text9">a</text>
<polygon
fill="none"
stroke="#000000"
points="348.62,-55.5 348.62,-4 309.38,-4 309.38,-55.5 "
id="polygon10" />
<text
text-anchor="start"
x="314.38"
y="-24.32"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text10">state</text>
<polygon
fill="#76ee00"
stroke="none"
points="309.38,-29.75 309.38,-4 291.88,-4 291.88,-29.75 "
id="polygon11" />
<polygon
fill="none"
stroke="#000000"
points="309.38,-29.75 309.38,-4 291.88,-4 291.88,-29.75 "
id="polygon12" />
<text
text-anchor="start"
x="296.88"
y="-11.45"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text12">b</text>
</g>
<!-- write&#45;&gt;particle_stateB -->
<!-- shader&#45;&gt;write -->
<!-- simulation -->
<g
id="node6"
class="node">
<title
id="title12">simulation</title>
<ellipse
fill="none"
stroke="#000000"
cx="184.25"
cy="-29.75"
rx="63.82"
ry="27.93"
id="ellipse12" />
<text
text-anchor="middle"
x="184.25"
y="-32.200001"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text13">arbitrary</text>
<text
text-anchor="middle"
x="184.25"
y="-16.450001"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text14">computation</text>
</g>
<!-- shader&#45;&gt;simulation -->
<!-- particle_stateA&#45;&gt;simulation -->
<g
id="edge6"
class="edge">
<title
id="title14">particle_stateA:src-&gt;simulation:w</title>
<path
fill="none"
stroke="#000000"
d="m 77.5,-42.62 c 15.41,0 20.75,7.7 31.59,11.19"
id="path14" />
<polygon
fill="#000000"
stroke="#000000"
points="108.53,-27.98 109.56,-34.9 118.94,-29.97 "
id="polygon14" />
</g>
<!-- simulation&#45;&gt;particle_stateB -->
<g
id="edge7"
class="edge">
<title
id="title15">simulation:e-&gt;particle_stateB:dst</title>
<path
fill="none"
stroke="#000000"
d="m 248.07,-29.75 c 15.36,0 20.68,7.7 31.5,11.19"
id="path15" />
<polygon
fill="#000000"
stroke="#000000"
points="278.97,-15.11 280,-22.03 289.38,-17.1 "
id="polygon15" />
</g>
<g
id="node1-2"
class="node"
transform="translate(273.90725,26.773649)">
<title
id="title1-9">read</title>
<text
text-anchor="start"
x="48.986347"
y="-102.95"
font-family="Helvetica, Arial, sans-serif"
font-weight="bold"
font-size="14px"
id="text1-1"
style="line-height:0"><tspan
style="line-height:1.25;text-align:center;text-anchor:middle"
sodipodi:role="line"
id="tspan23-2"
x="48.986347"
y="-102.95">write buffer</tspan><tspan
sodipodi:role="line"
x="48.986351"
y="-90.287735"
id="tspan17-7"
style="font-size:10.0197px;line-height:1;text-align:center;text-anchor:middle">(render target)</tspan></text>
<text
xml:space="preserve"
style="font-size:10.0197px;line-height:1.25;font-family:Monospace;-inkscape-font-specification:Monospace;text-align:center;text-anchor:middle;stroke-width:0.751476"
x="44.38129"
y="-87.754082"
id="text18-0"><tspan
sodipodi:role="line"
id="tspan18-9"
style="stroke-width:0.751476"
x="47.387688"
y="-87.754082"> </tspan></text>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 9.4 KiB

View File

@ -0,0 +1,563 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Generated by graphviz version 12.2.1 (20241206.2353)
-->
<!-- Pages: 1 -->
<svg
width="414.87pt"
height="152.91658pt"
viewBox="0 0 414.87 152.8295"
version="1.1"
id="svg42"
sodipodi:docname="simplified_particle_data_flow_split.svg"
inkscape:version="1.4 (e7c3feb100, 2024-10-09)"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<defs
id="defs42" />
<sodipodi:namedview
id="namedview42"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:showpageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
inkscape:deskcolor="#d1d1d1"
inkscape:document-units="pt"
inkscape:zoom="1.6025057"
inkscape:cx="201.55935"
inkscape:cy="141.65316"
inkscape:window-width="2048"
inkscape:window-height="1124"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="1"
inkscape:current-layer="graph0" />
<g
id="graph0"
class="graph"
transform="translate(-17.109999,401.93205)">
<polygon
fill="#ffffff"
stroke="none"
points="473,-434.75 473,4 -4,4 -4,-434.75 "
id="polygon1"
transform="matrix(0.86974843,0,0,0.34852783,20.588993,-250.45698)" />
<text
text-anchor="start"
x="147.09796"
y="-315.17871"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text18-9-3"
style="font-size:10px;line-height:0;fill:#ff21e0;fill-opacity:1"><tspan
sodipodi:role="line"
x="147.09796"
y="-315.17871"
style="font-size:8px;line-height:1;text-align:center;text-anchor:middle;fill:#ff21e0;fill-opacity:1"
id="tspan8">multiple</tspan><tspan
sodipodi:role="line"
x="147.09796"
y="-307.17871"
style="font-size:8px;line-height:1;text-align:center;text-anchor:middle;fill:#ff21e0;fill-opacity:1"
id="tspan9">texture</tspan><tspan
sodipodi:role="line"
x="147.09796"
y="-299.17871"
style="font-size:8px;line-height:1;text-align:center;text-anchor:middle;fill:#ff21e0;fill-opacity:1"
id="tspan7">samples</tspan></text>
<text
text-anchor="start"
x="304.98917"
y="-321.75287"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text18-9-3-5"
style="font-size:10px;line-height:0"><tspan
sodipodi:role="line"
id="tspan44-6-3"
x="304.98917"
y="-321.75287"
style="line-height:1.25;text-align:center;text-anchor:middle" /><tspan
sodipodi:role="line"
x="304.98917"
y="-312.05438"
style="font-size:8px;line-height:1;text-align:center;text-anchor:middle;fill:#ff00db;fill-opacity:1"
id="tspan2-6">multiple</tspan><tspan
sodipodi:role="line"
x="304.98917"
y="-304.05438"
style="font-size:8px;line-height:1;text-align:center;text-anchor:middle;fill:#ff00db;fill-opacity:1"
id="tspan3">render</tspan><tspan
sodipodi:role="line"
x="304.98917"
y="-296.05438"
style="font-size:8px;line-height:1;text-align:center;text-anchor:middle;fill:#ff00db;fill-opacity:1"
id="tspan4">targets</tspan></text>
<!-- read -->
<g
id="node1"
class="node"
transform="translate(-1.8720682,29.953092)">
<title
id="title1">read</title>
<text
text-anchor="start"
x="71.853073"
y="-419.13019"
font-family="Helvetica, Arial, sans-serif"
font-weight="bold"
font-size="14px"
id="text1"
style="line-height:0"><tspan
style="line-height:1.25;text-align:center;text-anchor:middle"
sodipodi:role="line"
id="tspan1"
x="71.853073"
y="-419.13019">read buffer</tspan><tspan
sodipodi:role="line"
x="71.853073"
y="-405.23322"
id="tspan52"
style="font-size:10px;line-height:1.25;text-align:center;text-anchor:middle">(texture samples)</tspan></text>
</g>
<!-- shader -->
<g
id="node3"
class="node"
transform="translate(-1.8720682,29.953092)">
<title
id="title2">shader</title>
<text
text-anchor="start"
x="177.18303"
y="-410.23785"
font-family="Helvetica, Arial, sans-serif"
font-weight="bold"
font-size="14px"
id="text2">fragment shader</text>
</g>
<!-- read&#45;&gt;shader -->
<!-- particle_stateA -->
<polygon
fill="none"
stroke="#000000"
points="107.36,-367 107.36,-315.5 17.11,-315.5 17.11,-367 "
id="polygon3" />
<text
text-anchor="start"
x="61.174942"
y="-344.71234"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text3"><tspan
sodipodi:role="line"
id="tspan42"
x="61.174942"
y="-344.71234"
style="text-align:center;text-anchor:middle">particle state</tspan><tspan
sodipodi:role="line"
id="tspan43"
x="61.174942"
y="-327.21234"
style="font-size:10px;text-align:center;text-anchor:middle">(position, age)</tspan></text>
<polygon
fill="#76ee00"
stroke="none"
points="124.86,-367 124.86,-341.25 107.36,-341.25 107.36,-367 "
id="polygon4" />
<polygon
fill="none"
stroke="#000000"
points="124.86,-367 124.86,-341.25 107.36,-341.25 107.36,-367 "
id="polygon5" />
<text
text-anchor="start"
x="112.36"
y="-348.70001"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text5">a</text>
<polygon
fill="#c1cdcd"
stroke="none"
points="124.86,-341.25 124.86,-315.5 107.36,-315.5 107.36,-341.25 "
id="polygon6" />
<polygon
fill="none"
stroke="#000000"
points="124.86,-341.25 124.86,-315.5 107.36,-315.5 107.36,-341.25 "
id="polygon7" />
<text
text-anchor="start"
x="112.36"
y="-322.95001"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text7">b</text>
<!-- read&#45;&gt;particle_stateA -->
<!-- write -->
<g
id="node2"
class="node"
transform="translate(-1.8720687,25.272921)">
<title
id="title7">write</title>
<text
text-anchor="start"
x="389.26624"
y="-414.45001"
font-family="Helvetica, Arial, sans-serif"
font-weight="bold"
font-size="14px"
id="text8"
style="line-height:0"><tspan
style="line-height:1.25;text-align:center;text-anchor:middle"
sodipodi:role="line"
id="tspan2"
x="389.26624"
y="-414.45001">write buffer</tspan><tspan
sodipodi:role="line"
x="389.26624"
y="-400.55304"
id="tspan55"
style="font-size:10px;line-height:1.25;text-align:center;text-anchor:middle">(render targets)</tspan></text>
</g>
<!-- particle_stateB -->
<polygon
fill="#c1cdcd"
stroke="none"
points="345.48,-367 345.48,-341.25 327.98,-341.25 327.98,-367 "
id="polygon8" />
<polygon
fill="none"
stroke="#000000"
points="345.48,-367 345.48,-341.25 327.98,-341.25 327.98,-367 "
id="polygon9" />
<text
text-anchor="start"
x="332.98001"
y="-348.70001"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text9">a</text>
<polygon
fill="none"
stroke="#000000"
points="431.98,-367 431.98,-315.5 345.48,-315.5 345.48,-367 "
id="polygon10" />
<text
text-anchor="start"
x="389.54495"
y="-344.71234"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text10"><tspan
sodipodi:role="line"
id="tspan48"
x="389.54495"
y="-344.71234"
style="text-align:center;text-anchor:middle">particle state</tspan><tspan
sodipodi:role="line"
id="tspan49"
x="389.54495"
y="-327.21234"
style="font-size:10px;text-align:center;text-anchor:middle">(position, age)</tspan></text>
<polygon
fill="#76ee00"
stroke="none"
points="345.48,-341.25 345.48,-315.5 327.98,-315.5 327.98,-341.25 "
id="polygon11" />
<polygon
fill="none"
stroke="#000000"
points="345.48,-341.25 345.48,-315.5 327.98,-315.5 327.98,-341.25 "
id="polygon12" />
<text
text-anchor="start"
x="332.98001"
y="-322.95001"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text12">b</text>
<!-- write&#45;&gt;particle_stateB -->
<!-- shader&#45;&gt;write -->
<!-- simulationA -->
<!-- shader&#45;&gt;simulationA -->
<!-- clear_input -->
<g
id="node4"
class="node">
<title
id="title13">clear_input</title>
</g>
<!-- clear_plane -->
<g
id="node5"
class="node">
<title
id="title14">clear_plane</title>
</g>
<!-- clear_input&#45;&gt;clear_plane -->
<!-- clear -->
<!-- clear_input&#45;&gt;clear -->
<!-- vertex_bufferB -->
<!-- clear_plane&#45;&gt;vertex_bufferB -->
<!-- plane -->
<!-- clear_plane&#45;&gt;plane -->
<!-- particle_stateC -->
<polygon
fill="none"
stroke="#000000"
points="107.36,-220 17.11,-220 17.11,-271.5 107.36,-271.5 "
id="polygon18"
transform="translate(0,-29.953092)" />
<text
text-anchor="start"
x="61.174942"
y="-279.16544"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text18"><tspan
sodipodi:role="line"
id="tspan44"
x="61.174942"
y="-279.16544"
style="text-align:center;text-anchor:middle">particle state</tspan><tspan
sodipodi:role="line"
id="tspan45"
x="61.174942"
y="-261.66544"
style="font-size:10px;text-align:center;text-anchor:middle">(velocity, random)</tspan></text>
<polygon
fill="#c1cdcd"
stroke="none"
points="124.86,-245.75 107.36,-245.75 107.36,-271.5 124.86,-271.5 "
id="polygon19"
style="fill:#76ee00;fill-opacity:1"
transform="translate(0,-29.953092)" />
<polygon
fill="none"
stroke="#000000"
points="124.86,-245.75 107.36,-245.75 107.36,-271.5 124.86,-271.5 "
id="polygon20"
transform="translate(0,-29.953092)" />
<text
text-anchor="start"
x="112.36"
y="-283.15308"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text20">a</text>
<polygon
fill="#76ee00"
stroke="none"
points="124.86,-220 107.36,-220 107.36,-245.75 124.86,-245.75 "
id="polygon21"
style="fill:#c1cdcd;fill-opacity:1"
transform="translate(0,-29.953092)" />
<polygon
fill="none"
stroke="#000000"
points="124.86,-220 107.36,-220 107.36,-245.75 124.86,-245.75 "
id="polygon22"
transform="translate(0,-29.953092)" />
<text
text-anchor="start"
x="112.36"
y="-257.40308"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text22">b</text>
<!-- particle_stateA&#45;&gt;particle_stateC -->
<!-- particle_stateA&#45;&gt;simulationA -->
<!-- particle_stateB&#45;&gt;particle_stateC -->
<!-- particle_stateD -->
<polygon
fill="#c1cdcd"
stroke="none"
points="345.48,-245.75 327.98,-245.75 327.98,-271.5 345.48,-271.5 "
id="polygon25"
transform="translate(0,-29.953092)" />
<polygon
fill="none"
stroke="#000000"
points="345.48,-245.75 327.98,-245.75 327.98,-271.5 345.48,-271.5 "
id="polygon26"
transform="translate(0,-29.953092)" />
<text
text-anchor="start"
x="332.98001"
y="-283.15308"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text26">a</text>
<polygon
fill="none"
stroke="#000000"
points="431.98,-220 345.48,-220 345.48,-271.5 431.98,-271.5 "
id="polygon27"
transform="translate(0,-29.953092)" />
<text
text-anchor="start"
x="389.54495"
y="-279.16544"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text27"><tspan
sodipodi:role="line"
id="tspan46"
x="389.54495"
y="-279.16544"
style="text-align:center;text-anchor:middle">particle state</tspan><tspan
sodipodi:role="line"
id="tspan47"
x="389.54495"
y="-261.66544"
style="font-size:10px;text-align:center;text-anchor:middle">(velocity, random)</tspan></text>
<polygon
fill="#76ee00"
stroke="none"
points="345.48,-220 327.98,-220 327.98,-245.75 345.48,-245.75 "
id="polygon28"
transform="translate(0,-29.953092)" />
<polygon
fill="none"
stroke="#000000"
points="345.48,-220 327.98,-220 327.98,-245.75 345.48,-245.75 "
id="polygon29"
transform="translate(0,-29.953092)" />
<text
text-anchor="start"
x="332.98001"
y="-257.40308"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text29">b</text>
<!-- particle_stateB&#45;&gt;particle_stateD -->
<!-- particle_stateC&#45;&gt;clear_input -->
<!-- simulationB -->
<g
id="node18"
class="node"
transform="translate(-0.46801706,-62.246269)">
<title
id="title29">simulationB</title>
<ellipse
fill="none"
stroke="#000000"
cx="225.98"
cy="-245.75"
rx="57.549999"
ry="33.444565"
id="ellipse29" />
<text
text-anchor="middle"
x="226.44801"
y="-249.21233"
font-family="Helvetica, Arial, sans-serif"
font-size="14px"
id="text30"><tspan
sodipodi:role="line"
id="tspan59"
x="226.44801"
y="-249.21233">particle</tspan><tspan
sodipodi:role="line"
x="226.44801"
y="-231.71233"
id="tspan60">simulation</tspan></text>
</g>
<!-- particle_stateC&#45;&gt;simulationB -->
<g
id="edge11"
class="edge">
<title
id="title30">particle_stateC:src-&gt;simulationB:w</title>
<path
fill="none"
stroke="#000000"
d="m 124.86,-288.57809 c 15.28,0 30.65,4.73266 41.4,1.24266"
id="path30"
sodipodi:nodetypes="cc" />
<polygon
fill="#000000"
stroke="#000000"
points="156.52,-247.51 166.93,-245.53 157.56,-240.58 "
id="polygon30"
transform="translate(9.2200015,-43.290434)" />
</g>
<!-- colorbufferA -->
<!-- particle_stateD&#45;&gt;colorbufferA -->
<!-- particle_stateD&#45;&gt;vertex_bufferB -->
<!-- colorbufferB -->
<!-- colorbufferA&#45;&gt;colorbufferB -->
<!-- colorbufferC -->
<!-- colorbufferB&#45;&gt;colorbufferC -->
<!-- particle -->
<!-- vertex_bufferB&#45;&gt;particle -->
<!-- clear&#45;&gt;colorbufferA -->
<!-- plane&#45;&gt;colorbufferB -->
<!-- particle&#45;&gt;colorbufferC -->
<!-- simulationA&#45;&gt;particle_stateB -->
<g
id="edge10"
class="edge">
<title
id="title40">simulationA:e-&gt;particle_stateB:dst</title>
<path
fill="none"
stroke="#000000"
d="m 279.77796,-319.13241 c 15.74,0 24.86204,-14.25759 36.06204,-10.81759"
id="path40"
sodipodi:nodetypes="cc" />
<polygon
fill="#000000"
stroke="#000000"
points="315.09,-326.53 316.07,-333.46 325.48,-328.59 "
id="polygon40" />
</g>
<!-- simulationA&#45;&gt;simulationB -->
<!-- simulationB&#45;&gt;particle_stateD -->
<g
id="edge12"
class="edge">
<title
id="title42">simulationB:e-&gt;particle_stateD:dst</title>
<path
fill="none"
stroke="#000000"
d="m 277.22486,-293.32 c 15.74,0 27.78307,27.05096 38.98307,30.49096"
id="path42"
sodipodi:nodetypes="cc" />
<polygon
fill="#000000"
stroke="#000000"
points="315.09,-231.03 316.07,-237.96 325.48,-233.09 "
id="polygon42"
transform="translate(0.6279318,-28.33404)" />
</g>
<g
id="edge9-9"
class="edge"
transform="translate(-1,-0.005)">
<title
id="title22">particle_stateA:src-&gt;simulationA:w</title>
<path
fill="none"
stroke="#000000"
d="m 125.86,-354.12 c 15.28,0 30.34764,18.67423 41.09764,22.16423"
id="path22"
sodipodi:nodetypes="cc" />
<polygon
fill="#000000"
stroke="#000000"
points="166.93,-341.47 156.52,-339.49 157.56,-346.42 "
id="polygon23-3"
transform="rotate(34.394355,144.22934,-321.43324)" />
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 17 KiB

BIN
diagrams/texture_grid.pdf Normal file

Binary file not shown.

46
diagrams/texture_grid.tex Normal file
View File

@ -0,0 +1,46 @@
% Two grids side by side: on the left a (\Width + 1) x (\Height + 1) grid of
% cells labelled p0, p1, ... under the heading "particle state"; on the right a
% grid in which every left-hand cell is repeated four times in a row, under the
% heading "vertex coordinates". A red arrow points from the left grid to the
% right one.
\documentclass{standalone}
% Load xcolor (with its options) before tikz: tikz loads xcolor on its own, and
% requesting extra options once it is already loaded can raise an option-clash
% error. BrickRed below comes from the dvipsnames colour set.
\usepackage[dvipsnames]{xcolor}
\usepackage{tikz}
\usepackage{fix-cm}
\usetikzlibrary {arrows.meta}
% \Width and \Height hold the LAST column/row index, so the left grid has
% \Width + 1 columns and \Height + 1 rows.
\newcommand*{\Width}{1}%
\newcommand*{\Height}{1}%
% The right-hand grid starts at x = \Width + \XOffset.
\newcommand*{\XOffset}{2}%
\begin{document}
\begin{tikzpicture}[scale=1]
  % Left grid: one cell per entry.
  \draw[thick,step=1.0] (0,0) grid (\Width + 1,-\Height - 1);
  \foreach \y in {0,...,\Height} {
    \foreach \x in {0,...,\Width} {
      % Row-major index: the row stride is the number of columns, \Width + 1.
      \node[font=\large] at (\x + 0.5,-\y - 0.3) {
        p\pgfmathparse{int(\y * (\Width + 1) + \x)}\pgfmathresult
      };
      \node[font=\fontsize{8}{8}\selectfont] at (\x + 0.5,-\y - 0.75) {(x,y,z)};
    }
  }
  % Right grid: 4 cells per left-hand column, so it spans 4 * (\Width + 1)
  % units starting at x = \Width + \XOffset (3 to 11 for the default values).
  \draw[thick,step=1.0] (\Width + \XOffset,0) grid (\Width + \XOffset + 4 * \Width + 4,-\Height - 1);
  \foreach \y in {0,...,\Height} {
    \foreach \x in {0,...,\Width} {
      % Each left-hand cell is repeated in four consecutive right-hand cells.
      \foreach \i in {0,...,3} {
        \node[font=\large] at (\Width + \XOffset + \x * 4 + \i + 0.5,-\y - 0.3) {
          p\pgfmathparse{int(\y * (\Width + 1) + \x)}\pgfmathresult
        };
        \node[font=\fontsize{8}{8}\selectfont] at (\Width + \XOffset + \x * 4 + \i + 0.5,-\y - 0.75) {(x,y,z)};
      }
    }
  }
  % The arrow and the two headings use fixed coordinates that match the default
  % \Width = 1, \Height = 1, \XOffset = 2; adjust them if those values change.
  \draw[-{Stealth[BrickRed]}, ultra thick, BrickRed] (2.1,-1) -- (2.9,-1);
  \node[font=\large\bfseries ] at ( 1,0.5) {particle state};
  \node[font=\large\bfseries ] at ( 7,0.5) {vertex coordinates};
\end{tikzpicture}
\end{document}

BIN
doc/R2VB_programming.pdf Normal file

Binary file not shown.

327
index.tex
View File

@ -1357,4 +1357,331 @@ extremely
\href{https://git.musl-libc.org/cgit/musl/tree/src/math/powf.c?id=cb5c057c87240a9534f8e0d9b7ff2560082f6218}{computationally expensive} \href{https://git.musl-libc.org/cgit/musl/tree/src/math/powf.c?id=cb5c057c87240a9534f8e0d9b7ff2560082f6218}{computationally expensive}
otherwise. otherwise.
\section{Progress: 11 Nov 2025}
From 1 Nov 2025 to 11 Nov 2025, I achieved the following:
\begin{itemize}
\item I briefly experimented with \href{https://git.idk.st/bilbo/r500/src/commit/fdff78f1ad/drm/matrix_cubesphere_cubemap.cpp#L1081-L1088}{cubemap} \href{https://git.idk.st/bilbo/r500/src/commit/fdff78f1ad/drm/matrix_cubesphere_cubemap.cpp#L503}{textures}
\item I experimented with point primitives and texture coordinate ``stuffing''
\item I made a demo that generates and uses \href{https://git.idk.st/bilbo/r500/src/commit/fdff78f1ad/src/matrix_cubesphere_tiled.cpp}{macrotiled/microtiled textures}
\item I created a particle system demo where the \href{https://git.idk.st/bilbo/r500/src/commit/fdff78f1ad/src/particle_physics.fs.asm}{particle simulation is computed in a fragment shader}
\item I implemented \href{https://git.idk.st/bilbo/r500/commit/9e281cba583ec4a06e02470310c31cdad6962f64}{support for \texttt{\#include} directives} in my vertex and fragment shader assemblers
\item I used the new \texttt{\#include} feature to more concisely express an \href{https://git.idk.st/bilbo/r500/commit/fdff78f1ad/main/drm/shadertoy_palette_fractal.fs.asm#L26-L29}{unrolled loop}
\end{itemize}
\subsection{Rewriting GLSL ``shadertoy'' shaders as R500 assembly}
I felt \href{https://www.shadertoy.com/view/mtyGWy}{``Shader Art Coding Introduction''}
would be fun to reimplement as R500 fragment shader assembly: it produces
an interesting visual effect, despite not being particularly complicated.
In general when writing assembly programs, I use a few techniques to improve my
productivity and accuracy, prior to writing any actual assembly:
\subsubsection{Expand/rewrite the GLSL code}
In particular, my goal in this step is to make each line of GLSL roughly equal
to one fragment shader instruction. I started by rewriting all of the function
calls the \href{https://www.shadertoy.com/view/mtyGWy}{original} code made as:
\begin{figure}
\href{verbatim/palette_fractal_functions.fs.glsl}{\includegraphics{verbatim/output/palette_fractal_functions.fs.glsl.pdf}}
\end{figure}
Defining replacements for GLSL's built-in functions is not a good practice for
writing GLSL code in general. However, the goal in this specific situation is to
give myself line-by-line hints on the R500 fragment shader assembly that I'll
eventually need to write.
I then rewrote the \texttt{main} function in a similar ``one line of GLSL per
R500 instruction'' style as:
\begin{figure}
\href{verbatim/palette_fractal_main.fs.glsl}{\includegraphics{verbatim/output/palette_fractal_main.fs.glsl.pdf}}
\end{figure}
I also decided the multiplication by 0.125, where it normally would have
required a separate multiply-add instruction, was a perfect excuse to
\href{https://git.idk.st/bilbo/r500/commit/90b486e744c14bb23283218108799186162afaad}{implement assembler support}
for the ``OMOD'' R500 fragment shader feature/syntax that I previously
\href{https://git.idk.st/bilbo/r500/commit/8e6e6e9750a33759b51ed73d3e238ebe77ee3f61}{implemented in my fragment shader disassembler}.
\subsubsection{Assign all temporary variables to registers}
Still prior to writing any fragment shader assembly, I then decided where I
would store each GLSL variable in fragment shader temporary/constant memory:
\begin{figure}
\href{verbatim/palette_fractal_memory.fs.asm}{\includegraphics{verbatim/output/palette_fractal_memory.fs.asm.pdf}}
\end{figure}
I intentionally stored scalar values in the alpha component of each
vector. Given my current fragment shader assembler syntax, this allows for
slightly improved human-readability. For example, doing a scalar
multiply-add with the alpha unit looks like this:
\begin{figure}
\href{verbatim/palette_fractal_alpha_mad.fs.asm}{\includegraphics{verbatim/output/palette_fractal_alpha_mad.fs.asm.pdf}}
\end{figure}
If the \texttt{l} variable were instead stored in the green component, the code
would be slightly uglier, as in:
\begin{figure}
\href{verbatim/palette_fractal_rgb_mad.fs.asm}{\includegraphics{verbatim/output/palette_fractal_rgb_mad.fs.asm.pdf}}
\end{figure}
\subsubsection{Translate the GLSL to R500 fragment shader assembly, line-by-line}
Because the GLSL code was transformed to very closely match the fragment shader
assembly, this also makes it easy to test the fragment shader output when only a
fraction of the complete program is translated (e.g: by commenting out chunks of
the GLSL code to match the current state of the in-progress fragment shader
assembly translation).
The visual appearance of a half-translated variant of this fragment shader is
not intuitive, so this technique greatly improves debuggability. I made at
least two mistakes while translating that were not difficult to debug at a
per-instruction level by comparing the equivalent GLSL code's visuals.
\subsubsection{Translate a fixed-length GLSL loop}
Though the R500 does support it, my fragment shader assembler does not currently
implement support for loops (or any other type of flow control).
R500 fragment shader flow control is also relatively expensive compared to
``loop unrolling'', particularly in this case where the loop body is only 32
instructions, and there are only 4 total iterations of the loop body.
For this reason, I decided I wanted a concise and generalized way to ``repeat''
chunks of source code in my fragment shader assembler, without actually
duplicating the text.
To do this, I
\href{https://git.idk.st/bilbo/r500/commit/9e281cba583ec4a06e02470310c31cdad6962f64}{implemented}
an ``\texttt{\#include}'' feature in my fragment shader assembler. This is
conceptually similar to how \texttt{\#include} works in the C programming
language, though my implementation simply feeds tokens from the included file
directly from the (nested) lexer to the parser, rather than the much more
complex procedure used by the C preprocessor.
With this new feature, the translation of the GLSL loop is very simple:
\begin{figure}
\href{verbatim/palette_fractal_loop.fs.asm}{\includegraphics{verbatim/output/palette_fractal_loop.fs.asm.pdf}}
\end{figure}
The full implementation is committed as
\href{https://git.idk.st/bilbo/r500/src/commit/fdff78f1ad/drm/shadertoy_palette_fractal.fs.asm}{shadertoy\_palette\_fractal.fs.asm} and
\href{https://git.idk.st/bilbo/r500/src/commit/fdff78f1ad/drm/shadertoy_palette_fractal_loop_inner.fs.asm}{shadertoy\_palette\_fractal\_loop\_inner.fs.asm}.
\subsubsection{Demo videos}
\begin{figure}
\includegraphics{videos/shadertoy_fractal.png}
\caption*{R500 DVI capture, \texttt{shadertoy\_palette\_fractal.fs.asm}\\(variant)}
\end{figure}
\begin{figure}
\includegraphics{videos/shadertoy_fractal2.png}
\caption*{R500 DVI capture, \texttt{shadertoy\_palette\_fractal.fs.asm}}
\end{figure}
\subsection{Fragment shader particle simulation}
\subsubsection{Using fragment shaders to render non-pixel data}
ATI documentation \href{doc/R2VB_programming.pdf}{mentioned} the existence of a
``Render to Vertex Buffer'' feature.
The general idea/revelation is:
\begin{itemize}
\item fragment shader output does not need to be ``pixel data''---it can
arbitrarily be assigned any desired meaning
\item by alternating between a pair of buffers, fragment shader output can be
used as the input for the next invocation of the same fragment shader
\end{itemize}
The state manipulated by the pixel shader is double-buffered, where each
iteration of the fragment shader uses alternating ``read'' and ``write''
buffers, as in:
\begin{figure}
\href{diagrams/simplified_particle_data_flow.svg}{\includegraphics{diagrams/simplified_particle_data_flow.svg}}
\end{figure}
On the subsequent iteration of the same computation, state ``b'' would be read
and state ``a'' would be written.
For all prior fragment shader demos, I used the 32-bit \texttt{C4\_8} surface
format:
\begin{figure}
\href{diagrams/c4_8.pdf}{\includegraphics{diagrams/c4_8.pdf}}
\end{figure}
Where 8-bit unsigned integer representations of blue, green, red, and alpha
could be stored in C0, C1, C2, and C3 respectively (or any other arbitrary
color component ordering).
R500 also supports a 128-bit \texttt{C4\_32\_FP} surface format:
\begin{figure}
\href{diagrams/c4_32_fp.pdf}{\includegraphics{diagrams/c4_32_fp.pdf}}
\end{figure}
Where each component contains a 32-bit floating point value. Compared to 8-bit
integers, this increase in precision makes the format more useful for
generalized computation.
R500 conveniently also has an equivalent 128-bit per texel, 32-bit floating
point per component, 4-component texture format.
\subsubsection{Particle simulation data model}
I decided a minimal but still ``mildly interesting'' particle system would need
at least the following state:
\begin{figure}
\href{verbatim/particle_system_data_model.c}{\includegraphics{verbatim/output/particle_system_data_model.c.pdf}}
\end{figure}
\texttt{age} is used to both ``reset'' the particle after some time (allowing
the simulation to repeat indefinitely) and to give the particles non-uniform
reset timing. \texttt{random} is used to further make the behavior of each
particle less uniform. At the start of the particle simulation, all values are
randomly initialized.
This data model requires 8 components in total, which is more than the 4
components provided by both the pixel shader output surface format as well as
the texture sampler texel format. However:
\begin{itemize}
\item R500 fragment shaders can have up to 4 independent render targets
\item R500 fragment shaders can sample from up to 16 independent textures
\end{itemize}
Following this model, it makes sense to break up the data structure like this:
\begin{figure}
\href{verbatim/particle_system_data_model_split.c}{\includegraphics{verbatim/output/particle_system_data_model_split.c.pdf}}
\end{figure}
Where each fragment samples from two separate texture buffers, and has two
separate render targets as output:
\begin{figure}
\href{diagrams/simplified_particle_data_flow_split.svg}{\includegraphics{diagrams/simplified_particle_data_flow_split.svg}}
\end{figure}
\subsubsection{Drawing particles}
I decided to draw particles using the R500's ``quad list'' primitive. In a
non-fragment-shader-computed version of my particle simulation demo, I sent the
particle position as a vertex shader constant, as in:
\begin{figure}
\href{verbatim/particle_system_cpu.cpp}{\includegraphics{verbatim/output/particle_system_cpu.cpp.pdf}}
\end{figure}
The vertex shader is then able to calculate the quad vertex positions using a
vertex shader program that is equivalent to this GLSL code:
\begin{figure}
\href{verbatim/particle_system_position.glsl}{\includegraphics{verbatim/output/particle_system_position.glsl.pdf}}
\end{figure}
This works reasonably well for small particle system demos where particle
position is calculated on the CPU. However, the goal is to compute (much larger)
particle system positions via the pixel shader, and it would be highly preferred
that the particle system state never leaves R500 VRAM. In the latter case, the
``combine quad position coordinates with particle position coordinates via
vertex shader constants'' approach does not work for several reasons:
\begin{itemize}
\item R500 constant memory has 256 vectors; I'd like to make particle systems
with at least 100,000 particles.
\item The R500 pixel shader is not able to write to vertex shader constant
memory, it can only write to texture memory.
\item The \texttt{radeon} Linux kernel module generates a segmentation fault
in kernel space when given an indirect buffer larger than approximately 2MB (a Linux
kernel bug, not a R500 hardware limitation). Including the overhead of
multiple \texttt{3D\_DRAW\_IMMD} commands and vertex constant transfers,
100,000 particles is easily larger than 2MB of indirect buffer.
\end{itemize}
The only remaining option is to store particle position coordinates as a vertex
buffer. However, because I am drawing quads, despite the R500 vertex fetcher's
generous flexibility, the particle state buffer can't be used directly by the
vertex shader because it only operates on individual vertices.
For example, if a particle is at position \texttt{(4.0, 5.0, 6.0)}, the data
that needs to be sent to the vertex shader should be:
\begin{figure}
\href{verbatim/particle_position_vertex_shader_example.c}{\includegraphics{verbatim/output/particle_position_vertex_shader_example.c.pdf}}
\end{figure}
Or, \textit{expressed as a texture}, the desired transformation is:
\begin{figure}
\href{diagrams/texture_grid.pdf}{\includegraphics{diagrams/texture_grid.pdf}}
\end{figure}
While the R500 pixel shader unit can't itself perform this transformation
directly, the transformation can indeed be achieved by ``scaling'' the particle
state via the R500 setup engine and fragment interpolators using point texture
sampling.
Doing this via point texture sampling is absolutely critical, because a linear
interpolation between the state of two adjacent-in-memory particles is a
completely meaningless operation in this context.
This is implemented in
\href{https://git.idk.st/bilbo/r500/src/branch/main/src/particle_oriented_animated_quad_vbuf_pixel_shader.cpp#L583}{\texttt{\_copy\_to\_vertexbuffer}}
as simply ``rendering'' the particle positions into a viewport that is 4x wider
than the width of the original particle state texture.
\subsubsection{The complete rendering pipeline}
All buffers in the following diagram are entirely stored in R500 texture memory,
and are never transferred to x86 RAM.
\begin{figure}
\href{diagrams/complete_particle_data_flow.svg}{\includegraphics{diagrams/complete_particle_data_flow.svg}}
\end{figure}
The full rendering pipeline implementation is committed as
\href{https://git.idk.st/bilbo/r500/src/commit/fdff78f1ad/src/particle_oriented_animated_quad_vbuf_pixel_shader.cpp}{particle\_oriented\_animated\_quad\_vbuf\_pixel\_shader.cpp}.
The full particle simulation pixel shader implementation is committed as
\href{https://git.idk.st/bilbo/r500/src/commit/fdff78f1ad/src/particle_physics.fs.asm}{particle\_physics.fs.asm}.
\subsubsection{Demo video}
Speed comparison of my test system's Pentium 4 CPU and R500 pixel shader
simulating the same particle system (131,072 particles):
\begin{figure}
\includegraphics{videos/cpu_particle_simulation.png}
\caption*{R500 DVI capture, \texttt{particle\_oriented\_animated\_quad\_vbuf.cpp}\\(CPU -generated particle system)}
\end{figure}
\begin{figure}
\includegraphics{videos/pixel_shader_particle_simulation.png}
\caption*{R500 DVI capture, \texttt{particle\_oriented\_animated\_quad\_vbuf\_pixel\_shader.cpp}\\(pixel shader -generated particle system)}
\end{figure}
A more colorful variant of the same particle system demo (65,536 particles):
\begin{figure}
\includegraphics{videos/pixel_shader_particle_simulation_color.png}
\caption*{R500 DVI capture, \texttt{particle\_oriented\_animated\_quad\_vbuf\_pixel\_shader.cpp}\\(pixel shader -generated particle system)}
\end{figure}
It is exciting for me to realize that this ``perform generalized computations
via R500 pixel shaders'' technique has myriad other possible applications.
\end{document} \end{document}

View File

@ -1,72 +0,0 @@
set -eux

cd verbatim/
mkdir -p output

# render <file> <minted lexer>
#
# Wraps <file> in a standalone minted document written to <file>.tex, then
# typesets it into output/ (pdflatex is run twice).
render() {
cat <<EOF > $1.tex
\documentclass[varwidth=13.1cm, border={0.0cm 0.0cm 0.0cm 0.0cm}]{standalone}
\usepackage{minted}
\setminted[python]{breaklines, linenos, frame=lines, framesep=2mm, fontsize=\huge, numbersep=5pt}
\standaloneenv{minted}
\begin{document}
\begin{minted}{$2}
EOF
cat $1 >> $1.tex
cat <<EOF >> $1.tex
\end{minted}
\end{document}
EOF
pdflatex -shell-escape -output-directory=output $1.tex
pdflatex -shell-escape -output-directory=output $1.tex
}

# assembly sources are highlighted with the haskell lexer
for i in *.asm; do
render "$i" haskell
done

for i in *.glsl; do
render "$i" glsl
done

for i in *.c; do
render "$i" c
done

20
verbatim/Makefile Normal file
View File

@ -0,0 +1,20 @@
# Renders every source snippet in this directory to output/<name>.pdf by
# calling verbatim.sh with the snippet and the minted lexer to use.

# start from an empty list so a PDF variable inherited from the environment
# cannot leak into the target list
PDF :=
PDF += $(wildcard *.asm)
PDF += $(wildcard *.glsl)
PDF += $(wildcard *.c)
PDF += $(wildcard *.cpp)
PDF_OBJ := $(PDF:%=output/%.pdf)

.PHONY: all
all: $(PDF_OBJ)

# pdflatex is invoked with -output-directory=output, which must already exist;
# it is an order-only prerequisite so its timestamp never forces a rebuild
output:
	mkdir -p $@

# verbatim.sh is listed after the source so that $< is still the snippet, and
# so that editing the template re-renders every PDF
output/%.asm.pdf: %.asm verbatim.sh | output
	sh verbatim.sh $< haskell

output/%.glsl.pdf: %.glsl verbatim.sh | output
	sh verbatim.sh $< glsl

output/%.c.pdf: %.c verbatim.sh | output
	sh verbatim.sh $< c

output/%.cpp.pdf: %.cpp verbatim.sh | output
	sh verbatim.sh $< cpp

View File

@ -0,0 +1,5 @@
-- scalar multiply-add: every source operand and the destination use the
-- alpha (.a) lane
-- d = i * 0.4 + l;
src0.a = const[0] , -- 0.4
src1.a = temp[0] , -- l
src2.a = temp[3] : -- i
temp[1].a = MAD src2.a src0.a src1.a ;

View File

@ -0,0 +1,97 @@
--
-- float length_(vec2 v)
--
-- sqrt(n) is computed as 1 / (1 / sqrt(n)): RSQ followed by RCP
--
-- float n = dot(v, v);
-- (the rg0 swizzle presumably puts a constant 0 in the third lane, so
-- DP3 reduces to a two-component dot product)
src0.rgb = temp[0] : -- v
DP3 src0.rg0 src0.rg0 ,
temp[0].a = DP ;
-- n = 1.0 / sqrt(n);
src0.a = temp[0] : -- n
temp[0].a = RSQ |src0.a| ;
-- n = 1.0 / n;
src0.a = temp[0] : -- n
temp[0].a = RCP src0.a ;
--
-- float pow_(float x, float n)
--
-- x = log2(x);
src0.a = temp[0] : -- x
temp[0].a = LN2 src0.a ;
-- x = x * n;
-- (MAD with a 0 addend, i.e. x * n + 0)
src0.a = temp[0] , -- x
src1.a = temp[1] : -- n
temp[0].a = MAD src0.a src1.a src0.0 ;
-- x = exp2(x);
src0.a = temp[0] : -- x
temp[0].a = EX2 src0.a ;
--
-- float sin_(float x)
--
-- const[0] = { 1.0 / (pi * 2.0), 0, 0, 0 }
--
-- x = x * 0.159154936671257019043 + 0.5;
src0.a = temp[0] , -- x
src1.rgb = const[0] , -- I_PI_2 (r)
src2.a = float(48) : -- 0.5
temp[0].a = MAD src0.a src1.r src2.a ;
-- x = fract(x);
NOP
src0.a = temp[0] : -- x
temp[0].a = FRC src0.a ;
-- x = sin((x - 0.5) * PI_2);
src0.a = float(48) , -- 0.5
src1.a = temp[0] , -- x
srcp.a = neg : -- (src1.a - src0.a)
temp[0].a = SIN srcp.a ;
-- the R500 fragment shader SIN instruction multiplies
-- the operand by 2 * pi
--
-- vec3 palette(float d)
--
-- the call to cos_ is inlined and algebraically simplified
--
-- const[1] = {0.25, 0.40625, 0.5625, 0}
--
-- v = d + (vec3(0.25, 0.40625, 0.5625) + 0.5)
src0.a = temp[0] , -- d
src0.rgb = const[1] , -- vec3(0.25, 0.40625, 0.5625)
src1.rgb = float(48) , -- 0.5
srcp.rgb = add : -- (vec3(0.25, 0.40625, 0.5625) + 0.5)
temp[0].rgb = MAD src0.111 src0.aaa srcp.rgb ;
-- v = fract(v)
src0.rgb = temp[0] : -- v
temp[0].rgb = FRC src0.rgb ;
-- v = v - 0.5
src0.rgb = temp[0] , -- v
src1.rgb = float(48) : -- 0.5
temp[0].rgb = MAD src0.111 src0.rgb -src1.rgb ;
-- v = cos(v)
-- (COS is issued once per color component: r, then g, then b)
src0.rgb = temp[0] : -- v
COS src0.r ,
temp[0].r = SOP ;
src0.rgb = temp[0] : -- v
COS src0.g ,
temp[0].g = SOP ;
src0.rgb = temp[0] : -- v
COS src0.b ,
temp[0].b = SOP ;
-- col = vec3(0.5, 0.5, 0.5) * v + vec3(0.5, 0.5, 0.5)
src0.rgb = temp[0] , -- v
src1.rgb = float(48) : -- 0.5
temp[0].rgb = MAD src1.rgb src0.rgb src1.rgb;

View File

@ -0,0 +1,39 @@
// Replacements for GLSL built-ins, written so that each statement corresponds
// to roughly one R500 fragment shader instruction.

// length(v), computed as 1 / (1 / sqrt(dot(v, v)))
float length_(vec2 v)
{
  float n = dot(v, v);
  n = 1.0 / sqrt(n);
  n = 1.0 / n;
  return n;
}

// pow(x, n), computed as exp2(log2(x) * n)
float pow_(float x, float n)
{
  x = log2(x);
  x = x * n;
  x = exp2(x);
  return x;
}

// sin(x) with the argument first wrapped into one period centered on zero
float sin_(float x)
{
  x = x * 0.159154936671257019043 + 0.5; // 48
  x = fract(x); // nop
  x = sin((x - 0.5) * 6.28318548202514648438); // presubtract
  return x;
}

// cos(x) with the argument first wrapped into one period centered on zero
float cos_(float x)
{
  x = x * 0.159154936671257019043 + 0.5; // 48
  x = fract(x); // nop
  x = cos((x - 0.5) * 6.28318548202514648438); // presubtract
  return x;
}

vec3 palette(float d) {
  vec3 v = d + vec3(0.25, 0.40625, 0.5625); // 40 45 49
  v = v * 6.28318548202514648438; // (2 * pi * (1 / (2 * pi))) == 1
  // cos_ takes a scalar, so it is applied to each component separately
  // (GLSL has no implicit vec3 -> float conversion)
  v = vec3(cos_(v.x), cos_(v.y), cos_(v.z));
  v = vec3(0.5, 0.5, 0.5) * v + vec3(0.5, 0.5, 0.5); // 48
  return v;
}

View File

@ -0,0 +1,25 @@
-- the two initializations below name no source register before the ':';
-- the literal 0 / 1 values come from the operand swizzles
-- vec4 final_color = vec4(0, 0, 0, 1);
:
temp[2].a = MAX src0.1 src0.1 ,
temp[2].rgb = MAX src0.000 src0.000 ;
-- i = 0;
:
temp[3].a = MAX src0.0 src0.0 ;
--------------------------------------------------------------------------------
-- loop start
--------------------------------------------------------------------------------
-- the loop is unrolled: the loop body file is included once per iteration
-- (4 iterations)
#include "shadertoy_palette_fractal_loop_inner.fs.asm"
#include "shadertoy_palette_fractal_loop_inner.fs.asm"
#include "shadertoy_palette_fractal_loop_inner.fs.asm"
#include "shadertoy_palette_fractal_loop_inner.fs.asm"
--------------------------------------------------------------------------------
-- loop end
--------------------------------------------------------------------------------
-- gl_FragColor = final_color;
-- (rgb is copied from temp[2]; alpha is written as the literal 1)
OUT TEX_SEM_WAIT
src0.rgb = temp[2] :
out[0].a = MAX src0.1 src0.1 ,
out[0].rgb = MAX src0.rgb src0.rgb ;

View File

@ -0,0 +1,33 @@
// One GLSL statement per intended R500 fragment shader instruction.
// uv0 and time are not declared in this snippet (presumably a varying and a
// uniform provided by the surrounding shader).
// Trailing comments record where each literal comes from: const[N].c is a
// constant register lane; the bare numbers (48, 60, 80) look like the
// assembler's float(N) inline-constant codes (the assembly spells 0.5 as
// float(48)) -- TODO confirm.
void main()
{
vec2 uv = uv0; // temp[1]
vec4 final_color = vec4(0, 0, 0, 1);
for (float i = 0.0; i < 4.0; i++) {
uv = uv * vec2(1.5, 1.5); // 60
uv = fract(uv);
uv = uv - vec2(0.5, 0.5); // 48
float l = length_(uv0);
float d = i * 0.4 + l; // const[0].a
d = time * 0.4 + d; // const[0].a
vec3 col = palette(d);
d = exp2(-l);
l = length_(uv);
d = l * d;
d = d * 8.0 + time; // 80
d = 0.125 * sin_(d); // omod
d = 1.0 / abs(d);
d = 0.01 * d; // const[0].b
d = pow_(d, 1.2); // const[0].g
// accumulate this iteration's color contribution
final_color.xyz = col * vec3(d, d, d) + final_color.xyz;
}
gl_FragColor = final_color;
}

View File

@ -0,0 +1,8 @@
-- CONST[0] = { time, 1.2, 0.01, 0.4 }
-- CONST[1] = { PI_2, I_PI_2, 0, 0 },
-- CONST[2] = { 0.25, 0.40625, 0.5625, 0 },
-- temp[0] : { uv0.xy , _, l }
-- temp[1] : { uv.xy , _, d }
-- temp[2] : final_color.xyzw
-- temp[3] : {col.xyz , i }

View File

@ -0,0 +1,5 @@
-- the same multiply-add with l and d held in the green lane: each MAD operand
-- is written as a three-lane rgb swizzle (0a0 / 0g0) instead of a single .a
-- d = i * 0.4 + l;
src0.a = const[0] , -- 0.4
src1.rgb = temp[0] , -- l
src2.a = temp[3] : -- i
temp[1].g = MAD src2.0a0 src0.0a0 src1.0g0 ;

View File

@ -0,0 +1,7 @@
// Vertex data for one particle drawn as a quad: 4 vertices of 5 floats each.
// The particle position is repeated in every vertex; only the quad texture
// coordinate differs between them.
const float vertices[] = {
// [particle position] [quad texture coordinate]
4.0, 5.0, 6.0, 0.0, 0.0, // vertex 0
4.0, 5.0, 6.0, 1.0, 0.0, // vertex 1
4.0, 5.0, 6.0, 1.0, 1.0, // vertex 2
4.0, 5.0, 6.0, 0.0, 1.0, // vertex 3
};

View File

@ -0,0 +1,56 @@
// CPU-driven particle rendering: for every particle, upload its position as a
// vertex shader constant and submit one immediate-mode quad whose vertices
// carry only the quad texture coordinates.
const int particles_length = 128;
for (int i = 0; i < particles_length; i++) {
  T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000);

  const vec3 position = particles[i].position;
  const float consts[] = {
    // 0: local space to clip space transformation matrix
    trans[0][0], trans[0][1], trans[0][2], trans[0][3],
    trans[1][0], trans[1][1], trans[1][2], trans[1][3],
    trans[2][0], trans[2][1], trans[2][2], trans[2][3],
    trans[3][0], trans[3][1], trans[3][2], trans[3][3],
    // 4: dx ("right" change of basis vector)
    local_to_view[0][0], local_to_view[0][1], local_to_view[0][2], 0,
    // 5: dy ("up" change of basis vector)
    local_to_view[1][0], local_to_view[1][1], local_to_view[1][2], 0,
    // 6: particle position, scale
    position.x, position.y, position.z, scale
  };
  // transfer the constants to vertex shader constant memory
  ib_vap_pvs_const_cntl(consts, (sizeof (consts)));

  // each vertex is a single vec2 (the quad texture coordinate)
  const int dwords_per_vtx = 2;
  T0V(VAP_VTX_SIZE
     , VAP_VTX_SIZE__DWORDS_PER_VTX(dwords_per_vtx)
     );

  const int vertex_count = 4;
  const vec2 vertices[vertex_count] = {
    {0.0, 0.0f},
    {1.0, 0.0f},
    {1.0, 1.0f},
    {0.0, 1.0f},
  };

  T3(_3D_DRAW_IMMD_2, (1 + vertex_count * dwords_per_vtx) - 1);
  TU( VAP_VF_CNTL__PRIM_TYPE(13) // quad list
    | VAP_VF_CNTL__PRIM_WALK(3) // embedded/immediate vertex data
    | VAP_VF_CNTL__INDEX_SIZE(0)
    | VAP_VF_CNTL__VTX_REUSE_DIS(0)
    | VAP_VF_CNTL__DUAL_INDEX_MODE(0)
    | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0)
    | VAP_VF_CNTL__NUM_VERTICES(vertex_count)
    );

  // transfer particle quad vertices; the quad is indexed by j (the vertex),
  // not by i (the particle), which would read past the 4-element array
  for (int j = 0; j < vertex_count; j++) {
    TF(vertices[j].x);
    TF(vertices[j].y);
  }
}

View File

@ -0,0 +1,9 @@
// Complete per-particle simulation state (8 components in total).
struct particle_state {
vec3 position;
vec3 velocity;
float age; // used to reset the particle after some time
float random; // per-particle value that makes behavior less uniform
};
const int particle_count = 128;
// one state entry per particle
struct particle_state particle_system[particle_count];

View File

@ -0,0 +1,12 @@
// The particle state split into two 4-component records, so that each record
// fits one 4-component texel / render target.
struct position_age {
vec3 position;
float age;
};
struct velocity_random {
vec3 velocity;
float random;
};
const int particle_count = 128;
// two parallel arrays; index i in both refers to the same particle
struct position_age position_age[particle_count];
struct velocity_random velocity_random[particle_count];

View File

@ -0,0 +1,27 @@
#version 120
// Expands one particle into a camera-facing quad: every vertex receives the
// same particle position (as a uniform) and is pushed out along dx/dy
// according to its quad texture coordinate.
uniform mat4 trans; // local space to clip space
uniform vec3 dx; // "right" change of basis vector
uniform vec3 dy; // "up" change of basis vector
uniform vec3 particle_position;
uniform float scale;
attribute vec2 quad_texture;
attribute float age;
void main()
{
  // calculate unit quad position coordinates from texture coordinates:
  // [0, 1] -> [-1, 1], so the quad is centered on the particle
  vec2 quad_position = quad_texture * 2.0 - 1.0;
  vec3 position = particle_position;
  // offset along the "right"/"up" vectors by the quad corner, not by the
  // particle position itself
  position += quad_position.xxx * dx;
  position += quad_position.yyy * dy;
  position *= scale;
  // trans is a mat4, so the position must be extended to a vec4 before the
  // multiplication
  gl_Position = trans * vec4(position, 1.0);
  gl_TexCoord[0] = vec4(quad_texture, 0.0, age);
}

24
verbatim/verbatim.sh Normal file
View File

@ -0,0 +1,24 @@
# Render one source file as a syntax-highlighted standalone PDF.
#
# usage: sh verbatim.sh <source-file> <minted-lexer>
#
# Writes <source-file>.tex next to the source, then runs pdflatex three times
# with -output-directory=output (the output/ directory must already exist).
set -eux

src=$1
lexer=$2
tex=$src.tex

# NOTE(review): \setminted[python]{...} scopes these options to the python
# lexer only, so they do not apply to the lexers passed in by the Makefile
# (haskell, glsl, c, cpp) -- confirm whether that is intended.
#
# The heredocs are quoted so the LaTeX source is copied literally, and all
# expansions are quoted so paths containing whitespace or glob characters work.
{
cat <<'EOF'
\documentclass[varwidth=13.1cm, border={0.0cm 0.0cm 0.0cm 0.0cm}]{standalone}
\usepackage{minted}
\setminted[python]{breaklines, linenos, frame=lines, framesep=2mm, fontsize=\huge, numbersep=5pt}
\standaloneenv{minted}
\begin{document}
\begin{minted}
EOF
printf '{%s}\n' "$lexer"
cat "$src"
cat <<'EOF'
\end{minted}
\end{document}
EOF
} > "$tex"

for pass in 1 2 3; do
pdflatex -shell-escape -output-directory=output "$tex"
done

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 708 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 608 KiB