From 5716b2bc240d3ab7fe164e40d87d0716fc52836e Mon Sep 17 00:00:00 2001 From: Zack Buhman Date: Wed, 29 Oct 2025 23:18:06 -0500 Subject: [PATCH] drm: add matrix --- .gitignore | 1 + drm/Makefile | 8 +- drm/math/float_types.hpp | 15 + drm/math/mat.hpp | 26 + drm/math/mat2x2.hpp | 112 ++++ drm/math/mat3x3.hpp | 201 +++++++ drm/math/mat4x4.hpp | 247 ++++++++ drm/math/transform.hpp | 167 ++++++ drm/math/vec.hpp | 29 + drm/math/vec2.hpp | 154 +++++ drm/math/vec3.hpp | 171 ++++++ drm/math/vec4.hpp | 164 ++++++ drm/matrix.cpp | 1186 ++++++++++++++++++++++++++++++++++++++ drm/matrix.vs.asm | 11 + drm/matrix.vs.inc | 6 + 15 files changed, 2495 insertions(+), 3 deletions(-) create mode 100644 drm/math/float_types.hpp create mode 100644 drm/math/mat.hpp create mode 100644 drm/math/mat2x2.hpp create mode 100644 drm/math/mat3x3.hpp create mode 100644 drm/math/mat4x4.hpp create mode 100644 drm/math/transform.hpp create mode 100644 drm/math/vec.hpp create mode 100644 drm/math/vec2.hpp create mode 100644 drm/math/vec3.hpp create mode 100644 drm/math/vec4.hpp create mode 100644 drm/matrix.cpp create mode 100644 drm/matrix.vs.asm create mode 100644 drm/matrix.vs.inc diff --git a/.gitignore b/.gitignore index 49b8a83..ee2b5b3 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ *.mod *.mod.* *.ko +*.gch \ No newline at end of file diff --git a/drm/Makefile b/drm/Makefile index 9f572f1..fbf9685 100644 --- a/drm/Makefile +++ b/drm/Makefile @@ -1,14 +1,16 @@ OPT = -O0 CFLAGS += -g -CFLAGS += -Wall -Werror -Wfatal-errors -Wno-error=unused-variable -CFLAGS += $(shell pkg-config --cflags libdrm) +CFLAGS += -Wall -Werror -Wfatal-errors -Wno-error=unused-variable -Wno-narrowing -LDFLAGS += $(shell pkg-config --libs libdrm) -lm +LDFLAGS += $(shell pkg-config --cflags --libs libdrm) -lm %: %.c $(CC) $(ARCH) $(CFLAGS) $(LDFLAGS) $(OPT) $< -o $@ +%: %.cpp + $(CXX) $(ARCH) $(CFLAGS) $(LDFLAGS) $(OPT) $< -o $@ + %.vs.inc: %.vs.asm PYTHONPATH=../regs/ python -m assembler.vs $< > $@ diff --git 
a/drm/math/float_types.hpp b/drm/math/float_types.hpp new file mode 100644 index 0000000..e88b9bb --- /dev/null +++ b/drm/math/float_types.hpp @@ -0,0 +1,15 @@ +#pragma once + +#include "vec2.hpp" +#include "vec3.hpp" +#include "vec4.hpp" +#include "mat2x2.hpp" +#include "mat3x3.hpp" +#include "mat4x4.hpp" + +using vec2 = vec<2, float>; +using vec3 = vec<3, float>; +using vec4 = vec<4, float>; +using mat2x2 = mat<2, 2, float>; +using mat3x3 = mat<3, 3, float>; +using mat4x4 = mat<4, 4, float>; diff --git a/drm/math/mat.hpp b/drm/math/mat.hpp new file mode 100644 index 0000000..3fbeaea --- /dev/null +++ b/drm/math/mat.hpp @@ -0,0 +1,26 @@ +#pragma once + +template +struct mat; + +template +inline constexpr typename mat::col_type +col(mat const& m, int c) +{ + typename mat::col_type v; + for (int r = 0; r < R; r++) { + v[r] = m[r][c]; + } + return v; +} + +template +inline constexpr vec<3, T> +col(mat const& m, int c) +{ + vec<3, T> v; + for (int r = 0; r < 3; r++) { + v[r] = m[r][c]; + } + return v; +} diff --git a/drm/math/mat2x2.hpp b/drm/math/mat2x2.hpp new file mode 100644 index 0000000..8b8aec7 --- /dev/null +++ b/drm/math/mat2x2.hpp @@ -0,0 +1,112 @@ +#pragma once + +#include +#include "vec2.hpp" +#include "mat.hpp" + +// +// mat2x2 +// + +template +struct mat<2, 2, T> +{ + typedef vec<2, T> row_type; + typedef vec<2, T> col_type; + +private: + row_type value[2]; + +public: + inline constexpr mat(); + + inline constexpr mat + ( + T const& a00, T const& a01, + T const& a10, T const& a11 + ); + + inline static constexpr int length() { return 4; } + + inline constexpr typename mat<2, 2, T>::row_type & + operator[](int i); + + inline constexpr typename mat<2, 2, T>::row_type const & + operator[](int i) const; + + void operator=(const mat<2, 2, T>&) = delete; + +}; + + +template +inline constexpr mat<2, 2, T>::mat() + : value{std::move(row_type(1, 0)), + std::move(row_type(0, 1))} +{ } + +template +inline constexpr mat<2, 2, T>::mat +( + T const& a00, T const& a01, 
+ T const& a10, T const& a11 +) + : value{std::move(row_type(a00, a01)), + std::move(row_type(a10, a11))} +{ } + +template +inline constexpr typename mat<2, 2, T>::row_type & +mat<2, 2, T>::operator[](int i) +{ + return value[i]; +} + +template +inline constexpr typename mat<2, 2, T>::row_type const & +mat<2, 2, T>::operator[](int i) const +{ + return value[i]; +} + +template +inline constexpr mat<2, 2, T> operator*(mat<2, 2, T> const& m1, mat<2, 2, T> const& m2) +{ +#define c(i, j) ( \ + m1[i][0] * m2[0][j] \ + + m1[i][1] * m2[1][j]) + + return mat<2, 2, T>(c(0,0), c(0,1), + c(1,0), c(1,1)); +#undef c +} + +template +inline constexpr typename mat<2, 2, T>::row_type operator* +( + mat<2, 2, T> const& m, + typename mat<2, 2, T>::col_type const& v +) +{ +#define c(i) ( \ + m[i][0] * v[0] \ + + m[i][1] * v[1]) + + return typename mat<2, 2, T>::row_type(c(0), c(1)); +#undef c +} + +template +inline constexpr mat<2, 2, T> transpose(mat<2, 2, T> const& m) +{ + return mat<2, 2, T>( + m[0][0], m[1][0], + m[0][1], m[1][1] + ); +} + +template +inline constexpr float determinant(mat<2, 2, T> const& a) +{ + return a[0][0] * a[1][1] - a[0][1] * a[1][0]; +} diff --git a/drm/math/mat3x3.hpp b/drm/math/mat3x3.hpp new file mode 100644 index 0000000..3bd64d9 --- /dev/null +++ b/drm/math/mat3x3.hpp @@ -0,0 +1,201 @@ +#pragma once + +#include +#include "vec3.hpp" +#include "mat.hpp" + +// +// mat3x3 +// + +template +struct mat<3, 3, T> +{ + typedef vec<3, T> row_type; + typedef vec<3, T> col_type; + +private: + row_type value[3]; + +public: + inline constexpr mat(); + + inline constexpr mat + ( + T const& a00, T const& a01, T const& a02, + T const& a10, T const& a11, T const& a12, + T const& a20, T const& a21, T const& a22 + ); + + inline static constexpr int length() { return 3; } + + inline constexpr typename mat<3, 3, T>::row_type & + operator[](int i); + + inline constexpr typename mat<3, 3, T>::row_type const & + operator[](int i) const; + + //void operator=(const mat<3, 3, T>&) 
= delete; + +}; + + +template +inline constexpr mat<3, 3, T>::mat() + : value{std::move(row_type(1, 0, 0)), + std::move(row_type(0, 1, 0)), + std::move(row_type(0, 0, 1))} +{ } + +template +inline constexpr mat<3, 3, T>::mat +( + T const& a00, T const& a01, T const& a02, + T const& a10, T const& a11, T const& a12, + T const& a20, T const& a21, T const& a22 +) + : value{std::move(row_type(a00, a01, a02)), + std::move(row_type(a10, a11, a12)), + std::move(row_type(a20, a21, a22))} +{ } + +template +inline constexpr typename mat<3, 3, T>::row_type & +mat<3, 3, T>::operator[](int i) +{ + return value[i]; +} + +template +inline constexpr typename mat<3, 3, T>::row_type const & +mat<3, 3, T>::operator[](int i) const +{ + return value[i]; +} + +template +inline constexpr mat<3, 3, T> operator+(mat<3, 3, T> const& m1, mat<3, 3, T> const& m2) +{ +#define c(i, j) ( m1[i][j] + m2[i][j] ) + + return mat<3, 3, T>(c(0,0), c(0,1), c(0,2), + c(1,0), c(1,1), c(1,2), + c(2,0), c(2,1), c(2,2)); +#undef c +} + +template +inline constexpr mat<3, 3, T> operator*(mat<3, 3, T> const& m1, mat<3, 3, T> const& m2) +{ +#define c(i, j) ( \ + m1[i][0] * m2[0][j] \ + + m1[i][1] * m2[1][j] \ + + m1[i][2] * m2[2][j] ) + + return mat<3, 3, T>(c(0,0), c(0,1), c(0,2), + c(1,0), c(1,1), c(1,2), + c(2,0), c(2,1), c(2,2)); +#undef c +} + +template +inline constexpr mat<3, 3, T> operator*(mat<3, 3, T> const& m1, float s) +{ +#define c(i, j) ( m1[i][j] * s ) + + return mat<3, 3, T>(c(0,0), c(0,1), c(0,2), + c(1,0), c(1,1), c(1,2), + c(2,0), c(2,1), c(2,2)); +#undef c +} + +template +inline constexpr typename mat<3, 3, T>::row_type operator* +( + mat<3, 3, T> const& m, + typename mat<3, 3, T>::col_type const& v +) +{ +#define c(i) ( \ + m[i][0] * v[0] \ + + m[i][1] * v[1] \ + + m[i][2] * v[2] ) + + return typename mat<3, 3, T>::row_type(c(0), c(1), c(2)); +#undef c +} + +template +inline constexpr mat<3, 3, T> transpose(mat<3, 3, T> const& m) +{ + return mat<3, 3, T>( + m[0][0], m[1][0], m[2][0], + 
m[0][1], m[1][1], m[2][1], + m[0][2], m[1][2], m[2][2] + ); +} + +template +inline constexpr mat<2, 2, T> submatrix(mat<3, 3, T> const& a, int r, int c) +{ + mat<2, 2, T> b; + int row2 = 0; + for (int row3 = 0; row3 < 3; row3++) { + if (row3 == r) continue; + int col2 = 0; + for (int col3 = 0; col3 < 3; col3++) { + if (col3 == c) continue; + b[row2][col2] = a[row3][col3]; + col2++; + } + row2++; + } + return b; +} + +template +inline constexpr float minor(mat<3, 3, T> const& a, int r, int c) +{ + mat<2, 2, T> s = submatrix(a, r, c); + float ret = determinant(s); + return ret; +} + +template +inline constexpr float cofactor(mat<3, 3, T> const& a, int r, int c) +{ + float m = minor(a, r, c); + if ((r + c) & 1) + return -m; + else + return m; +} + +template +inline constexpr float determinant(mat<3, 3, T> const& a) +{ + float f0 = cofactor(a, 0, 0); + float f1 = cofactor(a, 0, 1); + float f2 = cofactor(a, 0, 2); + return + a[0][0] * f0 + + a[0][1] * f1 + + a[0][2] * f2; +} + +template +inline constexpr mat<3, 3, T> inverse(mat<3, 3, T> const& a) +{ + mat<3, 3, T> m; + float idet = 1.0f / determinant(a); + m[0][0] = cofactor(a, 0, 0) * idet; + m[1][0] = cofactor(a, 0, 1) * idet; + m[2][0] = cofactor(a, 0, 2) * idet; + m[0][1] = cofactor(a, 1, 0) * idet; + m[1][1] = cofactor(a, 1, 1) * idet; + m[2][1] = cofactor(a, 1, 2) * idet; + m[0][2] = cofactor(a, 2, 0) * idet; + m[1][2] = cofactor(a, 2, 1) * idet; + m[2][2] = cofactor(a, 2, 2) * idet; + return m; +} diff --git a/drm/math/mat4x4.hpp b/drm/math/mat4x4.hpp new file mode 100644 index 0000000..bcddf1f --- /dev/null +++ b/drm/math/mat4x4.hpp @@ -0,0 +1,247 @@ +#pragma once + +#include +#include "vec4.hpp" +#include "mat.hpp" + +// +// mat4x4 +// + +template +struct mat<4, 4, T> +{ + typedef vec<4, T> row_type; + typedef vec<4, T> col_type; + +private: + row_type value[4]; + +public: + inline constexpr mat(); + + inline constexpr mat + ( + T const& a00, T const& a01, T const& a02, T const& a03, + T const& a10, T const& 
a11, T const& a12, T const& a13, + T const& a20, T const& a21, T const& a22, T const& a23, + T const& a30, T const& a31, T const& a32, T const& a33 + ); + + inline static constexpr int length() { return 4; } + + inline constexpr typename mat<4, 4, T>::row_type & + operator[](int i); + + inline constexpr typename mat<4, 4, T>::row_type const & + operator[](int i) const; + + //void operator=(const mat<4, 4, T>&) = delete; + +}; + + +template +inline constexpr mat<4, 4, T>::mat() + : value{std::move(row_type(1, 0, 0, 0)), + std::move(row_type(0, 1, 0, 0)), + std::move(row_type(0, 0, 1, 0)), + std::move(row_type(0, 0, 0, 1))} +{ } + +template +inline constexpr mat<4, 4, T>::mat +( + T const& a00, T const& a01, T const& a02, T const& a03, + T const& a10, T const& a11, T const& a12, T const& a13, + T const& a20, T const& a21, T const& a22, T const& a23, + T const& a30, T const& a31, T const& a32, T const& a33 +) + : value{std::move(row_type(a00, a01, a02, a03)), + std::move(row_type(a10, a11, a12, a13)), + std::move(row_type(a20, a21, a22, a23)), + std::move(row_type(a30, a31, a32, a33))} +{ } + + +template +inline constexpr typename mat<4, 4, T>::row_type & +mat<4, 4, T>::operator[](int i) +{ + return value[i]; +} + +template +inline constexpr typename mat<4, 4, T>::row_type const & +mat<4, 4, T>::operator[](int i) const +{ + return value[i]; +} + +template +inline constexpr mat<4, 4, T> operator+(mat<4, 4, T> const& m1, mat<4, 4, T> const& m2) +{ +#define c(i, j) ( m1[i][j] + m2[i][j] ) + + return mat<4, 4, T>(c(0,0), c(0,1), c(0,2), c(0,3), + c(1,0), c(1,1), c(1,2), c(1,3), + c(2,0), c(2,1), c(2,2), c(2,3), + c(3,0), c(3,1), c(3,2), c(3,3)); +#undef c +} + +template +inline constexpr mat<4, 4, T> operator*(mat<4, 4, T> const& m1, mat<4, 4, T> const& m2) +{ +#define c(i, j) ( \ + m1[i][0] * m2[0][j] \ + + m1[i][1] * m2[1][j] \ + + m1[i][2] * m2[2][j] \ + + m1[i][3] * m2[3][j] ) + + return mat<4, 4, T>(c(0,0), c(0,1), c(0,2), c(0,3), + c(1,0), c(1,1), c(1,2), c(1,3), + 
c(2,0), c(2,1), c(2,2), c(2,3), + c(3,0), c(3,1), c(3,2), c(3,3)); +#undef c +} + +template +inline constexpr mat<4, 4, T> operator*(mat<4, 4, T> const& m1, float s) +{ +#define c(i, j) ( m1[i][j] * s ) + + return mat<4, 4, T>(c(0,0), c(0,1), c(0,2), c(0,3), + c(1,0), c(1,1), c(1,2), c(1,3), + c(2,0), c(2,1), c(2,2), c(2,3), + c(3,0), c(3,1), c(3,2), c(3,3)); +#undef c +} + +template +inline constexpr typename mat<4, 4, T>::row_type operator* +( + mat<4, 4, T> const& m, + typename mat<4, 4, T>::col_type const& v +) +{ +#define c(i) ( \ + m[i][0] * v[0] \ + + m[i][1] * v[1] \ + + m[i][2] * v[2] \ + + m[i][3] * v[3] ) + + return typename mat<4, 4, T>::row_type(c(0), c(1), c(2), c(3)); +#undef c +} + +template +inline constexpr vec<3, T> operator* +( + mat<4, 4, T> const& m, + vec<3, T> const& v +) +{ +#define c(i) ( \ + m[i][0] * v[0] \ + + m[i][1] * v[1] \ + + m[i][2] * v[2] \ + + m[i][3] ) + + return vec<3, T>(c(0), c(1), c(2)); +#undef c +} + +template +inline constexpr mat<4, 4, T> transpose(mat<4, 4, T> const& m) +{ + return mat<4, 4, T>( + m[0][0], m[1][0], m[2][0], m[3][0], + m[0][1], m[1][1], m[2][1], m[3][1], + m[0][2], m[1][2], m[2][2], m[3][2], + m[0][3], m[1][3], m[2][3], m[3][3] + ); +} + +template +inline constexpr mat<3, 3, T> submatrix(mat<4, 4, T> const& a, int r, int c) +{ + mat<3, 3, T> b; + int row3 = 0; + for (int row4 = 0; row4 < 4; row4++) { + if (row4 == r) continue; + int col3 = 0; + for (int col4 = 0; col4 < 4; col4++) { + if (col4 == c) continue; + b[row3][col3] = a[row4][col4]; + col3++; + } + row3++; + } + return b; +} + +template +inline constexpr float minor(mat<4, 4, T> const& a, int r, int c) +{ + mat<3, 3, T> s = submatrix(a, r, c); + float ret = determinant(s); + return ret; +} + +template +inline constexpr float cofactor(mat<4, 4, T> const& a, int r, int c) +{ + float m = minor(a, r, c); + if ((r + c) & 1) + return -m; + else + return m; +} + +template +inline constexpr float determinant(mat<4, 4, T> const& a) +{ + float f0 = 
cofactor(a, 0, 0); + float f1 = cofactor(a, 0, 1); + float f2 = cofactor(a, 0, 2); + float f3 = cofactor(a, 0, 3); + return + a[0][0] * f0 + + a[0][1] * f1 + + a[0][2] * f2 + + a[0][3] * f3; +} + +template +inline constexpr mat<4, 4, T> inverse(mat<4, 4, T> const& a) +{ + mat<4, 4, T> m; + float idet = 1.0f / determinant(a); + m[0][0] = cofactor(a, 0, 0) * idet; + m[1][0] = cofactor(a, 0, 1) * idet; + m[2][0] = cofactor(a, 0, 2) * idet; + m[3][0] = cofactor(a, 0, 3) * idet; + m[0][1] = cofactor(a, 1, 0) * idet; + m[1][1] = cofactor(a, 1, 1) * idet; + m[2][1] = cofactor(a, 1, 2) * idet; + m[3][1] = cofactor(a, 1, 3) * idet; + m[0][2] = cofactor(a, 2, 0) * idet; + m[1][2] = cofactor(a, 2, 1) * idet; + m[2][2] = cofactor(a, 2, 2) * idet; + m[3][2] = cofactor(a, 2, 3) * idet; + m[0][3] = cofactor(a, 3, 0) * idet; + m[1][3] = cofactor(a, 3, 1) * idet; + m[2][3] = cofactor(a, 3, 2) * idet; + m[3][3] = cofactor(a, 3, 3) * idet; + return m; +} + +template +inline constexpr mat<4, 4, T> identity() +{ + return mat<4, 4, T>(1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f); +} diff --git a/drm/math/transform.hpp b/drm/math/transform.hpp new file mode 100644 index 0000000..f36cba0 --- /dev/null +++ b/drm/math/transform.hpp @@ -0,0 +1,167 @@ +#pragma once + +#include "vec.hpp" +#include "mat.hpp" + +template +inline constexpr mat<4, 4, T> translate(vec<3, T> t) +{ + return { + 1, 0, 0, t.x, + 0, 1, 0, t.y, + 0, 0, 1, t.z, + 0, 0, 0, 1 + }; +} + +template +inline constexpr mat<4, 4, T> scale(vec<3, T> s) +{ + return { + s.x, 0, 0, 0, + 0, s.y, 0, 0, + 0, 0, s.z, 0, + 0, 0, 0, 1 + }; +} + +template +inline constexpr mat<4, 4, T> scale(T s) +{ + return { + s, 0, 0, 0, + 0, s, 0, 0, + 0, 0, s, 0, + 0, 0, 0, 1 + }; +} + +template +inline constexpr mat<4, 4, T> rotate_x(T t) +{ + return { + 1, 0, 0, 0, + 0, cos(t), -sin(t), 0, + 0, sin(t), cos(t), 0, + 0, 0, 0, 1 + }; +} + +template +inline constexpr mat<4, 4, T> rotate_y(T t) +{ + 
return { + cos(t), 0, sin(t), 0, + 0, 1, 0, 0, + -sin(t), 0, cos(t), 0, + 0, 0, 0, 1 + }; +} + +template +inline constexpr mat<4, 4, T> rotate_z(T t) +{ + return { + cos(t), -sin(t), 0, 0, + sin(t), cos(t), 0, 0, + 0, 0, 1, 0, + 0, 0, 0, 1 + }; +} + +template +inline constexpr mat<4, 4, T> rotate_axis_angle(vec<3, T> u, T t) /* rotation by angle t about axis u; u's components are used as-is (not normalized here) */ +{ + T st = sin(t); + T ct = cos(t); + T oct = 1.0 - ct; + + T xx = u.x * u.x; + T xy = u.x * u.y; + T xz = u.x * u.z; + T yy = u.y * u.y; + T yz = u.y * u.z; + T zz = u.z * u.z; + + return { + xx * oct + ct, xy * oct - u.z * st, xz * oct + u.y * st, 0, + xy * oct + u.z * st, yy * oct + ct, yz * oct - u.x * st, 0, + xz * oct - u.y * st, yz * oct + u.x * st, zz * oct + ct, 0, + 0, 0, 0, 1 /* affine bottom row: keeps w intact like the other rotate_* builders (was 0, which zeroed w) */ + }; +} + +template +inline constexpr mat<4, 4, T> rotate_axis_angle(vec<4, T> u) +{ + return rotate_axis_angle({u.x, u.y, u.z}, u.w); +} + +template +inline constexpr mat<4, 4, T> rotate_quaternion(vec<4, T> r) +{ + T xx2 = 2 * r.x * r.x; + T xy2 = 2 * r.x * r.y; + T xz2 = 2 * r.x * r.z; + T xw2 = 2 * r.x * r.w; + T yy2 = 2 * r.y * r.y; + T yz2 = 2 * r.y * r.z; + T yw2 = 2 * r.y * r.w; + T zz2 = 2 * r.z * r.z; + T zw2 = 2 * r.z * r.w; + + return { + 1 - yy2 - zz2, xy2 - zw2, xz2 + yw2, 0, + xy2 + zw2, 1 - xx2 - zz2, yz2 - xw2, 0, + xz2 - yw2, yz2 + xw2, 1 - xx2 - yy2, 0, + 0, 0, 0, 1, + }; +} + +template +inline constexpr mat<4, 4, T> look_at(vec<3, T> eye, vec<3, T> center, vec<3, T> up) +{ + vec<3, T> z = normalize(eye - center); + vec<3, T> y = up; + vec<3, T> x = cross(y, z); + y = cross(z, x); + x = normalize(x); + y = normalize(y); + + mat<4, 4, T> mat = { + x.x, x.y, x.z, -dot(x, eye), + y.x, y.y, y.z, -dot(y, eye), + z.x, z.y, z.z, -dot(z, eye), + 0, 0, 0, 1, + }; + + return mat; +} + +template +inline constexpr vec<3, T> normal_multiply(mat<4, 4, T> m, vec<3, T> n) +{ + vec<4, T> n4 = m * (vec<4, T>){n.x, n.y, n.z, 0.0}; + return {n4.x, n4.y, n4.z}; +} + +template +inline constexpr T inverse_length(vec<3, T> v) +{ + float f = dot(v, v); + return 1.0f
/ sqrt(f); +} + +template +inline constexpr mat<4, 4, T> screen_transform(T x, T y) /* builds a 4x4 matrix from half-extents x/2 and y/2; return type was T, which cannot hold the 16-element initializer below */ +{ + T x2 = x / 2.0; + T y2 = y / 2.0; + + return { + y2, 0, 0, x2, + 0, y2, 0, y2, + 0, 0, 1, 0, + 0, 0, 0, 1 + }; +} diff --git a/drm/math/vec.hpp b/drm/math/vec.hpp new file mode 100644 index 0000000..83699a6 --- /dev/null +++ b/drm/math/vec.hpp @@ -0,0 +1,29 @@ +#pragma once + +template +struct vec; + +template +inline constexpr T magnitude(vec const& v) +{ + return sqrt(dot(v, v)); +} + +template +inline constexpr T magnitude_squared(vec const& v) +{ + return dot(v, v); +} + +template +inline constexpr vec normalize(vec const& v) +{ + T d = 1.0f / magnitude(v); + return v * d; +} + +template +inline constexpr vec<3, T> reflect(vec const& i, vec const& n) +{ + return i - dot(n, i) * n * static_cast(2.0); +} diff --git a/drm/math/vec2.hpp b/drm/math/vec2.hpp new file mode 100644 index 0000000..b617964 --- /dev/null +++ b/drm/math/vec2.hpp @@ -0,0 +1,154 @@ +#pragma once + +#include "vec.hpp" + +// +// vec2 +// + +template +struct vec<2, T> +{ + union { + T val[2]; + struct { T x, y; }; + struct { T u, v; }; + }; + + inline constexpr vec(); + inline constexpr vec(T scalar); + inline constexpr vec(T _x, T _y); + + constexpr inline vec<2, T> operator-() const; + inline constexpr T & operator[](int i); + inline constexpr T const& operator[](int i) const; + inline constexpr vec<2, T>& operator=(vec<2, T> const& v); + inline constexpr vec<2, T>& operator+=(vec<2, T> const& v); + inline constexpr vec<2, T>& operator-=(vec<2, T> const& v); +}; + +template +inline constexpr vec<2, T>::vec() + : x(0), y(0) +{} + +template +inline constexpr vec<2, T>::vec(T scalar) + : x(scalar), y(scalar) +{} + +template +inline constexpr vec<2, T>::vec(T _x, T _y) + : x(_x), y(_y) +{} + +template +constexpr inline vec<2, T> vec<2, T>::operator-() const +{ + return vec<2, T>(-x, -y); +} + +template +inline constexpr T & vec<2, T>::operator[](int i) +{ + return val[i]; +} + +template +inline constexpr T const& vec<2,
T>::operator[](int i) const +{ + return val[i]; +} + +template +inline constexpr vec<2, T>& vec<2, T>::operator=(vec<2, T> const& v) +{ + this->x = static_cast(v.x); + this->y = static_cast(v.y); + return *this; +} + +template +inline constexpr vec<2, T>& vec<2, T>::operator+=(vec<2, T> const& v) +{ + *this = *this + vec<2, T>(v); + return *this; +} + +template +inline constexpr vec<2, T>& vec<2, T>::operator-=(vec<2, T> const& v) +{ + *this = *this - vec<2, T>(v); + return *this; +} + +template +inline constexpr vec<2, T> operator+(vec<2, T> const& v1, vec<2, T> const& v2) +{ + return vec<2, T>(v1.x + v2.x, + v1.y + v2.y); +} + +template +inline constexpr vec<2, T> operator-(vec<2, T> const& v1, vec<2, T> const& v2) +{ + return vec<2, T>(v1.x - v2.x, + v1.y - v2.y); +} + +template +inline constexpr vec<2, T> operator*(vec<2, T> const& v1, vec<2, T> const& v2) +{ + return vec<2, T>(v1.x * v2.x, + v1.y * v2.y); +} + +template +inline constexpr vec<2, T> operator*(vec<2, T> const& v1, T const& scalar) +{ + return v1 * vec<2, T>(scalar); +} + +template +inline constexpr vec<2, T> operator*(T const& scalar, vec<2, T> const& v1) +{ + return vec<2, T>(scalar) * v1; +} + +template +inline constexpr vec<2, T> operator/(vec<2, T> const& v1, vec<2, T> const& v2) +{ + return vec<2, T>(v1.x / v2.x, + v1.y / v2.y); +} + +template +inline constexpr vec<2, T> operator/(vec<2, T> const& v1, T const& scalar) +{ + return v1 / vec<2, T>(scalar); +} + +template +inline constexpr T dot(vec<2, T> const& v1, vec<2, T> const& v2) +{ + vec<2, T> tmp(v1 * v2); + return tmp.x + tmp.y; +} + +template +inline constexpr T cross(vec<2, T> const& v1, vec<2, T> const& v2) +{ + return v1.x * v2.y - v2.x * v1.y; +} + +template +inline constexpr vec<2, T> functor1(T (&func) (T const& x), vec<2, T> const& v) +{ + return vec<2, T>(func(v.x), func(v.y)); +} + +template +inline constexpr vec<2, U> functor1(U (&func) (T const& x), vec<2, T> const& v) +{ + return vec<2, U>(func(v.x), func(v.y)); +} diff 
--git a/drm/math/vec3.hpp b/drm/math/vec3.hpp new file mode 100644 index 0000000..38022de --- /dev/null +++ b/drm/math/vec3.hpp @@ -0,0 +1,171 @@ +#pragma once + +#include "vec.hpp" + +// +// vec3 +// + +template +struct vec<3, T> +{ + union { + T val[3]; + struct { T x, y, z; }; + struct { T r, g, b; }; + }; + + inline constexpr vec(); + inline constexpr vec(T scalar); + inline constexpr vec(T _x, T _y, T _z); + + constexpr inline vec<3, T> operator-() const; + inline constexpr T & operator[](int i); + inline constexpr T const& operator[](int i) const; + inline constexpr vec<3, T>& operator=(vec<3, T> const& v); + inline constexpr vec<3, T>& operator+=(vec<3, T> const& v); + inline constexpr vec<3, T>& operator-=(vec<3, T> const& v); + inline constexpr vec<3, T>& operator*=(T const& scalar); +}; + +template +inline constexpr vec<3, T>::vec() + : x(0), y(0), z(0) +{} + +template +inline constexpr vec<3, T>::vec(T scalar) + : x(scalar), y(scalar), z(scalar) +{} + +template +inline constexpr vec<3, T>::vec(T _x, T _y, T _z) + : x(_x), y(_y), z(_z) +{} + +template +constexpr inline vec<3, T> vec<3, T>::operator-() const +{ + return vec<3, T>(-x, -y, -z); +} + +template +inline constexpr T & vec<3, T>::operator[](int i) +{ + return val[i]; +} + +template +inline constexpr T const& vec<3, T>::operator[](int i) const +{ + return val[i]; +} + +template +inline constexpr vec<3, T>& vec<3, T>::operator=(vec<3, T> const& v) +{ + this->x = static_cast(v.x); + this->y = static_cast(v.y); + this->z = static_cast(v.z); + return *this; +} + +template +inline constexpr vec<3, T>& vec<3, T>::operator+=(vec<3, T> const& v) +{ + *this = *this + vec<3, T>(v); + return *this; +} + +template +inline constexpr vec<3, T>& vec<3, T>::operator-=(vec<3, T> const& v) +{ + *this = *this - vec<3, T>(v); + return *this; +} + +template +inline constexpr vec<3, T>& vec<3, T>::operator*=(T const& scalar) +{ + *this = *this * scalar; + return *this; +} + +template +inline constexpr vec<3, T> 
operator+(vec<3, T> const& v1, vec<3, T> const& v2) +{ + return vec<3, T>(v1.x + v2.x, + v1.y + v2.y, + v1.z + v2.z); +} + +template +inline constexpr vec<3, T> operator-(vec<3, T> const& v1, vec<3, T> const& v2) +{ + return vec<3, T>(v1.x - v2.x, + v1.y - v2.y, + v1.z - v2.z); +} + +template +inline constexpr vec<3, T> operator*(vec<3, T> const& v1, vec<3, T> const& v2) +{ + return vec<3, T>(v1.x * v2.x, + v1.y * v2.y, + v1.z * v2.z); +} + +template +inline constexpr vec<3, T> operator*(vec<3, T> const& v1, T const& scalar) +{ + return v1 * vec<3, T>(scalar); +} + +template +inline constexpr vec<3, T> operator*(T const& scalar, vec<3, T> const& v1) +{ + return vec<3, T>(scalar) * v1; +} + +template +inline constexpr vec<3, T> operator/(vec<3, T> const& v1, vec<3, T> const& v2) +{ + return vec<3, T>(v1.x / v2.x, + v1.y / v2.y, + v1.z / v2.z); +} + +template +inline constexpr vec<3, T> operator/(vec<3, T> const& v1, T const& scalar) +{ + return v1 / vec<3, T>(scalar); +} + +template +inline constexpr T dot(vec<3, T> const& v1, vec<3, T> const& v2) +{ + return + v1.x * v2.x + + v1.y * v2.y + + v1.z * v2.z; +} + +template +inline constexpr vec<3, T> cross(vec<3, T> const& v1, vec<3, T> const& v2) +{ + return vec<3, T>(v1.y * v2.z - v2.y * v1.z, + v1.z * v2.x - v2.z * v1.x, + v1.x * v2.y - v2.x * v1.y); +} + +template +inline constexpr vec<3, T> functor1(T (&func) (T const& x), vec<3, T> const& v) +{ + return vec<3, T>(func(v.x), func(v.y), func(v.z)); +} + +template +inline constexpr vec<3, U> functor1(U (&func) (T const& x), vec<3, T> const& v) +{ + return vec<3, U>(func(v.x), func(v.y), func(v.z)); +} diff --git a/drm/math/vec4.hpp b/drm/math/vec4.hpp new file mode 100644 index 0000000..5f8bb52 --- /dev/null +++ b/drm/math/vec4.hpp @@ -0,0 +1,164 @@ +#pragma once + +#include "vec.hpp" + +// +// vec4 +// + +template +struct vec<4, T> +{ + union { + T val[4]; + struct { T x, y, z, w; }; + struct { T a, r, g, b; }; + }; + + inline constexpr vec(); + inline constexpr 
vec(T scalar); + inline constexpr vec(T _x, T _y, T _z, T _w); + inline constexpr vec(const vec<3, T>& v); + + constexpr inline vec<4, T> operator-() const; + inline constexpr T & operator[](int i); + inline constexpr T const& operator[](int i) const; + inline constexpr vec<4, T>& operator=(vec<4, T> const& v); + inline constexpr vec<4, T>& operator+=(vec<4, T> const& v); + inline constexpr vec<4, T>& operator-=(vec<4, T> const& v); +}; + +template +inline constexpr vec<4, T>::vec() + : x(0), y(0), z(0), w(0) +{} + +template +inline constexpr vec<4, T>::vec(T scalar) + : x(scalar), y(scalar), z(scalar), w(scalar) +{} + +template +inline constexpr vec<4, T>::vec(T _x, T _y, T _z, T _w) + : x(_x), y(_y), z(_z), w(_w) +{} + +template +inline constexpr vec<4, T>::vec(const vec<3, T>& v) + : x(v.x), y(v.y), z(v.z), w(1.f) +{} + +template +constexpr inline vec<4, T> vec<4, T>::operator-() const +{ + return vec<4, T>(-x, -y, -z, -w); +} + +template +inline constexpr T & vec<4, T>::operator[](int i) +{ + return val[i]; +} + +template +inline constexpr T const& vec<4, T>::operator[](int i) const +{ + return val[i]; +} + +template +inline constexpr vec<4, T>& vec<4, T>::operator=(vec<4, T> const& v) +{ + this->x = static_cast(v.x); + this->y = static_cast(v.y); + this->z = static_cast(v.z); + this->w = static_cast(v.w); + return *this; +} + +template +inline constexpr vec<4, T>& vec<4, T>::operator+=(vec<4, T> const& v) +{ + *this = *this + vec<4, T>(v); + return *this; +} + +template +inline constexpr vec<4, T>& vec<4, T>::operator-=(vec<4, T> const& v) +{ + *this = *this - vec<4, T>(v); + return *this; +} + +template +inline constexpr vec<4, T> operator+(vec<4, T> const& v1, vec<4, T> const& v2) +{ + return vec<4, T>(v1.x + v2.x, + v1.y + v2.y, + v1.z + v2.z, + v1.w + v2.w); +} + +template +inline constexpr vec<4, T> operator-(vec<4, T> const& v1, vec<4, T> const& v2) +{ + return vec<4, T>(v1.x - v2.x, + v1.y - v2.y, + v1.z - v2.z, + v1.w - v2.w); +} + +template +inline 
constexpr vec<4, T> operator*(vec<4, T> const& v1, vec<4, T> const& v2) +{ + return vec<4, T>(v1.x * v2.x, + v1.y * v2.y, + v1.z * v2.z, + v1.w * v2.w); +} + +template +inline constexpr vec<4, T> operator*(vec<4, T> const& v1, T const& scalar) +{ + return v1 * vec<4, T>(scalar); +} + +template +inline constexpr vec<4, T> operator*(T const& scalar, vec<4, T> const& v1) +{ + return vec<4, T>(scalar) * v1; +} + +template +inline constexpr vec<4, T> operator/(vec<4, T> const& v1, vec<4, T> const& v2) +{ + return vec<4, T>(v1.x / v2.x, + v1.y / v2.y, + v1.z / v2.z, + v1.w / v2.w); +} + +template +inline constexpr vec<4, T> operator/(vec<4, T> const& v1, T const& scalar) +{ + return v1 / vec<4, T>(scalar); +} + +template +inline constexpr T dot(vec<4, T> const& v1, vec<4, T> const& v2) +{ + vec<4, T> tmp(v1 * v2); + return tmp.x + tmp.y + tmp.z + tmp.w; +} + +template +inline constexpr vec<4, T> functor1(T (&func) (T const& x), vec<4, T> const& v) +{ + return vec<4, T>(func(v.x), func(v.y), func(v.z), func(v.w)); +} + +template +inline constexpr vec<4, U> functor1(U (&func) (T const& x), vec<4, T> const& v) +{ + return vec<4, U>(func(v.x), func(v.y), func(v.z), func(v.w)); +} diff --git a/drm/matrix.cpp b/drm/matrix.cpp new file mode 100644 index 0000000..d526177 --- /dev/null +++ b/drm/matrix.cpp @@ -0,0 +1,1186 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include "3d_registers.h" +#include "3d_registers_undocumented.h" +#include "3d_registers_bits.h" +#include "command_processor.h" + +#include "math/float_types.hpp" +#include "math/transform.hpp" + +#define PI (3.14159274101257324219f) +#define PI_2 (PI * 2.0f) +#define I_PI_2 (1.0f / (PI_2)) + +static inline uint32_t rreg(void * rmmio, uint32_t offset) +{ + uint32_t value = *((volatile uint32_t *)(((uintptr_t)rmmio) + offset)); + asm volatile ("" ::: "memory"); + return value; +} + +static inline void wreg(void * rmmio, 
uint32_t offset, uint32_t value) +{ + *((volatile uint32_t *)(((uintptr_t)rmmio) + offset)) = value; + asm volatile ("" ::: "memory"); +} + +static void * read_file(const char * filename) /* reads the file into a malloc'd, NUL-terminated buffer; returns NULL on failure; the returned buffer is not freed here */ +{ + int fd = open(filename, O_RDONLY); + if (fd == -1) { + fprintf(stderr, "open(%s): %s\n", filename, strerror(errno)); + return NULL; + } + + off_t size = lseek(fd, 0, SEEK_END); + if (size == (off_t)-1) { + fprintf(stderr, "lseek(%s, SEEK_END): %s\n", filename, strerror(errno)); + close(fd); return NULL; /* do not leak the descriptor on failure */ + } + + off_t start = lseek(fd, 0, SEEK_SET); + if (start == (off_t)-1) { + fprintf(stderr, "lseek(%s, SEEK_SET): %s\n", filename, strerror(errno)); + close(fd); return NULL; + } + + void * buf = malloc(size+1); + if (buf == NULL) { fprintf(stderr, "malloc(%s): %s\n", filename, strerror(errno)); close(fd); return NULL; } + ssize_t read_size = read(fd, buf, size); + if (read_size == -1) { + fprintf(stderr, "read(%s): %s\n", filename, strerror(errno)); + free(buf); close(fd); return NULL; /* release both the buffer and the descriptor */ + } + ((char*)buf)[read_size] = 0; + + close(fd); + + return buf; +} + +typedef struct { + int p; + int t; +} face; + +const vec3 position[] = { + { 1.0, 1.0, -1.0}, + { 1.0, -1.0, -1.0}, + { 1.0, 1.0, 1.0}, + { 1.0, -1.0, 1.0}, + {-1.0, 1.0, -1.0}, + {-1.0, -1.0, -1.0}, + {-1.0, 1.0, 1.0}, + {-1.0, -1.0, 1.0}, +}; + +const vec2 texture[] = { + {1.0, 0.0}, + {0.0, 1.0}, + {0.0, 0.0}, + {1.0, 1.0}, +}; + +const face faces[] = { + {5,1}, {3,2}, {1,3}, + {3,1}, {8,2}, {4,3}, + {7,1}, {6,2}, {8,3}, + {2,1}, {8,2}, {6,3}, + {1,1}, {4,2}, {2,3}, + {5,1}, {2,2}, {6,3}, + {5,1}, {7,4}, {3,2}, + {3,1}, {7,4}, {8,2}, + {7,1}, {5,4}, {6,2}, + {2,1}, {4,4}, {8,2}, + {1,1}, {3,4}, {4,2}, + {5,1}, {1,4}, {2,2}, +}; +static const int faces_length = (sizeof (faces)) / (sizeof (faces[0])); + +static const uint32_t fragment_shader[] = { +#include "texture_cube.fs.inc" +#include "clear.fs.inc" +}; +static const int fragment_shader_length = (sizeof (fragment_shader)) / (sizeof (fragment_shader[0])); +static const int fragment_shader_instructions = (fragment_shader_length / 6) - 1; + +static const uint32_t vertex_shader[] = { + #include "matrix.vs.inc" + #include
"clear_nop.vs.inc" +}; +static const int vertex_shader_length = (sizeof (vertex_shader)) / (sizeof (vertex_shader[0])); +static const int vertex_shader_instructions = (vertex_shader_length / 4) - 1; + +union u32_f32 { + uint32_t u32; + float f32; +}; + +static union u32_f32 ib[16384]; + +int _3d_clear(int ix) +{ + ////////////////////////////////////////////////////////////////////////////// + // ZB + ////////////////////////////////////////////////////////////////////////////// + + T0V(ZB_CNTL + , ZB_CNTL__Z_ENABLE__ENABLED // 1 + | ZB_CNTL__ZWRITEENABLE__ENABLE // 1 + ); + T0V(ZB_ZSTENCILCNTL + , ZB_ZSTENCILCNTL__ZFUNC__ALWAYS // always + ); + + T0V(ZB_FORMAT + , ZB_FORMAT__DEPTHFORMAT(2) // 24-bit integer Z, 8 bit stencil + ); + + T0V(ZB_DEPTHOFFSET, 0); + T3(_NOP, 0); + ib[ix++].u32 = 1 * 4; // index into relocs array + + T0V(ZB_DEPTHPITCH + , ZB_DEPTHPITCH__DEPTHPITCH(1600 >> 2) + //| ZB_DEPTHPITCH__DEPTHMACROTILE(1) + //| ZB_DEPTHPITCH__DEPTHMICROTILE(1) + ); + T3(_NOP, 0); + ib[ix++].u32 = 1 * 4; // index into relocs array + + ////////////////////////////////////////////////////////////////////////////// + // RS + ////////////////////////////////////////////////////////////////////////////// + + T0V(RS_IP_0 + , RS_IP__TEX_PTR_S(0) + | RS_IP__TEX_PTR_T(0) + | RS_IP__TEX_PTR_R(0) + | RS_IP__TEX_PTR_Q(0) + | RS_IP__COL_PTR(0) + | RS_IP__COL_FMT(6) // Zero components (0,0,0,1) + | RS_IP__OFFSET_EN(0) + ); + T0V(RS_COUNT + , RS_COUNT__IT_COUNT(0) + | RS_COUNT__IC_COUNT(1) + | RS_COUNT__W_ADDR(0) + | RS_COUNT__HIRES_EN(1) + ); + T0V(RS_INST_COUNT, 0x00000000); + T0V(RS_INST_0, 0x00000000); + + ////////////////////////////////////////////////////////////////////////////// + // TX + ////////////////////////////////////////////////////////////////////////////// + + T0V(TX_ENABLE, 0x00000000); + + ////////////////////////////////////////////////////////////////////////////// + // VAP_PVS +
////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_PVS_CODE_CNTL_0 + , VAP_PVS_CODE_CNTL_0__PVS_FIRST_INST(vertex_shader_instructions) + | VAP_PVS_CODE_CNTL_0__PVS_XYZW_VALID_INST(vertex_shader_instructions) + | VAP_PVS_CODE_CNTL_0__PVS_LAST_INST(vertex_shader_instructions) + ); + T0V(VAP_PVS_CODE_CNTL_1 + , VAP_PVS_CODE_CNTL_1__PVS_LAST_VTX_SRC_INST(vertex_shader_instructions) + ); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_CLIP_CNTL + , VAP_CLIP_CNTL__CLIP_DISABLE(1) + ); + + T0V(VAP_VTE_CNTL + , VAP_VTE_CNTL__VTX_XY_FMT(1) + | VAP_VTE_CNTL__VTX_Z_FMT(1) + ); + + T0V(VAP_CNTL_STATUS, VAP_CNTL_STATUS__PVS_BYPASS(0)); + + T0V(VAP_PROG_STREAM_CNTL_0 + , VAP_PROG_STREAM_CNTL__DATA_TYPE_0__FLOAT_2 + | VAP_PROG_STREAM_CNTL__SKIP_DWORDS_0(0) + | VAP_PROG_STREAM_CNTL__DST_VEC_LOC_0(0) + | VAP_PROG_STREAM_CNTL__LAST_VEC_0(1) + ); + T0V(VAP_PROG_STREAM_CNTL_EXT_0 + , VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0__SELECT_Y + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_FP_ONE + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0__SELECT_FP_ONE + | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111) + ); + + T0V(VAP_VTX_SIZE + , VAP_VTX_SIZE__DWORDS_PER_VTX(2) + ); + + T0V(VAP_INDEX_OFFSET, 0x00000000); + + T0V(VAP_OUT_VTX_FMT_0 + , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)); + T0V(VAP_OUT_VTX_FMT_1 + , 0x0); + + ////////////////////////////////////////////////////////////////////////////// + // GA_US + ////////////////////////////////////////////////////////////////////////////// + + T0V(US_CODE_RANGE + , US_CODE_RANGE__CODE_ADDR(fragment_shader_instructions) + | US_CODE_RANGE__CODE_SIZE(0) + ); + T0V(US_CODE_OFFSET + , US_CODE_OFFSET__OFFSET_ADDR(fragment_shader_instructions) + ); + T0V(US_CODE_ADDR + , US_CODE_ADDR__START_ADDR(0) + | 
US_CODE_ADDR__END_ADDR(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // 3D_DRAW + ////////////////////////////////////////////////////////////////////////////// + + const float center[] = { + 800.0f, 600.0f, + }; + T3(_3D_DRAW_IMMD_2, (1 + 2) - 1); + ib[ix++].u32 + = VAP_VF_CNTL__PRIM_TYPE(1) // point list + | VAP_VF_CNTL__PRIM_WALK(3) + | VAP_VF_CNTL__INDEX_SIZE(0) + | VAP_VF_CNTL__VTX_REUSE_DIS(0) + | VAP_VF_CNTL__DUAL_INDEX_MODE(0) + | VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) + | VAP_VF_CNTL__NUM_VERTICES(1) + ; + for (int i = 0; i < 2; i++) { + ib[ix++].f32 = center[i]; + } + + return ix; +} + +mat4x4 perspective(float low1, float high1, + float low2, float high2, + float low3, float high3) +{ + float scale2 = (high2 - low2) / (high1 - low1); + float scale3 = (high3 - low3) / (high1 - low1); + + mat4x4 m1 = mat4x4(1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, -low1, + 0, 0, 0, 1 + ); + + mat4x4 m2 = mat4x4(1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, scale2, low2, + 0, 0, scale3, low3 + ); + + return m2 * m1; +} + +int _3d_cube(int ix, float theta) +{ + printf("faces length %d\n", faces_length); + + ////////////////////////////////////////////////////////////////////////////// + // ZB + ////////////////////////////////////////////////////////////////////////////// + + T0V(ZB_CNTL + , ZB_CNTL__Z_ENABLE__ENABLED // 1 + | ZB_CNTL__ZWRITEENABLE__ENABLE // 1 + ); + T0V(ZB_ZSTENCILCNTL + , ZB_ZSTENCILCNTL__ZFUNC(1) // less than + ); + + T0V(ZB_FORMAT + , ZB_FORMAT__DEPTHFORMAT(2) // 24-bit integer Z, 8 bit stencil + ); + + T0V(ZB_DEPTHOFFSET, 0); + T3(_NOP, 0); + ib[ix++].u32 = 1 * 4; // index into relocs array + + T0V(ZB_DEPTHPITCH + , ZB_DEPTHPITCH__DEPTHPITCH(1600 >> 2) + //| ZB_DEPTHPITCH__DEPTHMACROTILE(1) + //| ZB_DEPTHPITCH__DEPTHMICROTILE(1) + ); + T3(_NOP, 0); + ib[ix++].u32 = 1 * 4; // index into relocs array + + ////////////////////////////////////////////////////////////////////////////// + // RS + 
////////////////////////////////////////////////////////////////////////////// + + T0V(RS_IP_0 + , RS_IP__TEX_PTR_S(0) + | RS_IP__TEX_PTR_T(1) + | RS_IP__TEX_PTR_R(2) + | RS_IP__TEX_PTR_Q(3) + | RS_IP__COL_PTR(0) + | RS_IP__COL_FMT(0) + | RS_IP__OFFSET_EN(0) + ); + T0V(RS_COUNT + , RS_COUNT__IT_COUNT(4) + | RS_COUNT__IC_COUNT(0) + | RS_COUNT__W_ADDR(0) + | RS_COUNT__HIRES_EN(1) + ); + T0V(RS_INST_COUNT, 0x00000000); + T0V(RS_INST_0 + , RS_INST__TEX_ID(0) + | RS_INST__TEX_CN(1) + | RS_INST__TEX_ADDR(0) + ); + + ////////////////////////////////////////////////////////////////////////////// + // TX + ////////////////////////////////////////////////////////////////////////////// + + T0V(TX_INVALTAGS, 0x00000000); + + T0V(TX_ENABLE + , TX_ENABLE__TEX_0_ENABLE__ENABLE); + T0V(TX_FILTER0_0 + , TX_FILTER0__MAG_FILTER__LINEAR + | TX_FILTER0__MIN_FILTER__LINEAR + ); + T0V(TX_FILTER1_0 + , TX_FILTER1__LOD_BIAS(1) + ); + T0V(TX_BORDER_COLOR_0, 0); + T0V(TX_FORMAT0_0 + , TX_FORMAT0__TXWIDTH(1024 - 1) + | TX_FORMAT0__TXHEIGHT(1024 - 1) + ); + + T0V(TX_FORMAT1_0 + , TX_FORMAT1__TXFORMAT__TX_FMT_8_8_8_8 + | TX_FORMAT1__SEL_ALPHA(5) + | TX_FORMAT1__SEL_RED(0) + | TX_FORMAT1__SEL_GREEN(1) + | TX_FORMAT1__SEL_BLUE(2) + | TX_FORMAT1__TEX_COORD_TYPE__2D + ); + T0V(TX_FORMAT2_0, 0); + + T0V(TX_OFFSET_0 + //, TX_OFFSET__MACRO_TILE(1) + //| TX_OFFSET__MICRO_TILE(1) + , 0 + ); + + T3(_NOP, 0); + ib[ix++].u32 = 2 * 4; // index into relocs array + + ////////////////////////////////////////////////////////////////////////////// + // VAP_PVS + ////////////////////////////////////////////////////////////////////////////// + + float theta1 = theta; + //float theta2 = 3.14f * theta; + float theta2 = theta; + + mat4x4 p = perspective(1.0f, 3.0f, + 0.001f, 0.999f, + 0.5f, 2.0f); + + mat4x4 t = translate(vec3(0, 0, 2)); + mat4x4 rx = rotate_x(theta1); + mat4x4 ry = rotate_y(theta2 * 2); + mat4x4 s = scale(0.5f); + + mat4x4 trans = p * t * rx * ry * s; + + const float consts[] = { + trans[0][0], 
trans[0][1], trans[0][2], trans[0][3], + trans[1][0], trans[1][1], trans[1][2], trans[1][3], + trans[2][0], trans[2][1], trans[2][2], trans[2][3], + trans[3][0], trans[3][1], trans[3][2], trans[3][3], + }; + const int consts_length = (sizeof (consts)) / (sizeof (consts[0])); + assert(consts_length % 4 == 0); + + T0V(VAP_PVS_CONST_CNTL + , VAP_PVS_CONST_CNTL__PVS_CONST_BASE_OFFSET(0) + | VAP_PVS_CONST_CNTL__PVS_MAX_CONST_ADDR((consts_length / 4) - 1) + ); + + T0V(VAP_PVS_VECTOR_INDX_REG + , VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(1024) + ); + + T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, (consts_length - 1)); + for (int i = 0; i < consts_length; i++) + ib[ix++].f32 = consts[i]; + + T0V(VAP_PVS_CODE_CNTL_0 + , VAP_PVS_CODE_CNTL_0__PVS_FIRST_INST(0) + | VAP_PVS_CODE_CNTL_0__PVS_XYZW_VALID_INST((vertex_shader_instructions - 1)) + | VAP_PVS_CODE_CNTL_0__PVS_LAST_INST((vertex_shader_instructions - 1)) + ); + T0V(VAP_PVS_CODE_CNTL_1 + , VAP_PVS_CODE_CNTL_1__PVS_LAST_VTX_SRC_INST((vertex_shader_instructions - 1)) + ); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + + T0V(VAP_CLIP_CNTL + , VAP_CLIP_CNTL__PS_UCP_MODE(3) + ); + + T0V(VAP_VTE_CNTL + , VAP_VTE_CNTL__VPORT_X_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_X_OFFSET_ENA(1) + | VAP_VTE_CNTL__VPORT_Y_SCALE_ENA(1) + | VAP_VTE_CNTL__VPORT_Y_OFFSET_ENA(1) + | VAP_VTE_CNTL__VPORT_Z_SCALE_ENA(0) + | VAP_VTE_CNTL__VPORT_Z_OFFSET_ENA(0) + | VAP_VTE_CNTL__VTX_XY_FMT(0) + | VAP_VTE_CNTL__VTX_Z_FMT(1) + | VAP_VTE_CNTL__VTX_W0_FMT(1) + | VAP_VTE_CNTL__SERIAL_PROC_ENA(0) + ); + + T0V(VAP_CNTL_STATUS, 0); + + T0V(VAP_PROG_STREAM_CNTL_0 + , VAP_PROG_STREAM_CNTL__DATA_TYPE_0__FLOAT_3 + | VAP_PROG_STREAM_CNTL__SKIP_DWORDS_0(0) + | VAP_PROG_STREAM_CNTL__DST_VEC_LOC_0(0) + | VAP_PROG_STREAM_CNTL__LAST_VEC_0(0) + | VAP_PROG_STREAM_CNTL__DATA_TYPE_1__FLOAT_2 + | VAP_PROG_STREAM_CNTL__SKIP_DWORDS_1(0) + | 
VAP_PROG_STREAM_CNTL__DST_VEC_LOC_1(1) + | VAP_PROG_STREAM_CNTL__LAST_VEC_1(1) + ); + T0V(VAP_PROG_STREAM_CNTL_EXT_0 + , VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_0__SELECT_X + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_0__SELECT_Y + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_0__SELECT_Z + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_0__SELECT_FP_ONE + | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_0(0b1111) // XYZW + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_X_1__SELECT_X + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Y_1__SELECT_Y + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_Z_1__SELECT_FP_ZERO + | VAP_PROG_STREAM_CNTL_EXT__SWIZZLE_SELECT_W_1__SELECT_FP_ONE + | VAP_PROG_STREAM_CNTL_EXT__WRITE_ENA_1(0b1111) // XYZW + ); + + T0V(VAP_VTX_SIZE + , VAP_VTX_SIZE__DWORDS_PER_VTX(5) + ); + + T0V(VAP_INDEX_OFFSET, 0x00000000); + + T0V(VAP_VF_MAX_VTX_INDX + , VAP_VF_MAX_VTX_INDX__MAX_INDX(faces_length - 1) + ); + T0V(VAP_VF_MIN_VTX_INDX + , VAP_VF_MIN_VTX_INDX__MIN_INDX(0) + ); + + T0V(VAP_OUT_VTX_FMT_0 + , VAP_OUT_VTX_FMT_0__VTX_POS_PRESENT(1)); + T0V(VAP_OUT_VTX_FMT_1 + , VAP_OUT_VTX_FMT_1__TEX_0_COMP_CNT(4)); + + ////////////////////////////////////////////////////////////////////////////// + // GA_US + ////////////////////////////////////////////////////////////////////////////// + + T0V(US_CODE_RANGE + , US_CODE_RANGE__CODE_ADDR(0) + | US_CODE_RANGE__CODE_SIZE(fragment_shader_instructions - 1) + ); + T0V(US_CODE_OFFSET + , US_CODE_OFFSET__OFFSET_ADDR(0) + ); + T0V(US_CODE_ADDR + , US_CODE_ADDR__START_ADDR(0) + | US_CODE_ADDR__END_ADDR(fragment_shader_instructions - 1) + ); + + ////////////////////////////////////////////////////////////////////////////// + // 3D_DRAW + ////////////////////////////////////////////////////////////////////////////// + + T3(_3D_DRAW_IMMD_2, (1 + faces_length * 5) - 1); + ib[ix++].u32 + = VAP_VF_CNTL__PRIM_TYPE(4) + | VAP_VF_CNTL__PRIM_WALK(3) + | VAP_VF_CNTL__INDEX_SIZE(0) + | VAP_VF_CNTL__VTX_REUSE_DIS(0) + | VAP_VF_CNTL__DUAL_INDEX_MODE(0) + 
| VAP_VF_CNTL__USE_ALT_NUM_VERTS(0) + | VAP_VF_CNTL__NUM_VERTICES(faces_length) + ; + for (int i = 0; i < faces_length; i++) { + vec3 p = position[faces[i].p - 1]; + vec2 t = texture[faces[i].t - 1]; + + ib[ix++].f32 = p.x; + ib[ix++].f32 = p.y; + ib[ix++].f32 = p.z; + ib[ix++].f32 = t.x; + ib[ix++].f32 = t.y; + } + + return ix; +} + +int indirect_buffer(float theta) +{ + int ix = 0; + + T0V(RB3D_DSTCACHE_CTLSTAT + , RB3D_DSTCACHE_CTLSTAT__DC_FLUSH(0x2) // Flush dirty 3D data + | RB3D_DSTCACHE_CTLSTAT__DC_FREE(0x2) // Free 3D tags + ); + + T0V(ZB_ZCACHE_CTLSTAT + , ZB_ZCACHE_CTLSTAT__ZC_FLUSH(1) + | ZB_ZCACHE_CTLSTAT__ZC_FREE(1) + ); + + T0V(WAIT_UNTIL, 0x00020000); + + T0V(GB_AA_CONFIG, 0x00000000); + + T0V(RB3D_AARESOLVE_CTL, 0x00000000); + + T0V(RB3D_CCTL + , RB3D_CCTL__INDEPENDENT_COLORFORMAT_ENABLE(1) + ); + + T0V(ZB_BW_CNTL, 0x00000000); + T0V(ZB_DEPTHCLEARVALUE, 0x00000000); + T0V(SC_HYPERZ_EN, 0x00000000); + T0V(GB_Z_PEQ_CONFIG, 0x00000000); + T0V(ZB_ZTOP + , ZB_ZTOP__ZTOP(1) + ); + T0V(FG_ALPHA_FUNC, 0x00000000); + T0V(ZB_STENCILREFMASK, 0x00000000); + T0V(ZB_STENCILREFMASK_BF, 0x00000000); + + T0V(FG_ALPHA_VALUE, 0x00000000); + T0V(RB3D_ROPCNTL, 0x00000000); + T0V(RB3D_BLENDCNTL, 0x00000000); + T0V(RB3D_ABLENDCNTL, 0x00000000); + T0V(RB3D_COLOR_CHANNEL_MASK + , RB3D_COLOR_CHANNEL_MASK__BLUE_MASK(1) + | RB3D_COLOR_CHANNEL_MASK__GREEN_MASK(1) + | RB3D_COLOR_CHANNEL_MASK__RED_MASK(1) + | RB3D_COLOR_CHANNEL_MASK__ALPHA_MASK(1) + ); + T0V(RB3D_DITHER_CTL, 0x00000000); + T0V(RB3D_CONSTANT_COLOR_AR, 0x00000000); + T0V(RB3D_CONSTANT_COLOR_GB, 0x00000000); + + T0V(SC_CLIP_0_A, 0x00000000); + T0V(SC_CLIP_0_B, 0xffffffff); + T0V(SC_SCREENDOOR, 0x00ffffff); + + T0V(GB_SELECT, 0x00000000); + T0V(FG_FOG_BLEND, 0x00000000); + T0V(GA_OFFSET, 0x00000000); + T0V(SU_TEX_WRAP, 0x00000000); + T0Vf(SU_DEPTH_SCALE, 16777215.0f); + T0V(SU_DEPTH_OFFSET, 0x00000000); + T0V(SC_EDGERULE + , SC_EDGERULE__ER_TRI(5) // L-in,R-out,HT-in,HB-in + | SC_EDGERULE__ER_POINT(9) // 
L-out,R-in,HT-in,HB-out + | SC_EDGERULE__ER_LINE_LR(5) // L-in,R-out,HT-in,HB-out + | SC_EDGERULE__ER_LINE_RL(9) // L-out,R-in,HT-in,HB-out + | SC_EDGERULE__ER_LINE_TB(26) // T-in,B-out,VL-out,VR-in + | SC_EDGERULE__ER_LINE_BT(22) // T-out,B-in,VL-out,VR-in + ); + T0V(RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD + , RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD__BLUE(1) + | RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD__GREEN(1) + | RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD__RED(1) + | RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD__ALPHA(1) + ); + T0V(RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD + , RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD__BLUE(254) + | RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD__GREEN(254) + | RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD__RED(254) + | RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD__ALPHA(254) + ); + T0V(GA_COLOR_CONTROL_PS3, 0x00000000); + T0V(SU_TEX_WRAP_PS3, 0x00000000); + T0V(VAP_PVS_STATE_FLUSH_REG, 0x00000000); + T0V(VAP_PVS_VTX_TIMEOUT_REG + , VAP_PVS_VTX_TIMEOUT_REG__CLK_COUNT(0xffff) + ); + T0Vf(VAP_GB_VERT_CLIP_ADJ, 1.0f); + T0Vf(VAP_GB_VERT_DISC_ADJ, 1.0f); + T0Vf(VAP_GB_HORZ_CLIP_ADJ, 1.0f); + T0Vf(VAP_GB_HORZ_DISC_ADJ, 1.0f); + T0V(VAP_PSC_SGN_NORM_CNTL + , VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_0(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_1(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_2(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_3(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_4(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_5(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_6(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_7(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_8(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_9(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_10(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_11(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_12(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_13(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_14(2) + | VAP_PSC_SGN_NORM_CNTL__SGN_NORM_METHOD_15(2) + ); + T0V(VAP_TEX_TO_COLOR_CNTL, 0x00000000); + + T0V(VAP_CNTL + , 
VAP_CNTL__PVS_NUM_SLOTS(10) + | VAP_CNTL__PVS_NUM_CNTLRS(5) + | VAP_CNTL__PVS_NUM_FPUS(5) + | VAP_CNTL__VAP_NO_RENDER(0) + | VAP_CNTL__VF_MAX_VTX_NUM(12) + | VAP_CNTL__DX_CLIP_SPACE_DEF(0) + | VAP_CNTL__TCL_STATE_OPTIMIZATION(1) + ); + T0V(VAP_PVS_FLOW_CNTL_OPC, 0x00000000); + + T0(VAP_PVS_FLOW_CNTL_ADDRS_LW_0, 31); + for (int i = 0; i < 32; i++) + ib[ix++].u32 = 0x00000000; + + T0(VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, 15); + for (int i = 0; i < 16; i++) + ib[ix++].u32 = 0x00000000; + + T0V(VAP_PVS_VECTOR_INDX_REG + , VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(1536)); + T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, 23); + for (int i = 0; i < 24; i++) + ib[ix++].u32 = 0x00000000; + + T0V(VAP_VTX_STATE_CNTL + , VAP_VTX_STATE_CNTL__COLOR_0_ASSEMBLY_CNTL(1) + | VAP_VTX_STATE_CNTL__COLOR_1_ASSEMBLY_CNTL(1) + | VAP_VTX_STATE_CNTL__COLOR_2_ASSEMBLY_CNTL(1) + | VAP_VTX_STATE_CNTL__COLOR_3_ASSEMBLY_CNTL(1) + | VAP_VTX_STATE_CNTL__COLOR_4_ASSEMBLY_CNTL(1) + | VAP_VTX_STATE_CNTL__COLOR_5_ASSEMBLY_CNTL(1) + | VAP_VTX_STATE_CNTL__COLOR_6_ASSEMBLY_CNTL(1) + | VAP_VTX_STATE_CNTL__COLOR_7_ASSEMBLY_CNTL(1) + | VAP_VTX_STATE_CNTL__UPDATE_USER_COLOR_0_ENA(0) + ); + + T0V(GB_ENABLE, 0x00000000); + T0V(GA_POINT_SIZE + , GA_POINT_SIZE__HEIGHT(7200) + | GA_POINT_SIZE__WIDTH(9600) + ); + T0V(GA_POINT_MINMAX + , GA_POINT_MINMAX__MIN_SIZE(60) + | GA_POINT_MINMAX__MAX_SIZE(60) + ); + T0V(GA_LINE_CNTL + , GA_LINE_CNTL__WIDTH(6) + | GA_LINE_CNTL__END_TYPE(2) + | GA_LINE_CNTL__SORT(0) + ); + T0V(SU_POLY_OFFSET_ENABLE, 0x00000000); + T0V(SU_CULL_MODE, 0x00000000); + T0V(GA_LINE_STIPPLE_CONFIG, 0x00000000); + T0V(GA_LINE_STIPPLE_VALUE, 0x00000000); + T0V(GA_POLY_MODE, 0x00000000); + T0V(GA_ROUND_MODE + , GA_ROUND_MODE__GEOMETRY_ROUND(1) + | GA_ROUND_MODE__COLOR_ROUND(0) + | GA_ROUND_MODE__RGB_CLAMP(1) + | GA_ROUND_MODE__ALPHA_CLAMP(1) + | GA_ROUND_MODE__GEOMETRY_MASK(0) + ); + T0V(SC_CLIP_RULE + , SC_CLIP_RULE__CLIP_RULE(0xffff)); + T0Vf(GA_POINT_S0, 0.0f); + T0Vf(GA_POINT_T0, 1.0f); + T0Vf(GA_POINT_S1, 1.0f); 
+  T0Vf(GA_POINT_T1, 0.0f);
+  // render target 0 is the only one written; targets 1..3 are marked unused
+  T0V(US_OUT_FMT_0
+      , US_OUT_FMT__OUT_FMT(0) // C4_8
+      | US_OUT_FMT__C0_SEL(3) // Blue
+      | US_OUT_FMT__C1_SEL(2) // Green
+      | US_OUT_FMT__C2_SEL(1) // Red
+      | US_OUT_FMT__C3_SEL(0) // Alpha
+      | US_OUT_FMT__OUT_SIGN(0)
+      );
+  T0V(US_OUT_FMT_1
+      , US_OUT_FMT__OUT_FMT(15) // render target is not used
+      );
+  T0V(US_OUT_FMT_2
+      , US_OUT_FMT__OUT_FMT(15) // render target is not used
+      );
+  // was a second write to US_OUT_FMT_2, which left US_OUT_FMT_3 unprogrammed
+  T0V(US_OUT_FMT_3
+      , US_OUT_FMT__OUT_FMT(15) // render target is not used
+      );
+  T0V(GB_MSPOS0
+      , GB_MSPOS0__MS_X0(6)
+      | GB_MSPOS0__MS_Y0(6)
+      | GB_MSPOS0__MS_X1(6)
+      | GB_MSPOS0__MS_Y1(6)
+      | GB_MSPOS0__MS_X2(6)
+      | GB_MSPOS0__MS_Y2(6)
+      | GB_MSPOS0__MSBD0_Y(6)
+      | GB_MSPOS0__MSBD0_X(6)
+      );
+  T0V(GB_MSPOS1
+      , GB_MSPOS1__MS_X3(6)
+      | GB_MSPOS1__MS_Y3(6)
+      | GB_MSPOS1__MS_X4(6)
+      | GB_MSPOS1__MS_Y4(6)
+      | GB_MSPOS1__MS_X5(6)
+      | GB_MSPOS1__MS_Y5(6)
+      | GB_MSPOS1__MSBD1(6)
+      );
+  T0V(US_CONFIG
+      , US_CONFIG__ZERO_TIMES_ANYTHING_EQUALS_ZERO(1)
+      );
+  T0V(US_PIXSIZE
+      , US_PIXSIZE__PIX_SIZE(1)
+      );
+  T0V(US_FC_CTRL, 0);
+
+  T0V(FG_DEPTH_SRC, 0x00000000);
+  T0V(US_W_FMT, 0x00000000);
+  T0V(GA_COLOR_CONTROL
+      , GA_COLOR_CONTROL__RGB0_SHADING(2)
+      | GA_COLOR_CONTROL__ALPHA0_SHADING(2)
+      | GA_COLOR_CONTROL__RGB1_SHADING(2)
+      | GA_COLOR_CONTROL__ALPHA1_SHADING(2)
+      | GA_COLOR_CONTROL__RGB2_SHADING(2)
+      | GA_COLOR_CONTROL__ALPHA2_SHADING(2)
+      | GA_COLOR_CONTROL__RGB3_SHADING(2)
+      | GA_COLOR_CONTROL__ALPHA3_SHADING(2)
+      | GA_COLOR_CONTROL__PROVOKING_VERTEX(3)
+      );
+
+  //////////////////////////////////////////////////////////////////////////////
+  // CB
+  //////////////////////////////////////////////////////////////////////////////
+
+  T0V(RB3D_COLOROFFSET0
+      , 0x00000000 // value replaced by kernel from relocs
+      );
+  T3(_NOP, 0);
+  ib[ix++].u32 = 0 * 4; // index into relocs array
+
+  T0V(RB3D_COLORPITCH0
+      , RB3D_COLORPITCH__COLORPITCH(1600 >> 1)
+      | RB3D_COLORPITCH__COLORFORMAT(6) // ARGB8888
+      );
+  // The COLORPITCH NOP is ignored/not applied due to
+  //
RADEON_CS_KEEP_TILING_FLAGS, but is still required. + T3(_NOP, 0); + ib[ix++].u32 = 0 * 4; // index into relocs array + + ////////////////////////////////////////////////////////////////////////////// + // SC + ////////////////////////////////////////////////////////////////////////////// + + T0V(SC_SCISSOR0 + , SC_SCISSOR0__XS0(0) + | SC_SCISSOR0__YS0(0) + ); + T0V(SC_SCISSOR1 + , SC_SCISSOR1__XS1(1600 - 1) + | SC_SCISSOR1__YS1(1200 - 1) + ); + + ////////////////////////////////////////////////////////////////////////////// + // VAP + ////////////////////////////////////////////////////////////////////////////// + + T0Vf(VAP_VPORT_XSCALE, 600.0f); + T0Vf(VAP_VPORT_XOFFSET, 800.0f); + T0Vf(VAP_VPORT_YSCALE, -600.0f); + T0Vf(VAP_VPORT_YOFFSET, 600.0f); + T0Vf(VAP_VPORT_ZSCALE, 0.5f); + T0Vf(VAP_VPORT_ZOFFSET, 0.5f); + + T0V(VAP_VSM_VTX_ASSM + , 0x00000401); // undocumented + + ////////////////////////////////////////////////////////////////////////////// + // VAP_PVS + ////////////////////////////////////////////////////////////////////////////// + + printf("vs length %d\n", vertex_shader_length); + assert(vertex_shader_length % 4 == 0); + printf("vs instructions %d\n", vertex_shader_instructions); + + T0V(VAP_PVS_VECTOR_INDX_REG + , VAP_PVS_VECTOR_INDX_REG__OCTWORD_OFFSET(0) + ); + T0_ONE_REG(VAP_PVS_VECTOR_DATA_REG_128, vertex_shader_length - 1); + for (int i = 0; i < vertex_shader_length; i++) { + ib[ix++].u32 = vertex_shader[i]; + } + + ////////////////////////////////////////////////////////////////////////////// + // GA_US + ////////////////////////////////////////////////////////////////////////////// + + printf("fs length %d\n", fragment_shader_length); + assert(fragment_shader_length % 6 == 0); + printf("fs instructions %d\n", fragment_shader_instructions); + + T0V(GA_US_VECTOR_INDEX, 0x00000000); + T0_ONE_REG(GA_US_VECTOR_DATA, fragment_shader_length - 1); + for (int i = 0; i < fragment_shader_length; i++) { + ib[ix++].u32 = fragment_shader[i]; + } + + 
////////////////////////////////////////////////////////////////////////////// + // DRAW + ////////////////////////////////////////////////////////////////////////////// + + ix = _3d_clear(ix); + ix = _3d_cube(ix, theta); + + ////////////////////////////////////////////////////////////////////////////// + // padding + ////////////////////////////////////////////////////////////////////////////// + + while ((ix % 8) != 0) { + ib[ix++].u32 = 0x80000000; + } + + return ix; +} + +int create_colorbuffer(int fd, int colorbuffer_size, void ** out_ptr) +{ + int ret; + + struct drm_radeon_gem_create args = { + .size = colorbuffer_size, + .alignment = 4096, + .handle = 0, + .initial_domain = 4, // RADEON_GEM_DOMAIN_VRAM + .flags = 4 + }; + + ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_CREATE, &args, (sizeof (struct drm_radeon_gem_create))); + if (ret != 0) { + perror("drmCommandWriteRead(DRM_RADEON_GEM_CREATE)"); + } + assert(args.handle != 0); + + struct drm_radeon_gem_mmap mmap_args = { + .handle = args.handle, + .offset = 0, + .size = colorbuffer_size, + }; + ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_MMAP, &mmap_args, (sizeof (struct drm_radeon_gem_mmap))); + if (ret != 0) { + perror("drmCommandWriteRead(DRM_RADEON_GEM_MMAP)"); + } + + void * ptr = mmap(0, + colorbuffer_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + fd, + mmap_args.addr_ptr); + assert(ptr != MAP_FAILED); + + // clear colorbuffer + for (int i = 0; i < colorbuffer_size / 4; i++) { + ((uint32_t*)ptr)[i] = 0x00000000; + } + asm volatile ("" ::: "memory"); + + if (out_ptr != NULL) { + *out_ptr = ptr; + } else { + munmap(ptr, colorbuffer_size); + } + + return args.handle; +} + +int main() +{ + ////////////////////////////////////////////////////////////////////////////// + // PCI resource0 + ////////////////////////////////////////////////////////////////////////////// + const char * resource2_path = "/sys/bus/pci/devices/0000:01:00.0/resource2"; + int resource2_fd = open(resource2_path, O_RDWR | O_SYNC); 
+ assert(resource2_fd >= 0); + + uint32_t resource2_size = 0x10000; + void * resource2_base = mmap(0, resource2_size, PROT_READ | PROT_WRITE, MAP_SHARED, resource2_fd, 0); + assert(resource2_base != MAP_FAILED); + + void * rmmio = resource2_base; + + ////////////////////////////////////////////////////////////////////////////// + // DRI card0 + ////////////////////////////////////////////////////////////////////////////// + + int ret; + int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC); + + const int colorbuffer_size = 1600 * 1200 * 4; + int colorbuffer_handle[2]; + int zbuffer_handle; + int texturebuffer_handle; + int flush_handle; + + void * colorbuffer_ptr[2]; + void * zbuffer_ptr; + + // colorbuffer + colorbuffer_handle[0] = create_colorbuffer(fd, colorbuffer_size, &colorbuffer_ptr[0]); + colorbuffer_handle[1] = create_colorbuffer(fd, colorbuffer_size, &colorbuffer_ptr[1]); + zbuffer_handle = create_colorbuffer(fd, colorbuffer_size, &zbuffer_ptr); + + fprintf(stderr, "colorbuffer handle[0] %d\n", colorbuffer_handle[0]); + fprintf(stderr, "colorbuffer handle[1] %d\n", colorbuffer_handle[1]); + fprintf(stderr, "zbuffer handle %d\n", zbuffer_handle); + + // texture + { + const int texture_size = 1024 * 1024 * 4; + + struct drm_radeon_gem_create args = { + .size = texture_size, + .alignment = 4096, + .handle = 0, + .initial_domain = 4, // RADEON_GEM_DOMAIN_VRAM + .flags = 4 + }; + + ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_CREATE, &args, (sizeof (struct drm_radeon_gem_create))); + if (ret != 0) { + perror("drmCommandWriteRead(DRM_RADEON_GEM_CREATE)"); + } + assert(args.handle != 0); + + texturebuffer_handle = args.handle; + + struct drm_radeon_gem_mmap mmap_args = { + .handle = texturebuffer_handle, + .offset = 0, + .size = texture_size, + }; + ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_MMAP, &mmap_args, (sizeof (struct drm_radeon_gem_mmap))); + if (ret != 0) { + perror("drmCommandWriteRead(DRM_RADEON_GEM_MMAP)"); + } + + void * texturebuffer_ptr = mmap(0, 
mmap_args.size, PROT_READ|PROT_WRITE, MAP_SHARED, + fd, mmap_args.addr_ptr); + assert(texturebuffer_ptr != MAP_FAILED); + + // copy texture + void * texture_buf = read_file("../texture/butterfly_1024x1024_argb8888.data"); + assert(texture_buf != NULL); + + for (int i = 0; i < texture_size / 4; i++) { + ((uint32_t*)texturebuffer_ptr)[i] = ((uint32_t*)texture_buf)[i]; + } + asm volatile ("" ::: "memory"); + free(texture_buf); + munmap(texturebuffer_ptr, texture_size); + } + + // flush + { + struct drm_radeon_gem_create args = { + .size = 4096, + .alignment = 4096, + .handle = 0, + .initial_domain = 2, // GTT + .flags = 0 + }; + + ret = drmCommandWriteRead(fd, DRM_RADEON_GEM_CREATE, + &args, (sizeof (args))); + if (ret != 0) { + perror("drmCommandWriteRead(DRM_RADEON_GEM_CREATE)"); + } + assert(args.handle != 0); + flush_handle = args.handle; + } + + uint32_t flags[2] = { + 5, // RADEON_CS_KEEP_TILING_FLAGS | RADEON_CS_END_OF_FRAME + 0, // RADEON_CS_RING_GFX + }; + + int colorbuffer_ix = 0; + float theta = 3.14 / 4.0; + int ib_dwords = indirect_buffer(theta); + + while (true) { + struct drm_radeon_cs_reloc relocs[] = { + { + .handle = colorbuffer_handle[colorbuffer_ix], + .read_domains = 4, // RADEON_GEM_DOMAIN_VRAM + .write_domain = 4, // RADEON_GEM_DOMAIN_VRAM + .flags = 8, + }, + { + .handle = zbuffer_handle, + .read_domains = 4, // RADEON_GEM_DOMAIN_VRAM + .write_domain = 4, // RADEON_GEM_DOMAIN_VRAM + .flags = 8, + }, + { + .handle = texturebuffer_handle, + .read_domains = 4, // RADEON_GEM_DOMAIN_VRAM + .write_domain = 4, // RADEON_GEM_DOMAIN_VRAM + .flags = 8, + }, + { + .handle = flush_handle, + .read_domains = 2, // RADEON_GEM_DOMAIN_GTT + .write_domain = 2, // RADEON_GEM_DOMAIN_GTT + .flags = 0, + } + }; + + struct drm_radeon_cs_chunk chunks[3] = { + { + .chunk_id = RADEON_CHUNK_ID_IB, + .length_dw = ib_dwords, + .chunk_data = (uint64_t)(uintptr_t)ib, + }, + { + .chunk_id = RADEON_CHUNK_ID_RELOCS, + .length_dw = (sizeof (relocs)) / (sizeof (uint32_t)), + 
.chunk_data = (uint64_t)(uintptr_t)relocs, + }, + { + .chunk_id = RADEON_CHUNK_ID_FLAGS, + .length_dw = (sizeof (flags)) / (sizeof (uint32_t)), + .chunk_data = (uint64_t)(uintptr_t)&flags, + }, + }; + + uint64_t chunks_array[3] = { + (uint64_t)(uintptr_t)&chunks[0], + (uint64_t)(uintptr_t)&chunks[1], + (uint64_t)(uintptr_t)&chunks[2], + }; + + struct drm_radeon_cs cs = { + .num_chunks = 3, + .cs_id = 0, + .chunks = (uint64_t)(uintptr_t)chunks_array, + .gart_limit = 0, + .vram_limit = 0, + }; + + ret = drmCommandWriteRead(fd, DRM_RADEON_CS, &cs, (sizeof (struct drm_radeon_cs))); + if (ret != 0) { + perror("drmCommandWriteRead(DRM_RADEON_CS)"); + } + + /* + struct drm_radeon_gem_wait_idle args = { + .handle = flush_handle + }; + while (drmCommandWrite(fd, DRM_RADEON_GEM_WAIT_IDLE, &args, (sizeof (struct drm_radeon_gem_wait_idle))) == -EBUSY); + */ +#define D1CRTC_DOUBLE_BUFFER_CONTROL 0x60ec +#define D1GRPH_PRIMARY_SURFACE_ADDRESS 0x6110 +#define D1GRPH_UPDATE 0x6144 +#define D1GRPH_UPDATE__D1GRPH_SURFACE_UPDATE_PENDING (1 << 2) + + uint32_t d1crtc_double_buffer_control = rreg(rmmio, D1CRTC_DOUBLE_BUFFER_CONTROL); + printf("D1CRTC_DOUBLE_BUFFER_CONTROL: %08x\n", d1crtc_double_buffer_control); + assert(d1crtc_double_buffer_control == (1 << 8)); + + // addresses were retrieved from /sys/kernel/debug/radeon_vram_mm + // + // This assumes GEM buffer allocation always starts from the lowest + // unallocated address. 
+ const uint32_t colorbuffer_addresses[2] = { + 0x813000, + 0xf66000, + }; + + wreg(rmmio, D1GRPH_PRIMARY_SURFACE_ADDRESS, colorbuffer_addresses[colorbuffer_ix]); + while ((rreg(rmmio, D1GRPH_UPDATE) & D1GRPH_UPDATE__D1GRPH_SURFACE_UPDATE_PENDING) != 0); + + // next state + theta += 0.01f; + colorbuffer_ix = (colorbuffer_ix + 1) & 1; + + // next indirect buffer + ib_dwords = indirect_buffer(theta); + + } + + { + int out_fd = open("colorbuffer0.data", O_RDWR|O_CREAT, 0644); + assert(out_fd >= 0); + ssize_t write_length = write(out_fd, colorbuffer_ptr[0], colorbuffer_size); + assert(write_length == colorbuffer_size); + close(out_fd); + } + + { + int out_fd = open("colorbuffer1.data", O_RDWR|O_CREAT, 0644); + assert(out_fd >= 0); + ssize_t write_length = write(out_fd, colorbuffer_ptr[1], colorbuffer_size); + assert(write_length == colorbuffer_size); + close(out_fd); + } + + { + int out_fd = open("zbuffer.data", O_RDWR|O_CREAT, 0644); + assert(out_fd >= 0); + ssize_t write_length = write(out_fd, zbuffer_ptr, colorbuffer_size); + assert(write_length == colorbuffer_size); + close(out_fd); + } + + close(fd); +} diff --git a/drm/matrix.vs.asm b/drm/matrix.vs.asm new file mode 100644 index 0000000..77a94b5 --- /dev/null +++ b/drm/matrix.vs.asm @@ -0,0 +1,11 @@ +-- +-- dot(m[0], v), dot(m[1], v), dot(m[2], v), dot(m[3], v) +-- + +temp[1].x = VE_DOT const[0].xyzw input[0].xyzw ; +temp[1].y = VE_DOT const[1].xyzw input[0].xyzw ; +temp[1].z = VE_DOT const[2].xyzw input[0].xyzw ; +temp[1].w = VE_DOT const[3].xyzw input[0].xyzw ; + +out[0].xyzw = VE_MAX temp[1].xyzw temp[1].xyzw ; +out[1].xyzw = VE_MAX input[1].xyzw input[1].xyzw ; diff --git a/drm/matrix.vs.inc b/drm/matrix.vs.inc new file mode 100644 index 0000000..4a51013 --- /dev/null +++ b/drm/matrix.vs.inc @@ -0,0 +1,6 @@ +0x00102001, 0x00d10002, 0x00d10001, 0x01ffe001, +0x00202001, 0x00d10022, 0x00d10001, 0x01ffe001, +0x00402001, 0x00d10042, 0x00d10001, 0x01ffe001, +0x00802001, 0x00d10062, 0x00d10001, 0x01ffe001, 
+0x00f00207, 0x00d10020, 0x00d10020, 0x01ffe020, +0x00f02207, 0x00d10021, 0x00d10021, 0x01ffe021,