reorganize math and libgcc

I'd like to include bits of libgcc piecemeal--I don't want to "accidentally"
start depending on libgcc bits that I'm not aware of.

Reworked division so that it uses the on-chip division register.
This commit is contained in:
Zack Buhman 2023-01-24 23:27:30 -08:00
parent a4b72e2f85
commit ab809791cd
11 changed files with 2678 additions and 29 deletions

1
.gitignore vendored
View File

@ -3,5 +3,6 @@
*.elf
*.bin
*.iso
*.cue
*.ppm
*.png

View File

@ -1,10 +1,17 @@
CFLAGS = -Isaturn -Imath
OPT = -O3
LIBGCC = $(shell $(CC) -print-file-name=libgcc.a)
all: raytracing.iso
LIB = ./saturn
include $(LIB)/common.mk
LIBGCC = $(shell $(CC) -print-file-name=libgcc.a)
raytracing.elf: main-saturn.o raytracing.o $(LIBGCC)
sh/lib1funcs.o: CFLAGS += -DL_ashiftrt
raytracing.elf: main-saturn.o raytracing.o sh/lib1funcs.o
# clean
clean: clean-sh
clean-sh:
rm -f sh/*.o

View File

@ -31,15 +31,15 @@ void put_pixel(int32_t x, int32_t y, const vec3& color)
return;
}
vec3 px255 = functor1(clamp, color) * fp16_16(255);
frame[sy][sx] = functor1(to_uint8_t, px255);
vec3 px31 = functor1(clamp, color) * fp16_16(31);
frame[sy][sx] = functor1(to_uint8_t, px31);
}
void render_ppm(ostream& out)
{
using namespace canvas;
out << "P3 " << width << ' ' << height << " 255\n";
out << "P3 " << width << ' ' << height << " 31\n";
for (int sy = 0; sy < height; sy++) {
for (int sx = 0; sx < width; sx++) {
const pixel& px = frame[sy][sx];
@ -51,4 +51,6 @@ void render_ppm(ostream& out)
int main()
{
render(put_pixel);
render_ppm(cout);
}

View File

@ -13,23 +13,45 @@ fp16_16 clamp(fp16_16 const& n)
return (n > fp16_16(1) ? fp16_16(1) : (n < fp16_16(0) ? fp16_16(0) : n));
};
uint16_t rgb15(const vec3& color)
template<typename T, int P>
inline constexpr T rgb(const vec3& color)
{
vec3 c = functor1(clamp, color) * fp16_16(255);
constexpr int channel_mask = (1 << P) - 1;
constexpr int last_bit = ((sizeof(T) * 8) - 1);
uint8_t red = (c.r.value >> 16) & 0xff;
uint8_t green = (c.g.value >> 16) & 0xff;
uint8_t blue = (c.b.value >> 16) & 0xff;
vec3 c = functor1(clamp, color) * fp16_16(channel_mask);
return (blue << 10) | (green << 5) | (red << 0);
T red = static_cast<T>(c.r.value >> 16);
T green = static_cast<T>(c.g.value >> 16);
T blue = static_cast<T>(c.b.value >> 16);
return (1 << last_bit)
| (blue << (P * 2))
| (green << (P * 1))
| (red << (P * 0));
}
constexpr auto rgb15 = rgb<uint16_t, 5>;
constexpr auto rgb24 = rgb<uint32_t, 8>;
void put_pixel(int32_t x, int32_t y, const vec3& color)
{
int sx = 320 / 2 + x;
int sy = 240 / 2 - y;
vdp2.vram.u16[512 * sy + sx] = (1 << 15) | rgb15(color);
if (sx >= 320 || sx < 0 || sy >= 240 || sy < 0)
return;
vdp2.vram.u16[512 * sy + sx] = rgb15(color);
}
// Stores `v` into consecutive elements starting at `buf`.
//
// `n` is a size in BYTES, not an element count: every store consumes
// sizeof (T) of it, so ceil(n / sizeof (T)) elements are written (a trailing
// partial element is still written in full). Nothing is written when `n` is
// zero or negative.
template <class T>
void fill(T * buf, T v, int32_t n) noexcept
{
  for (int32_t bytes_left = n; bytes_left > 0; bytes_left -= (sizeof (T))) {
    *buf++ = v;
  }
}
void main_asdf()
@ -40,13 +62,33 @@ void main_asdf()
vdp2.reg.BGON = BGON__N0ON;
vdp2.reg.CHCTLA = ( CHCTLA__N0CHCN__32K_COLOR // 15 bits per pixel, RGB
vdp2.reg.CHCTLA = (
CHCTLA__N0CHCN__32K_COLOR // 15 bits per pixel, RGB
//CHCTLA__N0CHCN__16M_COLOR // 24 bits per pixel
| CHCTLA__N0BMSZ__512x256_DOT
| CHCTLA__N0BMEN__BITMAP_FORMAT
);
vdp2.reg.MPOFN = MPOFN__N0MP(0);
constexpr s32 plane_size = 512 * 256 * 2;
fill<volatile uint32_t>(&vdp2.vram.u32[0x0 / 4], (1 << 31) | (1 << 15), plane_size);
vdp2.reg.SCXIN0 = 0;
vdp2.reg.SCXDN0 = 0;
vdp2.reg.SCYIN0 = 0;
vdp2.reg.SCYDN0 = 0;
vdp2.reg.ZMXIN0 = 1;
vdp2.reg.ZMXDN0 = 0;
vdp2.reg.ZMYIN0 = 1;
vdp2.reg.ZMYDN0 = 0;
vdp2.reg.VCSTA = 0;
vdp2.reg.WCTLA = 0;
vdp2.reg.WCTLB = 0;
vdp2.reg.WCTLC = 0;
render(put_pixel);
}

120
math/div.hpp Normal file
View File

@ -0,0 +1,120 @@
#pragma once
#include <stdint.h>
#ifndef USE_SH2_DVSR
// Unsigned 32-bit / 32-bit division by restoring (shift-and-subtract) long
// division; uses no hardware or library divide.
//
// Returns the quotient n / d; the remainder is discarded.
// With d == 0 every comparison succeeds, so the result is 0xFFFFFFFF rather
// than a trap.
inline constexpr uint32_t
__udiv32(uint32_t n, uint32_t d)
{
  uint32_t quotient = 0;
  uint32_t remainder = 0;
  // Walk the dividend from its most significant bit down to bit 0.
  for (uint32_t mask = uint32_t{1} << 31; mask != 0; mask >>= 1) {
    // Bring the next dividend bit into the running remainder.
    remainder = (remainder << 1) | ((n & mask) != 0 ? 1u : 0u);
    if (remainder >= d) {
      remainder -= d;
      quotient |= mask;
    }
  }
  return quotient;
}
// Unsigned 64-bit / 32-bit division in software (shift-and-subtract).
//
// Returns the LOW 32 bits of the quotient n / base; quotient bits above
// bit 31 are computed but dropped by the uint32_t return type, and the
// remainder is discarded.
inline constexpr uint32_t
__udiv64_32(uint64_t n, uint32_t base)
{
  uint64_t remainder = n;
  uint64_t quotient = 0;

  // Divide the upper word first so the remaining dividend is small enough
  // for the shift-and-subtract loop below.
  const uint32_t upper = remainder >> 32;
  if (upper >= base) {
    const uint32_t upper_quotient = __udiv32(upper, base);
    quotient = (uint64_t)upper_quotient << 32;
    // upper_quotient * base <= upper, so the 32-bit product cannot wrap.
    remainder -= (uint64_t)(upper_quotient * base) << 32;
  }

  // Scale the divisor up (tracking the matching quotient bit) until it
  // reaches the remainder or its top bit is set.
  uint64_t divisor = base;
  uint64_t bit = 1;
  while ((int64_t)divisor > 0 && divisor < remainder) {
    divisor <<= 1;
    bit <<= 1;
  }

  // Subtract the scaled divisor back out, one quotient bit per step.
  for (; bit != 0; bit >>= 1, divisor >>= 1) {
    if (remainder >= divisor) {
      remainder -= divisor;
      quotient += bit;
    }
  }
  return quotient;
}
#else
#include "sh2.h"
// 64-bit / 32-bit division performed through the `sh2.reg` division
// registers (selected when USE_SH2_DVSR is defined).
//
// Writes the divisor to DVSR and the dividend halves to DVDNTH/DVDNTL,
// executes 39 nops, then returns whatever DVDNTL reads back.
//
// NOTE(review): presumably the write to DVDNTL is what starts the divider and
// the quotient replaces DVDNTL when it finishes — confirm against the SH7604
// hardware manual.
// NOTE(review): I believe the SH-2 on-chip divider is a signed unit; confirm
// how operands with the top bit set behave, since this helper is named and
// typed as unsigned.
inline uint32_t
__udiv64_32(uint64_t n, uint32_t d)
{
// Divisor first, then the dividend's upper and lower 32-bit halves.
sh2.reg.DVSR = d;
sh2.reg.DVDNTH = (uint32_t)(n >> 32);
sh2.reg.DVDNTL = (uint32_t)(n);
// 39 cycles of padding before the result register is read back.
// NOTE(review): check whether a read of DVDNTL already stalls until the
// division completes, which would make this padding unnecessary.
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
asm volatile ("nop");
return sh2.reg.DVDNTL;
}
#endif
// Signed 64-bit / 32-bit division built on the unsigned __udiv64_32.
//
// Divides the magnitudes and then applies the sign, so the quotient is
// truncated toward zero. Only 32 bits of quotient are produced; a quotient
// that does not fit in int32_t is returned truncated.
inline int32_t
__div64_32(int64_t n, int32_t d)
{
  // Take magnitudes in unsigned arithmetic so INT64_MIN / INT32_MIN do not
  // overflow when negated.
  const uint64_t n_abs = n >= 0 ? (uint64_t)n : -(uint64_t)n;
  const uint32_t d_abs = d >= 0 ? (uint32_t)d : -(uint32_t)d;
  const uint32_t q_abs = __udiv64_32(n_abs, d_abs);

  if ((n < 0) == (d < 0)) {
    return (int32_t)q_abs;
  }
  // Negate while still unsigned: negating (int32_t)q_abs would be signed
  // overflow (undefined behaviour) when q_abs == 0x80000000, which is the
  // magnitude of a perfectly valid INT32_MIN quotient.
  return (int32_t)(0u - q_abs);
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <stdint.h>
#include "div.hpp"
struct fp_raw_tag {};
@ -21,8 +22,35 @@ struct fp
{
return fp(-value, fp_raw_tag{});
}
inline constexpr fp<T, I, B>& operator=(fp<T, I, B> const& v);
inline constexpr fp<T, I, B>& operator+=(fp<T, I, B> const& v);
inline constexpr fp<T, I, B>& operator-=(fp<T, I, B> const& v);
};
template <typename T, typename I, int B>
inline constexpr fp<T, I, B>& fp<T, I, B>::operator=(fp<T, I, B> const& v)
{
this->value = v.value;
return *this;
}
template <typename T, typename I, int B>
inline constexpr fp<T, I, B>& fp<T, I, B>::operator+=(fp<T, I, B> const& v)
{
*this = *this + v;
return *this;
}
template <typename T, typename I, int B>
inline constexpr fp<T, I, B>& fp<T, I, B>::operator-=(fp<T, I, B> const& v)
{
*this = *this - v;
return *this;
}
template <typename T, typename I, int B>
constexpr inline fp<T, I, B> operator+(const fp<T, I, B>& a, const fp<T, I, B>& b) noexcept
{
@ -59,7 +87,10 @@ constexpr inline fp<T, I, B> operator*(T b, const fp<T, I, B>& a) noexcept
// Fixed-point division: (a / b) in the same fp<T, I, B> format.
//
// The dividend is widened to the intermediate type I and pre-scaled by 2^B so
// that the integer quotient is already in raw fixed-point form. Scaling uses
// the template parameter B (not a literal 16) so formats other than 16.16
// stay correct, and uses a multiply rather than `<<` because left-shifting a
// negative value is undefined before C++20.
//
// NOTE: __div64_32 takes (int64_t, int32_t) and is not constexpr, so this
// operator is only usable at run time and with I/T no wider than those types.
template <typename T, typename I, int B>
constexpr inline fp<T, I, B> operator/(const fp<T, I, B>& a, const fp<T, I, B>& b) noexcept
{
  const I scaled = static_cast<I>(a.value) * (static_cast<I>(1) << B);
  const I p = __div64_32(scaled, static_cast<T>(b.value));
  return fp<T, I, B>(static_cast<T>(p), fp_raw_tag{});
}

View File

@ -25,14 +25,11 @@ struct vec<3, T>
inline constexpr T const& operator[](int i) const;
template<typename U>
inline constexpr vec<3, T>& operator=(vec<3, U> const& v);
inline constexpr vec<3, T>& operator=(vec<3, T> const& v);
template<typename U>
inline constexpr vec<3, T>& operator+=(vec<3, U> const& v);
inline constexpr vec<3, T>& operator+=(vec<3, T> const& v);
template<typename U>
inline constexpr vec<3, T>& operator-=(vec<3, U> const& v);
inline constexpr vec<3, T>& operator-=(vec<3, T> const& v);
};
template <typename T>
@ -66,8 +63,7 @@ inline constexpr T const& vec<3, T>::operator[](int i) const
}
template<typename T>
template<typename U>
inline constexpr vec<3, T>& vec<3, T>::operator=(vec<3, U> const& v)
inline constexpr vec<3, T>& vec<3, T>::operator=(vec<3, T> const& v)
{
this->x = static_cast<T>(v.x);
this->y = static_cast<T>(v.y);
@ -76,16 +72,14 @@ inline constexpr vec<3, T>& vec<3, T>::operator=(vec<3, U> const& v)
}
template<typename T>
template<typename U>
inline constexpr vec<3, T>& vec<3, T>::operator+=(vec<3, U> const& v)
inline constexpr vec<3, T>& vec<3, T>::operator+=(vec<3, T> const& v)
{
*this = *this + vec<3, T>(v);
return *this;
}
template<typename T>
template<typename U>
inline constexpr vec<3, T>& vec<3, T>::operator-=(vec<3, U> const& v)
inline constexpr vec<3, T>& vec<3, T>::operator-=(vec<3, T> const& v)
{
  // Component-wise subtraction. The binary operator used here must be `-`;
  // using `+` would make `a -= b` add instead of subtract.
  *this = *this - v;
  return *this;
@ -115,12 +109,30 @@ inline constexpr vec<3, T> operator*(vec<3, T> const& v1, vec<3, T> const& v2)
v1.z * v2.z);
}
/*
template <typename T>
inline constexpr vec<3, T> operator/(vec<3, T> const& v1, vec<3, T> const& v2)
{
return vec<3, T>(v1.x / v2.x,
v1.y / v2.y,
v1.z / v2.z);
}
*/
template <typename T>
inline constexpr vec<3, T> operator*(vec<3, T> const& v1, T const& scalar)
{
return v1 * vec<3, T>(scalar);
}
/*
template <typename T>
inline constexpr vec<3, T> operator/(vec<3, T> const& v1, T const& scalar)
{
return v1 / vec<3, T>(scalar);
}
*/
template <typename T>
inline constexpr T dot(vec<3, T> const& v1, vec<3, T> const& v2)
{

View File

@ -24,8 +24,24 @@ struct sphere {
vec3 color;
};
// Kind of light source; selects how compute_lighting treats a `light`.
enum class light_type {
// Contributes its intensity directly, independent of the surface.
ambient,
// Light vector is taken from the light's position to the shaded point.
point,
// Light vector is the light's stored direction, the same for every point.
directional
};
// One light source in the scene.
struct light {
// Selects which union member (if any) is meaningful.
light_type type;
// Brightness contribution in 16.16 fixed point.
fp16_16 intensity;
// Both members share storage: compute_lighting reads `position` for point
// lights and `direction` for directional lights; neither is read for
// ambient lights.
union {
vec3 position;
vec3 direction;
};
};
struct scene {
sphere spheres[3];
sphere spheres[4];
light lights[3];
};
constexpr scene scene {
@ -44,17 +60,65 @@ constexpr scene scene {
{-2, 0, 4},
fp16_16(1),
{0, 1, 0},
},
{
{0, -61, 0},
fp16_16(60),
{1, 1, 0},
}
},
{ // lights
{
light_type::ambient, // type
fp16_16(65536 * 0.2, fp_raw_tag{}), // intensity
{{0, 0, 0}} //
},
{
light_type::point, // type
fp16_16(65536 * 0.6, fp_raw_tag{}), // intensity
{{2, 1, 0}} // position
},
{
light_type::directional, // type
fp16_16(65536 * 0.6, fp_raw_tag{}), // intensity
{{1, 4, 4}} // direction
}
}
};
static_assert(scene.spheres[0].center.z.value == (3 << 16));
static_assert(scene.lights[0].intensity.value != 0);
static_assert(scene.lights[1].position.x.value == (2 << 16));
// Pair of values returned by intersect_ray_sphere; trace_ray compares each
// against its t_min/t_max bounds.
// NOTE(review): presumably the two ray parameters at which the ray meets the
// sphere — confirm against intersect_ray_sphere's body.
struct t1_t2 {
fp16_16 t1;
fp16_16 t2;
};
// Sums the light arriving at `point` on a surface with normal `normal`, over
// every entry in `scene.lights`.
//
// - Ambient lights add their intensity unconditionally.
// - Point and directional lights add intensity * (N . L) / |L|, and only when
//   the surface faces the light (N . L > 0).
//
// The formula does not divide by |N|, so `normal` is expected to be unit
// length (trace_ray normalizes it before calling).
//
// Returns the accumulated intensity; it is not clamped here.
fp16_16 compute_lighting(const vec3& point, const vec3& normal)
{
  fp16_16 intensity{0};

  // Iterate the array itself so the loop cannot drift out of sync with the
  // number of lights declared in `struct scene`.
  for (const light& source : scene.lights) {
    if (source.type == light_type::ambient) {
      intensity += source.intensity;
      continue;
    }

    vec3 light_vector;
    if (source.type == light_type::point) {
      light_vector = source.position - point;
    } else {
      light_vector = source.direction;
    }

    auto n_dot_l = dot(normal, light_vector);
    if (n_dot_l > fp16_16(0)) {
      intensity += source.intensity * n_dot_l * (fp16_16(1) / length(light_vector));
    }
  }

  return intensity;
}
t1_t2 intersect_ray_sphere(const vec3& origin, const vec3& direction, const sphere& sphere)
{
fp16_16 r = sphere.radius;
@ -87,7 +151,7 @@ static vec3 trace_ray
{
fp16_16 closest_t = fp_limits<fp16_16>::max();
const sphere * closest_sphere = nullptr;
for (int i = 0; i < 3; i++) {
for (int i = 0; i < 4; i++) {
auto& sphere = scene.spheres[i];
auto [t1, t2] = intersect_ray_sphere(origin, direction, sphere);
if (t1 >= t_min && t1 < t_max && t1 < closest_t) {
@ -102,7 +166,10 @@ static vec3 trace_ray
if (closest_sphere == nullptr) {
return vec3(0, 0, 0);
} else {
return closest_sphere->color;
vec3 point = origin + direction * closest_t;
vec3 normal = point - closest_sphere->center;
normal = normal * (fp16_16(1) / length(normal));
return closest_sphere->color * compute_lighting(point, normal);
}
}

2293
sh/lib1funcs.S Normal file

File diff suppressed because it is too large Load Diff

74
sh/lib1funcs.h Normal file
View File

@ -0,0 +1,74 @@
/* Copyright (C) 1994-2022 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifdef __ELF__
#define LOCAL(X) .L_##X
#define FUNC(X) .type X,@function
#define HIDDEN_FUNC(X) FUNC(X); .hidden X
#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X)
#define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X
#define ENDFUNC(X) ENDFUNC0(X)
#else
#define LOCAL(X) L_##X
#define FUNC(X)
#define HIDDEN_FUNC(X)
#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y)
#define ENDFUNC(X)
#endif
#define CONCAT(A,B) A##B
#define GLOBAL0(U,X) CONCAT(U,__##X)
#define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X)
#define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y)
#if defined __SH2A__ && defined __FMOVD_ENABLED__
#undef FMOVD_WORKS
#define FMOVD_WORKS
#endif
#ifdef __LITTLE_ENDIAN__
#define DR00 fr1
#define DR01 fr0
#define DR20 fr3
#define DR21 fr2
#define DR40 fr5
#define DR41 fr4
#else /* !__LITTLE_ENDIAN__ */
#define DR00 fr0
#define DR01 fr1
#define DR20 fr2
#define DR21 fr3
#define DR40 fr4
#define DR41 fr5
#endif /* !__LITTLE_ENDIAN__ */
#ifdef __sh1__
#define SL(branch, dest, in_slot, in_slot_arg2) \
in_slot, in_slot_arg2; branch dest
#define SL1(branch, dest, in_slot) \
in_slot; branch dest
#else /* ! __sh1__ */
#define SL(branch, dest, in_slot, in_slot_arg2) \
branch##.s dest; in_slot, in_slot_arg2
#define SL1(branch, dest, in_slot) \
branch##/s dest; in_slot
#endif /* !__sh1__ */