sh-dis/c/fpu.h

540 lines
13 KiB
C

#pragma once
#include <assert.h>
#include <softfloat.h>
#include "status_bits.h"
/* floating point */
/*
 * Reinterpret a raw 32-bit FPSCR value as its bit-field view.
 *
 * The word is copied through a union, so the caller's value is never
 * modified and no pointer to it is formed.
 */
static inline struct fpscr_bits _fpscr_bits(uint32_t fpscr)
{
  const union {
    uint32_t value;
    struct fpscr_bits bits;
  } view = { .value = fpscr };

  return view.bits;
}
static inline bool fpu_flag_i(uint32_t fps)
{
return _fpscr_bits(fps).flag_inexact;
}
static inline bool fpu_flag_u(uint32_t fps)
{
return _fpscr_bits(fps).flag_underflow;
}
static inline bool fpu_flag_o(uint32_t fps)
{
return _fpscr_bits(fps).flag_overflow;
}
static inline bool fpu_flag_z(uint32_t fps)
{
return _fpscr_bits(fps).flag_divide_by_zero;
}
static inline bool fpu_flag_v(uint32_t fps)
{
return _fpscr_bits(fps).flag_invalid;
}
static inline bool fpu_cause_i(uint32_t fps)
{
return _fpscr_bits(fps).cause_inexact;
}
static inline bool fpu_cause_u(uint32_t fps)
{
return _fpscr_bits(fps).cause_underflow;
}
static inline bool fpu_cause_o(uint32_t fps)
{
return _fpscr_bits(fps).cause_overflow;
}
static inline bool fpu_cause_z(uint32_t fps)
{
return _fpscr_bits(fps).cause_divide_by_zero;
}
static inline bool fpu_cause_v(uint32_t fps)
{
return _fpscr_bits(fps).cause_invalid;
}
static inline bool fpu_cause_e(uint32_t fps)
{
return _fpscr_bits(fps).cause_fpu_error;
}
static inline bool fpu_enable_i(uint32_t fps)
{
return _fpscr_bits(fps).enable_inexact;
}
static inline bool fpu_enable_u(uint32_t fps)
{
return _fpscr_bits(fps).enable_underflow;
}
static inline bool fpu_enable_o(uint32_t fps)
{
return _fpscr_bits(fps).enable_overflow;
}
static inline bool fpu_enable_z(uint32_t fps)
{
return _fpscr_bits(fps).enable_divide_by_zero;
}
static inline bool fpu_enable_v(uint32_t fps)
{
return _fpscr_bits(fps).enable_invalid;
}
static inline void update_fpscr(uint32_t * fps)
{
struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;
bool inexact = (softfloat_exceptionFlags & softfloat_flag_inexact) != 0;
bool underflow = (softfloat_exceptionFlags & softfloat_flag_underflow) != 0;
bool overflow = (softfloat_exceptionFlags & softfloat_flag_overflow) != 0;
bool infinite = (softfloat_exceptionFlags & softfloat_flag_infinite) != 0;
bool invalid = (softfloat_exceptionFlags & softfloat_flag_invalid) != 0;
fpscr->flag_inexact |= inexact;
fpscr->flag_underflow |= underflow;
fpscr->flag_overflow |= overflow;
fpscr->flag_divide_by_zero |= infinite;
fpscr->flag_invalid |= invalid;
fpscr->cause_inexact = inexact;
fpscr->cause_underflow = underflow;
fpscr->cause_overflow = overflow;
fpscr->cause_divide_by_zero = infinite;
fpscr->cause_invalid = invalid;
}
static inline void set_rounding_mode(uint32_t * fps)
{
struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;
switch (fpscr->rm) {
case 0b00:
softfloat_roundingMode = softfloat_round_near_even;
break;
case 0b01:
softfloat_roundingMode = softfloat_round_minMag;
break;
default:
// undefined rounding mode
break;
}
}
/*
 * True when the single-precision bit pattern `a` is a NaN: all exponent
 * bits set and a non-zero fraction.
 */
static inline bool is_nan_f32(float32_t a)
{
  const uint32_t exponent_mask = 0x7f800000;
  const uint32_t fraction_mask = 0x007fffff;

  if ((a.v & exponent_mask) != exponent_mask) {
    return false;
  }
  return (a.v & fraction_mask) != 0;
}
/*
 * True when the single-precision bit pattern `a` is denormalized: all
 * exponent bits clear and a non-zero fraction.  Zeros are not denormal.
 */
static inline bool is_denormal_f32(float32_t a)
{
  const uint32_t exponent_mask = 0x7f800000;
  const uint32_t fraction_mask = 0x007fffff;

  if ((a.v & exponent_mask) != 0) {
    return false;
  }
  return (a.v & fraction_mask) != 0;
}
/* Returns a zero carrying the sign bit of `a` (every other bit cleared). */
static inline float32_t flush_to_zero_f32(float32_t a)
{
  float32_t signed_zero;

  signed_zero.v = a.v & 0x80000000;
  return signed_zero;
}
/*
 * Apply the FPSCR.DN denormal policy to the three single-precision operands
 * of an operation.
 *
 * DN == 0: an FPU error is signaled if no operand is a NaN and at least one
 *          operand is denormalized.  cause_fpu_error is set in the FPSCR
 *          word at `fps`, the operands are left unchanged and true is
 *          returned: the caller must not carry out the operation.
 * DN == 1: a positive denormalized number is treated as +0 and a negative
 *          one as -0.  Each denormalized operand is replaced in place; this
 *          happens before exception detection and special-case handling.
 *
 * Returns true only when an FPU error was signaled.
 *
 * Every path returns explicitly, so an NDEBUG build never runs off the end
 * of this non-void function.  FPSCR is accessed through a union copy rather
 * than a struct-pointer cast of `fps` (strict aliasing).
 */
static inline bool dn_f32_f32_f32(float32_t * a, float32_t * b, float32_t * c, uint32_t * fps)
{
  union {
    struct fpscr_bits bits;
    uint32_t value;
  } reg;
  reg.value = *fps;

  if (reg.bits.dn == 0) {
    const bool any_nan = is_nan_f32(*a) || is_nan_f32(*b) || is_nan_f32(*c);
    const bool any_denormal =
      is_denormal_f32(*a) || is_denormal_f32(*b) || is_denormal_f32(*c);

    if (!any_nan && any_denormal) {
      reg.bits.cause_fpu_error = 1;
      *fps = reg.value;
      return true; // do not continue
    }
    return false;
  }

  /* Only 0 and 1 are meaningful; anything else is a bug caught in debug builds. */
  assert(reg.bits.dn == 1);

  if (is_denormal_f32(*a)) {
    *a = flush_to_zero_f32(*a);
  }
  if (is_denormal_f32(*b)) {
    *b = flush_to_zero_f32(*b);
  }
  if (is_denormal_f32(*c)) {
    *c = flush_to_zero_f32(*c);
  }
  return false;
}
/*
 * Apply the FPSCR.DN denormal policy to the two single-precision operands
 * of an operation.
 *
 * DN == 0: an FPU error is signaled if neither operand is a NaN and either
 *          operand is denormalized.  cause_fpu_error is set in the FPSCR
 *          word at `fps`, the operands are left unchanged and true is
 *          returned: the caller must not carry out the operation.
 * DN == 1: a positive denormalized number is treated as +0 and a negative
 *          one as -0.  Each denormalized operand is replaced in place; this
 *          happens before exception detection and special-case handling.
 *
 * Returns true only when an FPU error was signaled.
 *
 * Every path returns explicitly, so an NDEBUG build never runs off the end
 * of this non-void function.  FPSCR is accessed through a union copy rather
 * than a struct-pointer cast of `fps` (strict aliasing).
 */
static inline bool dn_f32_f32(float32_t * a, float32_t * b, uint32_t * fps)
{
  union {
    struct fpscr_bits bits;
    uint32_t value;
  } reg;
  reg.value = *fps;

  if (reg.bits.dn == 0) {
    const bool any_nan = is_nan_f32(*a) || is_nan_f32(*b);
    const bool any_denormal = is_denormal_f32(*a) || is_denormal_f32(*b);

    if (!any_nan && any_denormal) {
      reg.bits.cause_fpu_error = 1;
      *fps = reg.value;
      return true; // do not continue
    }
    return false;
  }

  /* Only 0 and 1 are meaningful; anything else is a bug caught in debug builds. */
  assert(reg.bits.dn == 1);

  if (is_denormal_f32(*a)) {
    *a = flush_to_zero_f32(*a);
  }
  if (is_denormal_f32(*b)) {
    *b = flush_to_zero_f32(*b);
  }
  return false;
}
/*
 * Apply the FPSCR.DN denormal policy to the single-precision operand of a
 * one-operand operation.
 *
 * DN == 0: an FPU error is signaled if the operand is denormalized (a
 *          denormalized number is never a NaN, so no NaN check is needed).
 *          cause_fpu_error is set in the FPSCR word at `fps`, the operand
 *          is left unchanged and true is returned: the caller must not
 *          carry out the operation.
 * DN == 1: a positive denormalized number is treated as +0 and a negative
 *          one as -0.  The operand is replaced in place; this happens
 *          before exception detection and special-case handling.
 *
 * Returns true only when an FPU error was signaled.
 *
 * Every path returns explicitly, so an NDEBUG build never runs off the end
 * of this non-void function.  FPSCR is accessed through a union copy rather
 * than a struct-pointer cast of `fps` (strict aliasing).
 */
static inline bool dn_f32(float32_t * a,uint32_t * fps)
{
  union {
    struct fpscr_bits bits;
    uint32_t value;
  } reg;
  reg.value = *fps;

  if (reg.bits.dn == 0) {
    if (is_denormal_f32(*a)) {
      reg.bits.cause_fpu_error = 1;
      *fps = reg.value;
      return true; // do not continue
    }
    return false;
  }

  /* Only 0 and 1 are meaningful; anything else is a bug caught in debug builds. */
  assert(reg.bits.dn == 1);

  if (is_denormal_f32(*a)) {
    *a = flush_to_zero_f32(*a);
  }
  return false;
}
/*
 * True when the double-precision bit pattern `a` is a NaN: all exponent
 * bits set and a non-zero fraction.
 */
static inline bool is_nan_f64(float64_t a)
{
  const uint64_t exponent_mask = 0x7ff00000'00000000;
  const uint64_t fraction_mask = 0x000fffff'ffffffff;

  if ((a.v & exponent_mask) != exponent_mask) {
    return false;
  }
  return (a.v & fraction_mask) != 0;
}
/*
 * True when the double-precision bit pattern `a` is denormalized: all
 * exponent bits clear and a non-zero fraction.  Zeros are not denormal.
 */
static inline bool is_denormal_f64(float64_t a)
{
  const uint64_t exponent_mask = 0x7ff00000'00000000;
  const uint64_t fraction_mask = 0x000fffff'ffffffff;

  if ((a.v & exponent_mask) != 0) {
    return false;
  }
  return (a.v & fraction_mask) != 0;
}
/* Returns a zero carrying the sign bit of `a` (every other bit cleared). */
static inline float64_t flush_to_zero_f64(float64_t a)
{
  float64_t signed_zero;

  signed_zero.v = a.v & 0x80000000'00000000;
  return signed_zero;
}
/*
 * Apply the FPSCR.DN denormal policy to the two double-precision operands
 * of an operation.
 *
 * DN == 0: an FPU error is signaled if neither operand is a NaN and either
 *          operand is denormalized.  cause_fpu_error is set in the FPSCR
 *          word at `fps`, the operands are left unchanged and true is
 *          returned: the caller must not carry out the operation.
 * DN == 1: a positive denormalized number is treated as +0 and a negative
 *          one as -0.  Each denormalized operand is replaced in place; this
 *          happens before exception detection and special-case handling.
 *
 * Returns true only when an FPU error was signaled.
 *
 * Every path returns explicitly, so an NDEBUG build never runs off the end
 * of this non-void function.  FPSCR is accessed through a union copy rather
 * than a struct-pointer cast of `fps` (strict aliasing).
 */
static inline bool dn_f64_f64(float64_t * a, float64_t * b, uint32_t * fps)
{
  union {
    struct fpscr_bits bits;
    uint32_t value;
  } reg;
  reg.value = *fps;

  if (reg.bits.dn == 0) {
    const bool any_nan = is_nan_f64(*a) || is_nan_f64(*b);
    const bool any_denormal = is_denormal_f64(*a) || is_denormal_f64(*b);

    if (!any_nan && any_denormal) {
      reg.bits.cause_fpu_error = 1;
      *fps = reg.value;
      return true; // do not continue
    }
    return false;
  }

  /* Only 0 and 1 are meaningful; anything else is a bug caught in debug builds. */
  assert(reg.bits.dn == 1);

  if (is_denormal_f64(*a)) {
    *a = flush_to_zero_f64(*a);
  }
  if (is_denormal_f64(*b)) {
    *b = flush_to_zero_f64(*b);
  }
  return false;
}
/*
 * Apply the FPSCR.DN denormal policy to the double-precision operand of a
 * one-operand operation.
 *
 * DN == 0: an FPU error is signaled if the operand is denormalized (a
 *          denormalized number is never a NaN, so no NaN check is needed).
 *          cause_fpu_error is set in the FPSCR word at `fps`, the operand
 *          is left unchanged and true is returned: the caller must not
 *          carry out the operation.
 * DN == 1: a positive denormalized number is treated as +0 and a negative
 *          one as -0.  The operand is replaced in place; this happens
 *          before exception detection and special-case handling.
 *
 * Returns true only when an FPU error was signaled.
 *
 * Every path returns explicitly, so an NDEBUG build never runs off the end
 * of this non-void function.  FPSCR is accessed through a union copy rather
 * than a struct-pointer cast of `fps` (strict aliasing).
 */
static inline bool dn_f64(float64_t * a, uint32_t * fps)
{
  union {
    struct fpscr_bits bits;
    uint32_t value;
  } reg;
  reg.value = *fps;

  if (reg.bits.dn == 0) {
    if (is_denormal_f64(*a)) {
      reg.bits.cause_fpu_error = 1;
      *fps = reg.value;
      return true; // do not continue
    }
    return false;
  }

  /* Only 0 and 1 are meaningful; anything else is a bug caught in debug builds. */
  assert(reg.bits.dn == 1);

  if (is_denormal_f64(*a)) {
    *a = flush_to_zero_f64(*a);
  }
  return false;
}
/*
 * Single-precision add: *op2 = op1 + *op2.
 *
 * The operands first go through the denormal check; if that signals an FPU
 * error nothing further is done.  Otherwise the rounding mode is taken from
 * FPSCR, the sum is computed with f32_add and the resulting exception
 * status is folded back into the FPSCR word at `fps`.
 */
static inline void fadd_s(float32_t op1, float32_t * op2, uint32_t * fps)
{
  const bool fpu_error = dn_f32_f32(&op1, op2, fps);

  if (!fpu_error) {
    set_rounding_mode(fps);
    const float32_t sum = f32_add(op1, *op2);
    *op2 = sum;
    update_fpscr(fps);
  }
}
/*
 * Double-precision add: *op2 = op1 + *op2.
 *
 * The operands first go through the denormal check; if that signals an FPU
 * error nothing further is done.  Otherwise the rounding mode is taken from
 * FPSCR, the sum is computed with f64_add and the resulting exception
 * status is folded back into the FPSCR word at `fps`.
 */
static inline void fadd_d(float64_t op1, float64_t * op2, uint32_t * fps)
{
  const bool fpu_error = dn_f64_f64(&op1, op2, fps);

  if (!fpu_error) {
    set_rounding_mode(fps);
    const float64_t sum = f64_add(op1, *op2);
    *op2 = sum;
    update_fpscr(fps);
  }
}
/*
 * Single-precision subtract: *op2 = *op2 - op1.
 *
 * The operands first go through the denormal check; if that signals an FPU
 * error nothing further is done.  Otherwise the rounding mode is taken from
 * FPSCR, the difference is computed with f32_sub and the resulting
 * exception status is folded back into the FPSCR word at `fps`.
 */
static inline void fsub_s(float32_t * op2, float32_t op1, uint32_t * fps)
{
  const bool fpu_error = dn_f32_f32(op2, &op1, fps);

  if (!fpu_error) {
    set_rounding_mode(fps);
    const float32_t difference = f32_sub(*op2, op1);
    *op2 = difference;
    update_fpscr(fps);
  }
}
/*
 * Double-precision subtract: *op2 = *op2 - op1.
 *
 * The operands first go through the denormal check; if that signals an FPU
 * error nothing further is done.  Otherwise the rounding mode is taken from
 * FPSCR, the difference is computed with f64_sub and the resulting
 * exception status is folded back into the FPSCR word at `fps`.
 */
static inline void fsub_d(float64_t * op2, float64_t op1, uint32_t * fps)
{
  const bool fpu_error = dn_f64_f64(op2, &op1, fps);

  if (!fpu_error) {
    set_rounding_mode(fps);
    const float64_t difference = f64_sub(*op2, op1);
    *op2 = difference;
    update_fpscr(fps);
  }
}
/*
 * Single-precision multiply: *op2 = op1 * *op2.
 *
 * The operands first go through the denormal check; if that signals an FPU
 * error nothing further is done.  Otherwise the rounding mode is taken from
 * FPSCR, the product is computed with f32_mul and the resulting exception
 * status is folded back into the FPSCR word at `fps`.
 */
static inline void fmul_s(float32_t op1, float32_t * op2, uint32_t * fps)
{
  const bool fpu_error = dn_f32_f32(&op1, op2, fps);

  if (!fpu_error) {
    set_rounding_mode(fps);
    const float32_t product = f32_mul(op1, *op2);
    *op2 = product;
    update_fpscr(fps);
  }
}
/*
 * Double-precision multiply: *op2 = op1 * *op2.
 *
 * The operands first go through the denormal check; if that signals an FPU
 * error nothing further is done.  Otherwise the rounding mode is taken from
 * FPSCR, the product is computed with f64_mul and the resulting exception
 * status is folded back into the FPSCR word at `fps`.
 */
static inline void fmul_d(float64_t op1, float64_t * op2, uint32_t * fps)
{
  const bool fpu_error = dn_f64_f64(&op1, op2, fps);

  if (!fpu_error) {
    set_rounding_mode(fps);
    const float64_t product = f64_mul(op1, *op2);
    *op2 = product;
    update_fpscr(fps);
  }
}
/*
 * Single-precision divide: *op2 = *op2 / op1.
 *
 * The operands first go through the denormal check; if that signals an FPU
 * error nothing further is done.  Otherwise the rounding mode is taken from
 * FPSCR, the quotient is computed with f32_div and the resulting exception
 * status is folded back into the FPSCR word at `fps`.
 */
static inline void fdiv_s(float32_t * op2, float32_t op1, uint32_t * fps)
{
  const bool fpu_error = dn_f32_f32(op2, &op1, fps);

  if (!fpu_error) {
    set_rounding_mode(fps);
    const float32_t quotient = f32_div(*op2, op1);
    *op2 = quotient;
    update_fpscr(fps);
  }
}
/*
 * Double-precision divide: *op2 = *op2 / op1.
 *
 * The operands first go through the denormal check; if that signals an FPU
 * error nothing further is done.  Otherwise the rounding mode is taken from
 * FPSCR, the quotient is computed with f64_div and the resulting exception
 * status is folded back into the FPSCR word at `fps`.
 */
static inline void fdiv_d(float64_t * op2, float64_t op1, uint32_t * fps)
{
  const bool fpu_error = dn_f64_f64(op2, &op1, fps);

  if (!fpu_error) {
    set_rounding_mode(fps);
    const float64_t quotient = f64_div(*op2, op1);
    *op2 = quotient;
    update_fpscr(fps);
  }
}
/*
 * Convert the signed 32-bit integer `fpul` to single precision with
 * i32_to_f32, using the rounding mode selected by FPSCR, and fold the
 * resulting exception status into the FPSCR word at `fps`.
 */
static inline float32_t float_ls(int32_t fpul, uint32_t * fps)
{
  float32_t converted;

  set_rounding_mode(fps);
  converted = i32_to_f32(fpul);
  update_fpscr(fps);

  return converted;
}
/*
 * Convert the signed 32-bit integer `fpul` to double precision with
 * i32_to_f64, after programming the rounding mode from FPSCR, and fold the
 * resulting exception status into the FPSCR word at `fps`.
 */
static inline float64_t float_ld(int32_t fpul, uint32_t * fps)
{
  float64_t converted;

  set_rounding_mode(fps);
  converted = i32_to_f64(fpul);
  update_fpscr(fps);

  return converted;
}
/*
 * Convert the single-precision value `op1` to a signed 32-bit integer.
 *
 * The conversion is always requested with softfloat_round_minMag and with
 * the `exact` argument false, independent of the FPSCR rounding mode (which
 * is still programmed first, as for the other operations).  The resulting
 * exception status is folded into the FPSCR word at `fps`.
 */
static inline int32_t ftrc_sl(float32_t op1, uint32_t * fps)
{
  int32_t truncated;

  set_rounding_mode(fps);
  truncated = f32_to_i32(op1, softfloat_round_minMag, false);
  update_fpscr(fps);

  return truncated;
}
/*
 * Convert the double-precision value `op1` to a signed 32-bit integer.
 *
 * The conversion is always requested with softfloat_round_minMag and with
 * the `exact` argument false, independent of the FPSCR rounding mode (which
 * is still programmed first, as for the other operations).  The resulting
 * exception status is folded into the FPSCR word at `fps`.
 */
static inline int32_t ftrc_dl(float64_t op1, uint32_t * fps)
{
  int32_t truncated;

  set_rounding_mode(fps);
  truncated = f64_to_i32(op1, softfloat_round_minMag, false);
  update_fpscr(fps);

  return truncated;
}
/* Absolute value: returns `op1` with its sign bit (bit 31) cleared. */
static inline float32_t fabs_s(float32_t op1)
{
  float32_t magnitude;

  magnitude.v = op1.v & 0x7fffffff;
  return magnitude;
}
/* Absolute value: returns `op1` with its sign bit (bit 63) cleared. */
static inline float64_t fabs_d(float64_t op1)
{
  float64_t magnitude;

  magnitude.v = op1.v & 0x7fffffff'ffffffff;
  return magnitude;
}
/* Negation: returns `op1` with its sign bit (bit 31) inverted. */
static inline float32_t fneg_s(float32_t op1)
{
  float32_t negated;

  negated.v = op1.v ^ 0x80000000;
  return negated;
}
/* Negation: returns `op1` with its sign bit (bit 63) inverted. */
static inline float64_t fneg_d(float64_t op1)
{
  float64_t negated;

  negated.v = op1.v ^ 0x80000000'00000000;
  return negated;
}
/*
 * Convert the double-precision value `op1` to single precision and return
 * the raw 32-bit pattern of the result.
 *
 * The operand first goes through the denormal check; if that signals an FPU
 * error, 0 is returned and no conversion takes place.  Otherwise the
 * rounding mode is taken from FPSCR, f64_to_f32 performs the conversion and
 * the resulting exception status is folded into the FPSCR word at `fps`.
 */
static inline uint32_t fcnv_ds(float64_t op1, uint32_t * fps)
{
  uint32_t raw = 0;

  if (!dn_f64(&op1, fps)) {
    set_rounding_mode(fps);
    const float32_t narrowed = f64_to_f32(op1);
    update_fpscr(fps);
    raw = narrowed.v;
  }
  return raw;
}
static inline float64_t fcnv_sd(int32_t fpul, uint32_t * fps)
{
float32_t a = { fpul };
if (dn_f32(&a, fps)) return (float64_t){ 0 };
set_rounding_mode(fps);
float64_t result = f32_to_f64(a);
update_fpscr(fps);
return result;
}
/*
 * Single-precision equality test: returns the result of f32_eq(op1, op2).
 *
 * The operands first go through the denormal check; if that signals an FPU
 * error, false is returned and no comparison takes place.  Otherwise the
 * exception status raised by the comparison is folded into the FPSCR word
 * at `fps`.
 */
static inline bool fcmpeq_s(float32_t op1, float32_t op2, uint32_t * fps)
{
  bool equal = false;

  if (!dn_f32_f32(&op1, &op2, fps)) {
    set_rounding_mode(fps);
    equal = f32_eq(op1, op2);
    update_fpscr(fps);
  }
  return equal;
}
/*
 * Double-precision equality test: returns the result of f64_eq(op1, op2).
 *
 * The operands first go through the denormal check; if that signals an FPU
 * error, false is returned and no comparison takes place.  Otherwise the
 * exception status raised by the comparison is folded into the FPSCR word
 * at `fps`.
 */
static inline bool fcmpeq_d(float64_t op1, float64_t op2, uint32_t * fps)
{
  bool equal = false;

  if (!dn_f64_f64(&op1, &op2, fps)) {
    set_rounding_mode(fps);
    equal = f64_eq(op1, op2);
    update_fpscr(fps);
  }
  return equal;
}
/*
 * Single-precision "greater than" test: returns true when op2 > op1.
 *
 * The operands first go through the denormal check; if that signals an FPU
 * error, false is returned and no comparison takes place.  Otherwise the
 * exception status raised by the comparison is folded into the FPSCR word
 * at `fps`.
 *
 * The test is expressed as op1 < op2 rather than !(op2 <= op1).  The two
 * agree for ordered operands, but when either operand is a NaN every
 * ordered comparison is false, so the negated form reported "greater than"
 * for NaN inputs.  f32_lt raises the invalid exception for NaN operands
 * just as f32_le does, so the exception status is unchanged.
 */
static inline bool fcmpgt_s(float32_t op2, float32_t op1, uint32_t * fps)
{
  if (dn_f32_f32(&op2, &op1, fps)) {
    return false;
  }

  set_rounding_mode(fps);
  const bool greater = f32_lt(op1, op2);
  update_fpscr(fps);

  return greater;
}
/*
 * Double-precision "greater than" test: returns true when op2 > op1.
 *
 * The operands first go through the denormal check; if that signals an FPU
 * error, false is returned and no comparison takes place.  Otherwise the
 * exception status raised by the comparison is folded into the FPSCR word
 * at `fps`.
 *
 * The test is expressed as op1 < op2 rather than !(op2 <= op1).  The two
 * agree for ordered operands, but when either operand is a NaN every
 * ordered comparison is false, so the negated form reported "greater than"
 * for NaN inputs.  f64_lt raises the invalid exception for NaN operands
 * just as f64_le does, so the exception status is unchanged.
 */
static inline bool fcmpgt_d(float64_t op2, float64_t op1, uint32_t * fps)
{
  if (dn_f64_f64(&op2, &op1, fps)) {
    return false;
  }

  set_rounding_mode(fps);
  const bool greater = f64_lt(op1, op2);
  update_fpscr(fps);

  return greater;
}
/*
 * Single-precision multiply-accumulate: *op2 = fr0 * op1 + *op2, computed
 * with f32_mulAdd.
 *
 * All three operands first go through the denormal check; if that signals
 * an FPU error nothing further is done.  Otherwise the rounding mode is
 * taken from FPSCR and the resulting exception status is folded back into
 * the FPSCR word at `fps`.
 */
static inline void fmac_s(float32_t fr0, float32_t op1, float32_t * op2, uint32_t * fps)
{
  const bool fpu_error = dn_f32_f32_f32(&fr0, &op1, op2, fps);

  if (!fpu_error) {
    set_rounding_mode(fps);
    const float32_t accumulated = f32_mulAdd(fr0, op1, *op2);
    *op2 = accumulated;
    update_fpscr(fps);
  }
}
/*
 * Single-precision square root, in place: *op1 = f32_sqrt(*op1).
 *
 * The operand first goes through the denormal check; if that signals an FPU
 * error nothing further is done.  Otherwise the rounding mode is taken from
 * FPSCR and the resulting exception status is folded back into the FPSCR
 * word at `fps`.
 */
static inline void fsqrt_s(float32_t * op1, uint32_t * fps)
{
  const bool fpu_error = dn_f32(op1, fps);

  if (!fpu_error) {
    set_rounding_mode(fps);
    const float32_t root = f32_sqrt(*op1);
    *op1 = root;
    update_fpscr(fps);
  }
}
/*
 * Double-precision square root, in place: *op1 = f64_sqrt(*op1).
 *
 * The operand first goes through the denormal check; if that signals an FPU
 * error nothing further is done.  Otherwise the rounding mode is taken from
 * FPSCR and the resulting exception status is folded back into the FPSCR
 * word at `fps`.
 */
static inline void fsqrt_d(float64_t * op1, uint32_t * fps)
{
  const bool fpu_error = dn_f64(op1, fps);

  if (!fpu_error) {
    set_rounding_mode(fps);
    const float64_t root = f64_sqrt(*op1);
    *op1 = root;
    update_fpscr(fps);
  }
}