540 lines
13 KiB
C
540 lines
13 KiB
C
#pragma once
|
|
|
|
#include <assert.h>
|
|
#include <softfloat.h>
|
|
|
|
#include "status_bits.h"
|
|
|
|
/* floating point */
|
|
static inline struct fpscr_bits _fpscr_bits(uint32_t fpscr)
|
|
{
|
|
union {
|
|
struct fpscr_bits bits;
|
|
uint32_t value;
|
|
} fpscr_union;
|
|
fpscr_union.value = fpscr;
|
|
return fpscr_union.bits;
|
|
}
|
|
|
|
static inline bool fpu_flag_i(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).flag_inexact;
|
|
}
|
|
|
|
static inline bool fpu_flag_u(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).flag_underflow;
|
|
}
|
|
|
|
static inline bool fpu_flag_o(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).flag_overflow;
|
|
}
|
|
|
|
static inline bool fpu_flag_z(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).flag_divide_by_zero;
|
|
}
|
|
|
|
static inline bool fpu_flag_v(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).flag_invalid;
|
|
}
|
|
|
|
static inline bool fpu_cause_i(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).cause_inexact;
|
|
}
|
|
|
|
static inline bool fpu_cause_u(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).cause_underflow;
|
|
}
|
|
|
|
static inline bool fpu_cause_o(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).cause_overflow;
|
|
}
|
|
|
|
static inline bool fpu_cause_z(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).cause_divide_by_zero;
|
|
}
|
|
|
|
static inline bool fpu_cause_v(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).cause_invalid;
|
|
}
|
|
|
|
static inline bool fpu_cause_e(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).cause_fpu_error;
|
|
}
|
|
|
|
static inline bool fpu_enable_i(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).enable_inexact;
|
|
}
|
|
|
|
static inline bool fpu_enable_u(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).enable_underflow;
|
|
}
|
|
|
|
static inline bool fpu_enable_o(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).enable_overflow;
|
|
}
|
|
|
|
static inline bool fpu_enable_z(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).enable_divide_by_zero;
|
|
}
|
|
|
|
static inline bool fpu_enable_v(uint32_t fps)
|
|
{
|
|
return _fpscr_bits(fps).enable_invalid;
|
|
}
|
|
|
|
static inline void update_fpscr(uint32_t * fps)
|
|
{
|
|
struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;
|
|
bool inexact = (softfloat_exceptionFlags & softfloat_flag_inexact) != 0;
|
|
bool underflow = (softfloat_exceptionFlags & softfloat_flag_underflow) != 0;
|
|
bool overflow = (softfloat_exceptionFlags & softfloat_flag_overflow) != 0;
|
|
bool infinite = (softfloat_exceptionFlags & softfloat_flag_infinite) != 0;
|
|
bool invalid = (softfloat_exceptionFlags & softfloat_flag_invalid) != 0;
|
|
|
|
fpscr->flag_inexact |= inexact;
|
|
fpscr->flag_underflow |= underflow;
|
|
fpscr->flag_overflow |= overflow;
|
|
fpscr->flag_divide_by_zero |= infinite;
|
|
fpscr->flag_invalid |= invalid;
|
|
|
|
fpscr->cause_inexact = inexact;
|
|
fpscr->cause_underflow = underflow;
|
|
fpscr->cause_overflow = overflow;
|
|
fpscr->cause_divide_by_zero = infinite;
|
|
fpscr->cause_invalid = invalid;
|
|
}
|
|
|
|
static inline void set_rounding_mode(uint32_t * fps)
|
|
{
|
|
struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;
|
|
switch (fpscr->rm) {
|
|
case 0b00:
|
|
softfloat_roundingMode = softfloat_round_near_even;
|
|
break;
|
|
case 0b01:
|
|
softfloat_roundingMode = softfloat_round_minMag;
|
|
break;
|
|
default:
|
|
// undefined rounding mode
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
 * True when `a` encodes a NaN: the exponent field (mask 0x7f800000) is all
 * ones and the fraction field (mask 0x007fffff) is non-zero.  The sign bit
 * is ignored.
 */
static inline bool is_nan_f32(float32_t a)
{
  const uint32_t exponent = a.v & 0x7f800000;
  const uint32_t fraction = a.v & 0x007fffff;

  return exponent == 0x7f800000 && fraction != 0;
}
|
|
|
|
/*
 * True when `a` encodes a denormalized number: the exponent field
 * (mask 0x7f800000) is zero and the fraction field (mask 0x007fffff) is
 * non-zero.  Signed zeros are therefore not denormals.
 */
static inline bool is_denormal_f32(float32_t a)
{
  const uint32_t exponent = a.v & 0x7f800000;
  const uint32_t fraction = a.v & 0x007fffff;

  return exponent == 0 && fraction != 0;
}
|
|
|
|
/* Return a zero carrying the sign of `a`: every bit except bit 31 is cleared. */
static inline float32_t flush_to_zero_f32(float32_t a)
{
  a.v &= 0x80000000;
  return a;
}
|
|
|
|
/*
 * Apply the FPSCR.DN denormal policy to the three single-precision operands
 * of an operation.
 *
 * DN == 0: an FPU error is signaled (cause_fpu_error is set) if no operand
 *          is a NaN and at least one operand is a denormalized number.
 * DN == 1: each denormalized operand is replaced in place by a zero of the
 *          same sign, before exception detection and special case handling.
 *
 * Returns true when the operation must be abandoned (FPU error signaled)
 * and false when the caller should carry on.  This is the convention used
 * by dn_f32(), dn_f64() and dn_f64_f64(), and the one callers rely on
 * (`if (dn_...(...)) return;`).
 */
static inline bool dn_f32_f32_f32(float32_t * a, float32_t * b, float32_t * c, uint32_t * fps)
{
  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;

  switch (fpscr->dn) {
  case 0:
    if (is_nan_f32(*a) || is_nan_f32(*b) || is_nan_f32(*c)) {
      return false; // a NaN operand suppresses the FPU error
    }
    if (is_denormal_f32(*a) || is_denormal_f32(*b) || is_denormal_f32(*c)) {
      fpscr->cause_fpu_error = 1;
      return true; // do not continue
    }
    return false;
  case 1:
    if (is_denormal_f32(*a)) *a = flush_to_zero_f32(*a);
    if (is_denormal_f32(*b)) *b = flush_to_zero_f32(*b);
    if (is_denormal_f32(*c)) *c = flush_to_zero_f32(*c);
    return false;
  default:
    // FPSCR.DN is expected to be 0 or 1
    assert(false);
    return true; // keeps the function well-defined when assert() is compiled out
  }
}
|
|
|
|
/*
 * Apply the FPSCR.DN denormal policy to the two single-precision operands
 * of an operation.
 *
 * DN == 0: an FPU error is signaled (cause_fpu_error is set) if neither
 *          operand is a NaN and either operand is a denormalized number.
 * DN == 1: each denormalized operand is replaced in place by a zero of the
 *          same sign, before exception detection and special case handling.
 *
 * Returns true when the operation must be abandoned (FPU error signaled)
 * and false when the caller should carry on.  This is the convention used
 * by dn_f32(), dn_f64() and dn_f64_f64(), and the one callers rely on
 * (`if (dn_f32_f32(...)) return;`).
 */
static inline bool dn_f32_f32(float32_t * a, float32_t * b, uint32_t * fps)
{
  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;

  switch (fpscr->dn) {
  case 0:
    if (is_nan_f32(*a) || is_nan_f32(*b)) {
      return false; // a NaN operand suppresses the FPU error
    }
    if (is_denormal_f32(*a) || is_denormal_f32(*b)) {
      fpscr->cause_fpu_error = 1;
      return true; // do not continue
    }
    return false;
  case 1:
    if (is_denormal_f32(*a)) *a = flush_to_zero_f32(*a);
    if (is_denormal_f32(*b)) *b = flush_to_zero_f32(*b);
    return false;
  default:
    // FPSCR.DN is expected to be 0 or 1
    assert(false);
    return true; // keeps the function well-defined when assert() is compiled out
  }
}
|
|
|
|
/*
 * Apply the FPSCR.DN denormal policy to the single-precision operand of a
 * one-input operation.
 *
 * DN == 0: an FPU error is signaled (cause_fpu_error is set) if the operand
 *          is a denormalized number.  No separate NaN test is needed: a
 *          value cannot be both a NaN and a denormal.
 * DN == 1: a denormalized operand is replaced in place by a zero of the
 *          same sign, before exception detection and special case handling.
 *
 * Returns true when the operation must be abandoned (FPU error signaled)
 * and false when the caller should carry on.
 */
static inline bool dn_f32(float32_t * a,uint32_t * fps)
{
  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;

  switch (fpscr->dn) {
  case 0:
    if (is_denormal_f32(*a)) {
      fpscr->cause_fpu_error = 1;
      return true; // do not continue
    }
    return false;
  case 1:
    if (is_denormal_f32(*a)) *a = flush_to_zero_f32(*a);
    return false;
  default:
    // FPSCR.DN is expected to be 0 or 1
    assert(false);
    return true; // keeps the function well-defined when assert() is compiled out
  }
}
|
|
|
|
/*
 * True when `a` encodes a NaN: the exponent field
 * (mask 0x7ff00000'00000000) is all ones and the fraction field
 * (mask 0x000fffff'ffffffff) is non-zero.  The sign bit is ignored.
 */
static inline bool is_nan_f64(float64_t a)
{
  const uint64_t exponent = a.v & 0x7ff00000'00000000;
  const uint64_t fraction = a.v & 0x000fffff'ffffffff;

  return exponent == 0x7ff00000'00000000 && fraction != 0;
}
|
|
|
|
/*
 * True when `a` encodes a denormalized number: the exponent field
 * (mask 0x7ff00000'00000000) is zero and the fraction field
 * (mask 0x000fffff'ffffffff) is non-zero.  Signed zeros are therefore not
 * denormals.
 */
static inline bool is_denormal_f64(float64_t a)
{
  const uint64_t exponent = a.v & 0x7ff00000'00000000;
  const uint64_t fraction = a.v & 0x000fffff'ffffffff;

  return exponent == 0 && fraction != 0;
}
|
|
|
|
/* Return a zero carrying the sign of `a`: every bit except bit 63 is cleared. */
static inline float64_t flush_to_zero_f64(float64_t a)
{
  a.v &= 0x80000000'00000000;
  return a;
}
|
|
|
|
/*
 * Apply the FPSCR.DN denormal policy to the two double-precision operands
 * of an operation.
 *
 * DN == 0: an FPU error is signaled (cause_fpu_error is set) if neither
 *          operand is a NaN and either operand is a denormalized number.
 * DN == 1: each denormalized operand is replaced in place by a zero of the
 *          same sign, before exception detection and special case handling.
 *
 * Returns true when the operation must be abandoned (FPU error signaled)
 * and false when the caller should carry on.
 */
static inline bool dn_f64_f64(float64_t * a, float64_t * b, uint32_t * fps)
{
  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;

  switch (fpscr->dn) {
  case 0:
    if (is_nan_f64(*a) || is_nan_f64(*b)) {
      return false; // a NaN operand suppresses the FPU error
    }
    if (is_denormal_f64(*a) || is_denormal_f64(*b)) {
      fpscr->cause_fpu_error = 1;
      return true; // do not continue
    }
    return false;
  case 1:
    if (is_denormal_f64(*a)) *a = flush_to_zero_f64(*a);
    if (is_denormal_f64(*b)) *b = flush_to_zero_f64(*b);
    return false;
  default:
    // FPSCR.DN is expected to be 0 or 1
    assert(false);
    return true; // keeps the function well-defined when assert() is compiled out
  }
}
|
|
|
|
/*
 * Apply the FPSCR.DN denormal policy to the double-precision operand of a
 * one-input operation.
 *
 * DN == 0: an FPU error is signaled (cause_fpu_error is set) if the operand
 *          is a denormalized number.  No separate NaN test is needed: a
 *          value cannot be both a NaN and a denormal.
 * DN == 1: a denormalized operand is replaced in place by a zero of the
 *          same sign, before exception detection and special case handling.
 *
 * Returns true when the operation must be abandoned (FPU error signaled)
 * and false when the caller should carry on.
 */
static inline bool dn_f64(float64_t * a, uint32_t * fps)
{
  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;

  switch (fpscr->dn) {
  case 0:
    if (is_denormal_f64(*a)) {
      fpscr->cause_fpu_error = 1;
      return true; // do not continue
    }
    return false;
  case 1:
    if (is_denormal_f64(*a)) *a = flush_to_zero_f64(*a);
    return false;
  default:
    // FPSCR.DN is expected to be 0 or 1
    assert(false);
    return true; // keeps the function well-defined when assert() is compiled out
  }
}
|
|
|
|
/*
 * Single-precision add: *op2 = op1 + *op2.
 *
 * Both operands first pass through dn_f32_f32(); when it returns true the
 * addition is skipped.  Otherwise the rounding mode is taken from FPSCR,
 * the sum is stored through `op2`, and the resulting exception state is
 * recorded in FPSCR.
 */
static inline void fadd_s(float32_t op1, float32_t * op2, uint32_t * fps)
{
  const bool skip = dn_f32_f32(&op1, op2, fps);

  if (!skip) {
    set_rounding_mode(fps);
    const float32_t sum = f32_add(op1, *op2);
    *op2 = sum;
    update_fpscr(fps);
  }
}
|
|
|
|
/*
 * Double-precision add: *op2 = op1 + *op2.
 *
 * Both operands first pass through dn_f64_f64(); when it returns true the
 * addition is skipped.  Otherwise the rounding mode is taken from FPSCR,
 * the sum is stored through `op2`, and the resulting exception state is
 * recorded in FPSCR.
 */
static inline void fadd_d(float64_t op1, float64_t * op2, uint32_t * fps)
{
  const bool skip = dn_f64_f64(&op1, op2, fps);

  if (!skip) {
    set_rounding_mode(fps);
    const float64_t sum = f64_add(op1, *op2);
    *op2 = sum;
    update_fpscr(fps);
  }
}
|
|
|
|
/*
 * Single-precision subtract: *op2 = *op2 - op1.
 *
 * Both operands first pass through dn_f32_f32(); when it returns true the
 * subtraction is skipped.  Otherwise the rounding mode is taken from FPSCR,
 * the difference is stored through `op2`, and the resulting exception state
 * is recorded in FPSCR.
 */
static inline void fsub_s(float32_t * op2, float32_t op1, uint32_t * fps)
{
  const bool skip = dn_f32_f32(op2, &op1, fps);

  if (!skip) {
    set_rounding_mode(fps);
    const float32_t difference = f32_sub(*op2, op1);
    *op2 = difference;
    update_fpscr(fps);
  }
}
|
|
|
|
/*
 * Double-precision subtract: *op2 = *op2 - op1.
 *
 * Both operands first pass through dn_f64_f64(); when it returns true the
 * subtraction is skipped.  Otherwise the rounding mode is taken from FPSCR,
 * the difference is stored through `op2`, and the resulting exception state
 * is recorded in FPSCR.
 */
static inline void fsub_d(float64_t * op2, float64_t op1, uint32_t * fps)
{
  const bool skip = dn_f64_f64(op2, &op1, fps);

  if (!skip) {
    set_rounding_mode(fps);
    const float64_t difference = f64_sub(*op2, op1);
    *op2 = difference;
    update_fpscr(fps);
  }
}
|
|
|
|
/*
 * Single-precision multiply: *op2 = op1 * *op2.
 *
 * Both operands first pass through dn_f32_f32(); when it returns true the
 * multiplication is skipped.  Otherwise the rounding mode is taken from
 * FPSCR, the product is stored through `op2`, and the resulting exception
 * state is recorded in FPSCR.
 */
static inline void fmul_s(float32_t op1, float32_t * op2, uint32_t * fps)
{
  const bool skip = dn_f32_f32(&op1, op2, fps);

  if (!skip) {
    set_rounding_mode(fps);
    const float32_t product = f32_mul(op1, *op2);
    *op2 = product;
    update_fpscr(fps);
  }
}
|
|
|
|
/*
 * Double-precision multiply: *op2 = op1 * *op2.
 *
 * Both operands first pass through dn_f64_f64(); when it returns true the
 * multiplication is skipped.  Otherwise the rounding mode is taken from
 * FPSCR, the product is stored through `op2`, and the resulting exception
 * state is recorded in FPSCR.
 */
static inline void fmul_d(float64_t op1, float64_t * op2, uint32_t * fps)
{
  const bool skip = dn_f64_f64(&op1, op2, fps);

  if (!skip) {
    set_rounding_mode(fps);
    const float64_t product = f64_mul(op1, *op2);
    *op2 = product;
    update_fpscr(fps);
  }
}
|
|
|
|
/*
 * Single-precision divide: *op2 = *op2 / op1.
 *
 * Both operands first pass through dn_f32_f32(); when it returns true the
 * division is skipped.  Otherwise the rounding mode is taken from FPSCR,
 * the quotient is stored through `op2`, and the resulting exception state
 * is recorded in FPSCR.
 */
static inline void fdiv_s(float32_t * op2, float32_t op1, uint32_t * fps)
{
  const bool skip = dn_f32_f32(op2, &op1, fps);

  if (!skip) {
    set_rounding_mode(fps);
    const float32_t quotient = f32_div(*op2, op1);
    *op2 = quotient;
    update_fpscr(fps);
  }
}
|
|
|
|
/*
 * Double-precision divide: *op2 = *op2 / op1.
 *
 * Both operands first pass through dn_f64_f64(); when it returns true the
 * division is skipped.  Otherwise the rounding mode is taken from FPSCR,
 * the quotient is stored through `op2`, and the resulting exception state
 * is recorded in FPSCR.
 */
static inline void fdiv_d(float64_t * op2, float64_t op1, uint32_t * fps)
{
  const bool skip = dn_f64_f64(op2, &op1, fps);

  if (!skip) {
    set_rounding_mode(fps);
    const float64_t quotient = f64_div(*op2, op1);
    *op2 = quotient;
    update_fpscr(fps);
  }
}
|
|
|
|
/*
 * Convert the signed 32-bit integer `fpul` to single precision.
 *
 * The rounding mode is taken from FPSCR before the conversion and the
 * resulting exception state is recorded in FPSCR afterwards.
 */
static inline float32_t float_ls(int32_t fpul, uint32_t * fps)
{
  float32_t converted;

  set_rounding_mode(fps);
  converted = i32_to_f32(fpul);
  update_fpscr(fps);

  return converted;
}
|
|
|
|
/*
 * Convert the signed 32-bit integer `fpul` to double precision.
 *
 * The rounding mode is taken from FPSCR before the conversion and the
 * resulting exception state is recorded in FPSCR afterwards.
 */
static inline float64_t float_ld(int32_t fpul, uint32_t * fps)
{
  float64_t converted;

  set_rounding_mode(fps);
  converted = i32_to_f64(fpul);
  update_fpscr(fps);

  return converted;
}
|
|
|
|
/*
 * Convert the single-precision value `op1` to a signed 32-bit integer.
 *
 * The conversion is always requested with softfloat_round_minMag, whatever
 * mode set_rounding_mode() selects, and with the `exact` argument false.
 * The resulting exception state is recorded in FPSCR.
 */
static inline int32_t ftrc_sl(float32_t op1, uint32_t * fps)
{
  int32_t truncated;

  set_rounding_mode(fps);
  truncated = f32_to_i32(op1, softfloat_round_minMag, false);
  update_fpscr(fps);

  return truncated;
}
|
|
|
|
/*
 * Convert the double-precision value `op1` to a signed 32-bit integer.
 *
 * The conversion is always requested with softfloat_round_minMag, whatever
 * mode set_rounding_mode() selects, and with the `exact` argument false.
 * The resulting exception state is recorded in FPSCR.
 */
static inline int32_t ftrc_dl(float64_t op1, uint32_t * fps)
{
  int32_t truncated;

  set_rounding_mode(fps);
  truncated = f64_to_i32(op1, softfloat_round_minMag, false);
  update_fpscr(fps);

  return truncated;
}
|
|
|
|
/* Single-precision absolute value: clear the sign bit (bit 31) of `op1`. */
static inline float32_t fabs_s(float32_t op1)
{
  return (float32_t){ op1.v & 0x7fffffff };
}
|
|
|
|
/* Double-precision absolute value: clear the sign bit (bit 63) of `op1`. */
static inline float64_t fabs_d(float64_t op1)
{
  return (float64_t){ op1.v & 0x7fffffff'ffffffff };
}
|
|
|
|
/*
 * Single-precision negate: toggle the sign bit (bit 31) of `op1`.
 *
 * The mask must be the 32-bit sign bit.  A 64-bit mask (bit 63) is
 * truncated away when the result is stored back into the 32-bit
 * representation, which would leave the value unchanged.
 */
static inline float32_t fneg_s(float32_t op1)
{
  op1.v ^= 0x80000000;
  return op1;
}
|
|
|
|
/* Double-precision negate: toggle the sign bit (bit 63) of `op1`. */
static inline float64_t fneg_d(float64_t op1)
{
  return (float64_t){ op1.v ^ 0x80000000'00000000 };
}
|
|
|
|
/*
 * Convert the double-precision value `op1` to single precision and return
 * the raw 32-bit encoding of the result.
 *
 * The operand first passes through dn_f64(); when it returns true the
 * conversion is skipped and 0 is returned.  Otherwise the rounding mode is
 * taken from FPSCR and the resulting exception state is recorded in FPSCR.
 */
static inline uint32_t fcnv_ds(float64_t op1, uint32_t * fps)
{
  uint32_t encoded = 0;

  if (!dn_f64(&op1, fps)) {
    set_rounding_mode(fps);
    const float32_t narrowed = f64_to_f32(op1);
    update_fpscr(fps);
    encoded = narrowed.v;
  }

  return encoded;
}
|
|
|
|
static inline float64_t fcnv_sd(int32_t fpul, uint32_t * fps)
|
|
{
|
|
float32_t a = { fpul };
|
|
if (dn_f32(&a, fps)) return (float64_t){ 0 };
|
|
|
|
set_rounding_mode(fps);
|
|
float64_t result = f32_to_f64(a);
|
|
update_fpscr(fps);
|
|
return result;
|
|
}
|
|
|
|
/*
 * Single-precision equality test: returns f32_eq(op1, op2).
 *
 * Both operands first pass through dn_f32_f32(); when it returns true the
 * comparison is skipped and false is returned.  Otherwise the resulting
 * exception state is recorded in FPSCR.
 */
static inline bool fcmpeq_s(float32_t op1, float32_t op2, uint32_t * fps)
{
  bool equal = false;

  if (!dn_f32_f32(&op1, &op2, fps)) {
    set_rounding_mode(fps);
    equal = f32_eq(op1, op2);
    update_fpscr(fps);
  }

  return equal;
}
|
|
|
|
/*
 * Double-precision equality test: returns f64_eq(op1, op2).
 *
 * Both operands first pass through dn_f64_f64(); when it returns true the
 * comparison is skipped and false is returned.  Otherwise the resulting
 * exception state is recorded in FPSCR.
 */
static inline bool fcmpeq_d(float64_t op1, float64_t op2, uint32_t * fps)
{
  bool equal = false;

  if (!dn_f64_f64(&op1, &op2, fps)) {
    set_rounding_mode(fps);
    equal = f64_eq(op1, op2);
    update_fpscr(fps);
  }

  return equal;
}
|
|
|
|
/*
 * Single-precision "greater than": returns true when op2 > op1.
 *
 * The operand order follows fsub_s()/fdiv_s(), where `op2` is the left-hand
 * operand of the expression.
 *
 * Both operands first pass through dn_f32_f32(); when it returns true the
 * comparison is skipped and false is returned.
 *
 * op2 > op1 is evaluated as op1 < op2 with f32_lt(), which is false
 * whenever either operand is a NaN, so an unordered pair is reported as
 * "not greater".  Negating f32_le() instead would report true for
 * unordered operands.
 */
static inline bool fcmpgt_s(float32_t op2, float32_t op1, uint32_t * fps)
{
  if (dn_f32_f32(&op2, &op1, fps)) return false;

  set_rounding_mode(fps);
  bool result = f32_lt(op1, op2);
  update_fpscr(fps);
  return result;
}
|
|
|
|
/*
 * Double-precision "greater than": returns true when op2 > op1.
 *
 * The operand order follows fsub_d()/fdiv_d(), where `op2` is the left-hand
 * operand of the expression.
 *
 * Both operands first pass through dn_f64_f64(); when it returns true the
 * comparison is skipped and false is returned.
 *
 * op2 > op1 is evaluated as op1 < op2 with f64_lt(), which is false
 * whenever either operand is a NaN, so an unordered pair is reported as
 * "not greater".  Negating f64_le() instead would report true for
 * unordered operands.
 */
static inline bool fcmpgt_d(float64_t op2, float64_t op1, uint32_t * fps)
{
  if (dn_f64_f64(&op2, &op1, fps)) return false;

  set_rounding_mode(fps);
  bool result = f64_lt(op1, op2);
  update_fpscr(fps);
  return result;
}
|
|
|
|
/*
 * Single-precision multiply-accumulate: *op2 = fr0 * op1 + *op2, computed
 * with f32_mulAdd().
 *
 * All three operands first pass through dn_f32_f32_f32(); when it returns
 * true the operation is skipped.  Otherwise the rounding mode is taken from
 * FPSCR, the result is stored through `op2`, and the resulting exception
 * state is recorded in FPSCR.
 */
static inline void fmac_s(float32_t fr0, float32_t op1, float32_t * op2, uint32_t * fps)
{
  const bool skip = dn_f32_f32_f32(&fr0, &op1, op2, fps);

  if (!skip) {
    set_rounding_mode(fps);
    const float32_t accumulated = f32_mulAdd(fr0, op1, *op2);
    *op2 = accumulated;
    update_fpscr(fps);
  }
}
|
|
|
|
/*
 * Single-precision square root, in place: *op1 = sqrt(*op1).
 *
 * The operand first passes through dn_f32(); when it returns true the
 * operation is skipped.  Otherwise the rounding mode is taken from FPSCR,
 * the root is stored through `op1`, and the resulting exception state is
 * recorded in FPSCR.
 */
static inline void fsqrt_s(float32_t * op1, uint32_t * fps)
{
  const bool skip = dn_f32(op1, fps);

  if (!skip) {
    set_rounding_mode(fps);
    const float32_t root = f32_sqrt(*op1);
    *op1 = root;
    update_fpscr(fps);
  }
}
|
|
|
|
/*
 * Double-precision square root, in place: *op1 = sqrt(*op1).
 *
 * The operand first passes through dn_f64(); when it returns true the
 * operation is skipped.  Otherwise the rounding mode is taken from FPSCR,
 * the root is stored through `op1`, and the resulting exception state is
 * recorded in FPSCR.
 */
static inline void fsqrt_d(float64_t * op1, uint32_t * fps)
{
  const bool skip = dn_f64(op1, fps);

  if (!skip) {
    set_rounding_mode(fps);
    const float64_t root = f64_sqrt(*op1);
    *op1 = root;
    update_fpscr(fps);
  }
}
|