#pragma once

#include <assert.h>
#include <softfloat.h>

#include "status_bits.h"

/* floating point */
/*
 * Reinterprets a raw 32-bit FPSCR value as a struct fpscr_bits.
 *
 * The conversion is done by storing the integer into one member of a
 * union and reading the other member back, so the struct layout declared
 * in status_bits.h decides which bits land in which field.
 */
static inline struct fpscr_bits _fpscr_bits(uint32_t fpscr)
{
  const union {
    uint32_t value;
    struct fpscr_bits bits;
  } view = { .value = fpscr };
  return view.bits;
}

static inline bool fpu_flag_i(uint32_t fps)
{
  return _fpscr_bits(fps).flag_inexact;
}

static inline bool fpu_flag_u(uint32_t fps)
{
  return _fpscr_bits(fps).flag_underflow;
}

static inline bool fpu_flag_o(uint32_t fps)
{
  return _fpscr_bits(fps).flag_overflow;
}

static inline bool fpu_flag_z(uint32_t fps)
{
  return _fpscr_bits(fps).flag_divide_by_zero;
}

static inline bool fpu_flag_v(uint32_t fps)
{
  return _fpscr_bits(fps).flag_invalid;
}

static inline bool fpu_cause_i(uint32_t fps)
{
  return _fpscr_bits(fps).cause_inexact;
}

static inline bool fpu_cause_u(uint32_t fps)
{
  return _fpscr_bits(fps).cause_underflow;
}

static inline bool fpu_cause_o(uint32_t fps)
{
  return _fpscr_bits(fps).cause_overflow;
}

static inline bool fpu_cause_z(uint32_t fps)
{
  return _fpscr_bits(fps).cause_divide_by_zero;
}

static inline bool fpu_cause_v(uint32_t fps)
{
  return _fpscr_bits(fps).cause_invalid;
}

static inline bool fpu_cause_e(uint32_t fps)
{
  return _fpscr_bits(fps).cause_fpu_error;
}

static inline bool fpu_enable_i(uint32_t fps)
{
  return _fpscr_bits(fps).enable_inexact;
}

static inline bool fpu_enable_u(uint32_t fps)
{
  return _fpscr_bits(fps).enable_underflow;
}

static inline bool fpu_enable_o(uint32_t fps)
{
  return _fpscr_bits(fps).enable_overflow;
}

static inline bool fpu_enable_z(uint32_t fps)
{
  return _fpscr_bits(fps).enable_divide_by_zero;
}

static inline bool fpu_enable_v(uint32_t fps)
{
  return _fpscr_bits(fps).enable_invalid;
}

/*
 * Transfers the SoftFloat exception flags into the FPSCR word at *fps.
 *
 * For each of inexact / underflow / overflow / divide-by-zero / invalid:
 *   - the flag_* field is OR-ed with the new state (it only ever gets set
 *     here, never cleared);
 *   - the cause_* field is overwritten with the new state.
 *
 * softfloat_exceptionFlags is itself sticky: SoftFloat only ever ORs bits
 * into it. The flags are therefore consumed here (snapshot, then reset to
 * zero) so that the cause_* fields written by the next call describe only
 * the operation performed in between, instead of every operation since the
 * global was last cleared.
 *
 * cause_fpu_error is not touched by this function.
 */
static inline void update_fpscr(uint32_t * fps)
{
  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;

  /* snapshot and reset, so stale flags cannot leak into a later call */
  const unsigned int raised = softfloat_exceptionFlags;
  softfloat_exceptionFlags = 0;

  bool inexact   = (raised & softfloat_flag_inexact)   != 0;
  bool underflow = (raised & softfloat_flag_underflow) != 0;
  bool overflow  = (raised & softfloat_flag_overflow)  != 0;
  bool infinite  = (raised & softfloat_flag_infinite)  != 0; /* reported as divide-by-zero */
  bool invalid   = (raised & softfloat_flag_invalid)   != 0;

  /* accumulated flags */
  fpscr->flag_inexact        |= inexact;
  fpscr->flag_underflow      |= underflow;
  fpscr->flag_overflow       |= overflow;
  fpscr->flag_divide_by_zero |= infinite;
  fpscr->flag_invalid        |= invalid;

  /* causes: replaced on every call */
  fpscr->cause_inexact        = inexact;
  fpscr->cause_underflow      = underflow;
  fpscr->cause_overflow       = overflow;
  fpscr->cause_divide_by_zero = infinite;
  fpscr->cause_invalid        = invalid;
}

static inline void set_rounding_mode(uint32_t * fps)
{
  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;
  switch (fpscr->rm) {
  case 0b00:
    softfloat_roundingMode = softfloat_round_near_even;
    break;
  case 0b01:
    softfloat_roundingMode = softfloat_round_minMag;
    break;
  default:
    // undefined rounding mode
    break;
  }
}

/*
 * True when the bit pattern in a has all exponent bits (0x7f800000) set
 * and a non-zero fraction (0x007fffff).
 */
static inline bool is_nan_f32(float32_t a)
{
  const uint32_t exponent = a.v & 0x7f800000;
  const uint32_t fraction = a.v & 0x007fffff;
  return exponent == 0x7f800000 && fraction != 0;
}

/*
 * True when the bit pattern in a has all exponent bits (0x7f800000) clear
 * and a non-zero fraction (0x007fffff).
 */
static inline bool is_denormal_f32(float32_t a)
{
  const uint32_t exponent = a.v & 0x7f800000;
  const uint32_t fraction = a.v & 0x007fffff;
  return exponent == 0 && fraction != 0;
}

/* Returns a with everything but the sign bit (0x80000000) cleared. */
static inline float32_t flush_to_zero_f32(float32_t a)
{
  float32_t signed_zero;
  signed_zero.v = a.v & 0x80000000;
  return signed_zero;
}

/*
 * Denormal pre-processing for a three-operand single-precision operation.
 *
 * Depending on the dn field of the FPSCR word at *fps:
 *   dn == 0: if none of *a, *b, *c is a NaN and at least one of them is
 *            denormalized, sets cause_fpu_error and returns true (the
 *            caller must not perform the operation). Otherwise returns
 *            false and leaves the operands untouched.
 *   dn == 1: replaces each denormalized operand, in place, with a zero of
 *            the same sign and returns false.
 *
 * Any other dn value trips the assertion; when assertions are compiled out
 * the function returns false instead of running off its end.
 */
static inline bool dn_f32_f32_f32(float32_t * a, float32_t * b, float32_t * c, uint32_t * fps)
{
  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;
  switch (fpscr->dn) {
  case 0:
    /*
     * an FPU error is signaled if FPSCR.DN is zero, no input is a NaN
     * and at least one input is a denormalized number.
     */
    if ((!is_nan_f32(*a)) && (!is_nan_f32(*b)) && (!is_nan_f32(*c))) { // no input is a NaN
      if (is_denormal_f32(*a) || is_denormal_f32(*b) || is_denormal_f32(*c)) { // some input is denormalized
        fpscr->cause_fpu_error = 1;
        return true; // do not continue
      }
    }
    return false;
  case 1:
    /*
     * When FPSCR.DN is 1, a positive denormalized number is treated as
     * +0 and a negative denormalized number as -0. This flush-to-zero
     * treatment is applied before exception detection and special case
     * handling.
     */
    if (is_denormal_f32(*a)) *a = flush_to_zero_f32(*a);
    if (is_denormal_f32(*b)) *b = flush_to_zero_f32(*b);
    if (is_denormal_f32(*c)) *c = flush_to_zero_f32(*c);
    return false;
  default:
    assert(false);
    return false; // keeps the function well-defined under NDEBUG
  }
}

/*
 * Denormal pre-processing for a two-operand single-precision operation.
 *
 * Depending on the dn field of the FPSCR word at *fps:
 *   dn == 0: if neither *a nor *b is a NaN and at least one of them is
 *            denormalized, sets cause_fpu_error and returns true (the
 *            caller must not perform the operation). Otherwise returns
 *            false and leaves the operands untouched.
 *   dn == 1: replaces each denormalized operand, in place, with a zero of
 *            the same sign and returns false.
 *
 * Any other dn value trips the assertion; when assertions are compiled out
 * the function returns false instead of running off its end.
 */
static inline bool dn_f32_f32(float32_t * a, float32_t * b, uint32_t * fps)
{
  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;
  switch (fpscr->dn) {
  case 0:
    /*
     * an FPU error is signaled if FPSCR.DN is zero, neither input is
     * a NaN and either input is a denormalized number.
     */
    if ((!is_nan_f32(*a)) && (!is_nan_f32(*b))) { // neither input is a NaN
      if (is_denormal_f32(*a) || is_denormal_f32(*b)) { // either input is denormalized
        fpscr->cause_fpu_error = 1;
        return true; // do not continue
      }
    }
    return false;
  case 1:
    /*
     * When FPSCR.DN is 1, a positive denormalized number is treated as
     * +0 and a negative denormalized number as -0. This flush-to-zero
     * treatment is applied before exception detection and special case
     * handling.
     */
    if (is_denormal_f32(*a)) *a = flush_to_zero_f32(*a);
    if (is_denormal_f32(*b)) *b = flush_to_zero_f32(*b);
    return false;
  default:
    assert(false);
    return false; // keeps the function well-defined under NDEBUG
  }
}

/*
 * Denormal pre-processing for a one-operand single-precision operation.
 *
 * Depending on the dn field of the FPSCR word at *fps:
 *   dn == 0: if *a is denormalized, sets cause_fpu_error and returns true
 *            (the caller must not perform the operation). Otherwise
 *            returns false and leaves *a untouched.
 *   dn == 1: if *a is denormalized, replaces it in place with a zero of
 *            the same sign; returns false.
 *
 * Any other dn value trips the assertion; when assertions are compiled out
 * the function returns false instead of running off its end.
 */
static inline bool dn_f32(float32_t * a, uint32_t * fps)
{
  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;
  switch (fpscr->dn) {
  case 0:
    /*
     * an FPU error is signaled if FPSCR.DN is zero and the input is a
     * denormalized number (a denormalized number is never a NaN, so no
     * separate NaN check is needed).
     */
    if (is_denormal_f32(*a)) { // the input is denormalized
      fpscr->cause_fpu_error = 1;
      return true; // do not continue
    }
    return false;
  case 1:
    /*
     * When FPSCR.DN is 1, a positive denormalized number is treated as
     * +0 and a negative denormalized number as -0. This flush-to-zero
     * treatment is applied before exception detection and special case
     * handling.
     */
    if (is_denormal_f32(*a)) *a = flush_to_zero_f32(*a);
    return false;
  default:
    assert(false);
    return false; // keeps the function well-defined under NDEBUG
  }
}

/*
 * True when the bit pattern in a has all exponent bits
 * (0x7ff00000'00000000) set and a non-zero fraction
 * (0x000fffff'ffffffff).
 */
static inline bool is_nan_f64(float64_t a)
{
  const uint64_t exponent = a.v & 0x7ff00000'00000000;
  const uint64_t fraction = a.v & 0x000fffff'ffffffff;
  return exponent == 0x7ff00000'00000000 && fraction != 0;
}

/*
 * True when the bit pattern in a has all exponent bits
 * (0x7ff00000'00000000) clear and a non-zero fraction
 * (0x000fffff'ffffffff).
 */
static inline bool is_denormal_f64(float64_t a)
{
  const uint64_t exponent = a.v & 0x7ff00000'00000000;
  const uint64_t fraction = a.v & 0x000fffff'ffffffff;
  return exponent == 0x00000000'00000000 && fraction != 0;
}

/* Returns a with everything but the sign bit (0x80000000'00000000) cleared. */
static inline float64_t flush_to_zero_f64(float64_t a)
{
  float64_t signed_zero;
  signed_zero.v = a.v & 0x80000000'00000000;
  return signed_zero;
}

/*
 * Denormal pre-processing for a two-operand double-precision operation.
 *
 * Depending on the dn field of the FPSCR word at *fps:
 *   dn == 0: if neither *a nor *b is a NaN and at least one of them is
 *            denormalized, sets cause_fpu_error and returns true (the
 *            caller must not perform the operation). Otherwise returns
 *            false and leaves the operands untouched.
 *   dn == 1: replaces each denormalized operand, in place, with a zero of
 *            the same sign and returns false.
 *
 * Any other dn value trips the assertion; when assertions are compiled out
 * the function returns false instead of running off its end.
 */
static inline bool dn_f64_f64(float64_t * a, float64_t * b, uint32_t * fps)
{
  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;
  switch (fpscr->dn) {
  case 0:
    /*
     * an FPU error is signaled if FPSCR.DN is zero, neither input is
     * a NaN and either input is a denormalized number.
     */
    if ((!is_nan_f64(*a)) && (!is_nan_f64(*b))) { // neither input is a NaN
      if (is_denormal_f64(*a) || is_denormal_f64(*b)) { // either input is denormalized
        fpscr->cause_fpu_error = 1;
        return true; // do not continue
      }
    }
    return false;
  case 1:
    /*
     * When FPSCR.DN is 1, a positive denormalized number is treated as
     * +0 and a negative denormalized number as -0. This flush-to-zero
     * treatment is applied before exception detection and special case
     * handling.
     */
    if (is_denormal_f64(*a)) *a = flush_to_zero_f64(*a);
    if (is_denormal_f64(*b)) *b = flush_to_zero_f64(*b);
    return false;
  default:
    assert(false);
    return false; // keeps the function well-defined under NDEBUG
  }
}

/*
 * Denormal pre-processing for a one-operand double-precision operation.
 *
 * Depending on the dn field of the FPSCR word at *fps:
 *   dn == 0: if *a is denormalized, sets cause_fpu_error and returns true
 *            (the caller must not perform the operation). Otherwise
 *            returns false and leaves *a untouched.
 *   dn == 1: if *a is denormalized, replaces it in place with a zero of
 *            the same sign; returns false.
 *
 * Any other dn value trips the assertion; when assertions are compiled out
 * the function returns false instead of running off its end.
 */
static inline bool dn_f64(float64_t * a, uint32_t * fps)
{
  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;
  switch (fpscr->dn) {
  case 0:
    /*
     * an FPU error is signaled if FPSCR.DN is zero and the input is a
     * denormalized number (a denormalized number is never a NaN, so no
     * separate NaN check is needed).
     */
    if (is_denormal_f64(*a)) { // the input is denormalized
      fpscr->cause_fpu_error = 1;
      return true; // do not continue
    }
    return false;
  case 1:
    /*
     * When FPSCR.DN is 1, a positive denormalized number is treated as
     * +0 and a negative denormalized number as -0. This flush-to-zero
     * treatment is applied before exception detection and special case
     * handling.
     */
    if (is_denormal_f64(*a)) *a = flush_to_zero_f64(*a);
    return false;
  default:
    assert(false);
    return false; // keeps the function well-defined under NDEBUG
  }
}

/*
 * Single-precision add: *op2 = op1 + *op2.
 *
 * The operands first go through dn_f32_f32(); if that reports an FPU
 * error nothing else is done. Otherwise the rounding mode is taken from
 * *fps, the sum is stored to *op2 and the exception state is folded back
 * into *fps.
 */
static inline void fadd_s(float32_t op1, float32_t * op2, uint32_t * fps)
{
  const bool fpu_error = dn_f32_f32(&op1, op2, fps);
  if (!fpu_error) {
    set_rounding_mode(fps);
    const float32_t sum = f32_add(op1, *op2);
    *op2 = sum;
    update_fpscr(fps);
  }
}

/*
 * Double-precision add: *op2 = op1 + *op2.
 *
 * The operands first go through dn_f64_f64(); if that reports an FPU
 * error nothing else is done. Otherwise the rounding mode is taken from
 * *fps, the sum is stored to *op2 and the exception state is folded back
 * into *fps.
 */
static inline void fadd_d(float64_t op1, float64_t * op2, uint32_t * fps)
{
  const bool fpu_error = dn_f64_f64(&op1, op2, fps);
  if (!fpu_error) {
    set_rounding_mode(fps);
    const float64_t sum = f64_add(op1, *op2);
    *op2 = sum;
    update_fpscr(fps);
  }
}

/*
 * Single-precision subtract: *op2 = *op2 - op1.
 *
 * The operands first go through dn_f32_f32(); if that reports an FPU
 * error nothing else is done. Otherwise the rounding mode is taken from
 * *fps, the difference is stored to *op2 and the exception state is
 * folded back into *fps.
 */
static inline void fsub_s(float32_t * op2, float32_t op1, uint32_t * fps)
{
  const bool fpu_error = dn_f32_f32(op2, &op1, fps);
  if (!fpu_error) {
    set_rounding_mode(fps);
    const float32_t difference = f32_sub(*op2, op1);
    *op2 = difference;
    update_fpscr(fps);
  }
}

/*
 * Double-precision subtract: *op2 = *op2 - op1.
 *
 * The operands first go through dn_f64_f64(); if that reports an FPU
 * error nothing else is done. Otherwise the rounding mode is taken from
 * *fps, the difference is stored to *op2 and the exception state is
 * folded back into *fps.
 */
static inline void fsub_d(float64_t * op2, float64_t op1, uint32_t * fps)
{
  const bool fpu_error = dn_f64_f64(op2, &op1, fps);
  if (!fpu_error) {
    set_rounding_mode(fps);
    const float64_t difference = f64_sub(*op2, op1);
    *op2 = difference;
    update_fpscr(fps);
  }
}

/*
 * Single-precision multiply: *op2 = op1 * *op2.
 *
 * The operands first go through dn_f32_f32(); if that reports an FPU
 * error nothing else is done. Otherwise the rounding mode is taken from
 * *fps, the product is stored to *op2 and the exception state is folded
 * back into *fps.
 */
static inline void fmul_s(float32_t op1, float32_t * op2, uint32_t * fps)
{
  const bool fpu_error = dn_f32_f32(&op1, op2, fps);
  if (!fpu_error) {
    set_rounding_mode(fps);
    const float32_t product = f32_mul(op1, *op2);
    *op2 = product;
    update_fpscr(fps);
  }
}

/*
 * Double-precision multiply: *op2 = op1 * *op2.
 *
 * The operands first go through dn_f64_f64(); if that reports an FPU
 * error nothing else is done. Otherwise the rounding mode is taken from
 * *fps, the product is stored to *op2 and the exception state is folded
 * back into *fps.
 */
static inline void fmul_d(float64_t op1, float64_t * op2, uint32_t * fps)
{
  const bool fpu_error = dn_f64_f64(&op1, op2, fps);
  if (!fpu_error) {
    set_rounding_mode(fps);
    const float64_t product = f64_mul(op1, *op2);
    *op2 = product;
    update_fpscr(fps);
  }
}

/*
 * Single-precision divide: *op2 = *op2 / op1.
 *
 * The operands first go through dn_f32_f32(); if that reports an FPU
 * error nothing else is done. Otherwise the rounding mode is taken from
 * *fps, the quotient is stored to *op2 and the exception state is folded
 * back into *fps.
 */
static inline void fdiv_s(float32_t * op2, float32_t op1, uint32_t * fps)
{
  const bool fpu_error = dn_f32_f32(op2, &op1, fps);
  if (!fpu_error) {
    set_rounding_mode(fps);
    const float32_t quotient = f32_div(*op2, op1);
    *op2 = quotient;
    update_fpscr(fps);
  }
}

/*
 * Double-precision divide: *op2 = *op2 / op1.
 *
 * The operands first go through dn_f64_f64(); if that reports an FPU
 * error nothing else is done. Otherwise the rounding mode is taken from
 * *fps, the quotient is stored to *op2 and the exception state is folded
 * back into *fps.
 */
static inline void fdiv_d(float64_t * op2, float64_t op1, uint32_t * fps)
{
  const bool fpu_error = dn_f64_f64(op2, &op1, fps);
  if (!fpu_error) {
    set_rounding_mode(fps);
    const float64_t quotient = f64_div(*op2, op1);
    *op2 = quotient;
    update_fpscr(fps);
  }
}

/*
 * Converts the signed 32-bit integer fpul to single precision using the
 * rounding mode selected by *fps, then folds the exception state back
 * into *fps. Returns the converted value.
 */
static inline float32_t float_ls(int32_t fpul, uint32_t * fps)
{
  float32_t converted;

  set_rounding_mode(fps);
  converted = i32_to_f32(fpul);
  update_fpscr(fps);

  return converted;
}

/*
 * Converts the signed 32-bit integer fpul to double precision, then
 * folds the exception state back into *fps. Returns the converted value.
 */
static inline float64_t float_ld(int32_t fpul, uint32_t * fps)
{
  float64_t converted;

  set_rounding_mode(fps);
  converted = i32_to_f64(fpul);
  update_fpscr(fps);

  return converted;
}

/*
 * Converts the single-precision value op1 to a signed 32-bit integer.
 *
 * The conversion always rounds with softfloat_round_minMag, regardless of
 * the mode programmed from *fps, and passes `false` for the "exact"
 * argument of f32_to_i32(). The exception state is folded back into *fps
 * afterwards.
 */
static inline int32_t ftrc_sl(float32_t op1, uint32_t * fps)
{
  int32_t truncated;

  set_rounding_mode(fps);
  truncated = f32_to_i32(op1, softfloat_round_minMag, false);
  update_fpscr(fps);

  return truncated;
}

/*
 * Converts the double-precision value op1 to a signed 32-bit integer.
 *
 * The conversion always rounds with softfloat_round_minMag, regardless of
 * the mode programmed from *fps, and passes `false` for the "exact"
 * argument of f64_to_i32(). The exception state is folded back into *fps
 * afterwards.
 */
static inline int32_t ftrc_dl(float64_t op1, uint32_t * fps)
{
  int32_t truncated;

  set_rounding_mode(fps);
  truncated = f64_to_i32(op1, softfloat_round_minMag, false);
  update_fpscr(fps);

  return truncated;
}

/* Returns op1 with its sign bit (bit 31) cleared; FPSCR is not involved. */
static inline float32_t fabs_s(float32_t op1)
{
  return (float32_t){ op1.v & 0x7fffffff };
}

/* Returns op1 with its sign bit (bit 63) cleared; FPSCR is not involved. */
static inline float64_t fabs_d(float64_t op1)
{
  return (float64_t){ op1.v & 0x7fffffff'ffffffff };
}

/* Returns op1 with its sign bit (bit 31) inverted; FPSCR is not involved. */
static inline float32_t fneg_s(float32_t op1)
{
  return (float32_t){ op1.v ^ 0x80000000 };
}

/* Returns op1 with its sign bit (bit 63) inverted; FPSCR is not involved. */
static inline float64_t fneg_d(float64_t op1)
{
  return (float64_t){ op1.v ^ 0x80000000'00000000 };
}

/*
 * Converts the double-precision value op1 to single precision and returns
 * the raw 32-bit pattern of the result.
 *
 * op1 first goes through dn_f64(); if that reports an FPU error, 0 is
 * returned and no conversion takes place. Otherwise the rounding mode is
 * taken from *fps and the exception state is folded back into *fps.
 */
static inline uint32_t fcnv_ds(float64_t op1, uint32_t * fps)
{
  uint32_t raw = 0;

  if (!dn_f64(&op1, fps)) {
    set_rounding_mode(fps);
    const float32_t narrowed = f64_to_f32(op1);
    update_fpscr(fps);
    raw = narrowed.v;
  }
  return raw;
}

/*
 * Treats fpul as the raw bit pattern of a single-precision value and
 * converts it to double precision.
 *
 * The value first goes through dn_f32(); if that reports an FPU error, a
 * float64_t with an all-zero pattern is returned and no conversion takes
 * place. Otherwise the rounding mode is taken from *fps and the exception
 * state is folded back into *fps.
 */
static inline float64_t fcnv_sd(int32_t fpul, uint32_t * fps)
{
  float32_t source = { fpul };
  float64_t widened = { 0 };

  if (!dn_f32(&source, fps)) {
    set_rounding_mode(fps);
    widened = f32_to_f64(source);
    update_fpscr(fps);
  }
  return widened;
}

/*
 * Single-precision equality compare: returns f32_eq(op1, op2).
 *
 * The operands first go through dn_f32_f32(); if that reports an FPU
 * error the result is false and no comparison is made. Otherwise the
 * exception state raised by the comparison is folded back into *fps.
 */
static inline bool fcmpeq_s(float32_t op1, float32_t op2, uint32_t * fps)
{
  bool equal = false;

  if (!dn_f32_f32(&op1, &op2, fps)) {
    set_rounding_mode(fps);
    equal = f32_eq(op1, op2);
    update_fpscr(fps);
  }
  return equal;
}

/*
 * Double-precision equality compare: returns f64_eq(op1, op2).
 *
 * The operands first go through dn_f64_f64(); if that reports an FPU
 * error the result is false and no comparison is made. Otherwise the
 * exception state raised by the comparison is folded back into *fps.
 */
static inline bool fcmpeq_d(float64_t op1, float64_t op2, uint32_t * fps)
{
  bool equal = false;

  if (!dn_f64_f64(&op1, &op2, fps)) {
    set_rounding_mode(fps);
    equal = f64_eq(op1, op2);
    update_fpscr(fps);
  }
  return equal;
}

/*
 * Single-precision greater-than compare: returns true when op2 > op1.
 *
 * The operands first go through dn_f32_f32(); if that reports an FPU
 * error the result is false and no comparison is made. Otherwise the
 * exception state raised by the comparison is folded back into *fps.
 *
 * The comparison is expressed as "op1 < op2" via f32_lt() rather than as
 * the negation of "op2 <= op1": the two only agree for ordered operands.
 * When either operand is a NaN, f32_le() yields false, so negating it
 * would report "greater than" as true; f32_lt() yields false for
 * unordered operands, which is the required result.
 */
static inline bool fcmpgt_s(float32_t op2, float32_t op1, uint32_t * fps)
{
  if (dn_f32_f32(&op2, &op1, fps)) return false;

  set_rounding_mode(fps);
  bool result = f32_lt(op1, op2); // op2 > op1; false if unordered
  update_fpscr(fps);
  return result;
}

/*
 * Double-precision greater-than compare: returns true when op2 > op1.
 *
 * The operands first go through dn_f64_f64(); if that reports an FPU
 * error the result is false and no comparison is made. Otherwise the
 * exception state raised by the comparison is folded back into *fps.
 *
 * The comparison is expressed as "op1 < op2" via f64_lt() rather than as
 * the negation of "op2 <= op1": the two only agree for ordered operands.
 * When either operand is a NaN, f64_le() yields false, so negating it
 * would report "greater than" as true; f64_lt() yields false for
 * unordered operands, which is the required result.
 */
static inline bool fcmpgt_d(float64_t op2, float64_t op1, uint32_t * fps)
{
  if (dn_f64_f64(&op2, &op1, fps)) return false;

  set_rounding_mode(fps);
  bool result = f64_lt(op1, op2); // op2 > op1; false if unordered
  update_fpscr(fps);
  return result;
}

/*
 * Single-precision multiply-accumulate: *op2 = fr0 * op1 + *op2, computed
 * with f32_mulAdd().
 *
 * All three operands first go through dn_f32_f32_f32(); if that reports
 * an FPU error nothing else is done. Otherwise the rounding mode is taken
 * from *fps, the result is stored to *op2 and the exception state is
 * folded back into *fps.
 */
static inline void fmac_s(float32_t fr0, float32_t op1, float32_t * op2, uint32_t * fps)
{
  const bool fpu_error = dn_f32_f32_f32(&fr0, &op1, op2, fps);
  if (!fpu_error) {
    set_rounding_mode(fps);
    const float32_t accumulated = f32_mulAdd(fr0, op1, *op2);
    *op2 = accumulated;
    update_fpscr(fps);
  }
}

/*
 * Single-precision square root, in place: *op1 = sqrt(*op1).
 *
 * The operand first goes through dn_f32(); if that reports an FPU error
 * nothing else is done. Otherwise the rounding mode is taken from *fps,
 * the root is stored to *op1 and the exception state is folded back into
 * *fps.
 */
static inline void fsqrt_s(float32_t * op1, uint32_t * fps)
{
  const bool fpu_error = dn_f32(op1, fps);
  if (!fpu_error) {
    set_rounding_mode(fps);
    const float32_t root = f32_sqrt(*op1);
    *op1 = root;
    update_fpscr(fps);
  }
}

/*
 * Double-precision square root, in place: *op1 = sqrt(*op1).
 *
 * The operand first goes through dn_f64(); if that reports an FPU error
 * nothing else is done. Otherwise the rounding mode is taken from *fps,
 * the root is stored to *op1 and the exception state is folded back into
 * *fps.
 */
static inline void fsqrt_d(float64_t * op1, uint32_t * fps)
{
  const bool fpu_error = dn_f64(op1, fps);
  if (!fpu_error) {
    set_rounding_mode(fps);
    const float64_t root = f64_sqrt(*op1);
    *op1 = root;
    update_fpscr(fps);
  }
}