From 27d9958a0a7cf561447f630f8a017780fe5deb53 Mon Sep 17 00:00:00 2001 From: Zack Buhman Date: Tue, 23 Apr 2024 16:00:29 +0800 Subject: [PATCH] initial FPU implementation --- ast_to_c_source.py | 8 +- ast_transformers.py | 75 ++- c/Makefile | 10 +- c/decode_execute.c | 366 +++++++++++++ c/decode_print.c | 420 ++++++++++++++ c/exception.c | 2 +- c/exception.h | 2 +- c/fpu.h | 539 ++++++++++++++++++ c/impl.c | 1056 +++++++++++++++++++++++++++++++++++- c/impl.h | 110 +++- c/operations.h | 37 ++ c/state.h | 12 +- c/state_helpers.h | 21 + c/status_bits.h | 16 +- disabled_instructions.py | 23 - generate_bits.py | 8 +- generate_impl.py | 1 + identifier_substitution.py | 34 +- 18 files changed, 2649 insertions(+), 91 deletions(-) create mode 100644 c/fpu.h diff --git a/ast_to_c_source.py b/ast_to_c_source.py index 35c0b3f..e3b4551 100644 --- a/ast_to_c_source.py +++ b/ast_to_c_source.py @@ -222,7 +222,13 @@ def identifier(token): yield token.token def constant(elem): - yield elem.token.lower() + if elem.token.lower() in {"0x00000000", "0x3f800000"}: + # hack for fldi0/fldi1 + yield "(float32_t){ " + yield elem.token.lower() + yield " }" + else: + yield elem.token.lower() def generate(elem): mapping = { diff --git a/ast_transformers.py b/ast_transformers.py index a9b2294..67d93c9 100644 --- a/ast_transformers.py +++ b/ast_transformers.py @@ -1,4 +1,5 @@ from pprint import pprint +from collections import defaultdict from parser import Tree from lexer import Identifier, Punctuator, IntegerConstant @@ -19,7 +20,9 @@ def find_locals__walk_assignment_lhs(tree): def find_locals__walk_assignment(tree): if type(tree) is Tree: if tree.operation == "assignment": - yield from find_locals__walk_assignment_lhs(tree.children[0]) + for name in find_locals__walk_assignment_lhs(tree.children[0]): + yield name, tree.children[1] + for child in tree.children[1:]: yield from find_locals__walk_assignment(child) else: @@ -106,33 +109,75 @@ def transform_assignment_list(tree): else: return tree +function_types = { + "FloatValue32": "float32_t", + "FloatValue64": "float64_t", + "FLOAT_LS": "float32_t", + "FLOAT_LD": "float64_t", + "FCNV_DS": "uint32_t", + "FCNV_SD": "float64_t", +} + +name_types = { + "fps": "uint32_t", + "sr": "uint32_t", +} + +def guess_type(name, tree, declared): + if name in name_types: + return name_types[name] + elif type(tree) is Tree and tree.operation == 'function_call': + assert type(tree.children[0]) is Identifier, tree + function_name = tree.children[0].token + if function_name in function_types: + return function_types[function_name] + elif type(tree) is Identifier and tree.token in declared: + return declared[tree.token] + elif type(tree) is IntegerConstant: + if tree.token.lower() in {"0x00000000", "0x3f800000"}: + # hack for fldi0/fldi1 + return "float32_t" + # fallback + return 'int64_t' + def transform_local_declarations(statements): - all_locals = [] - for statement in statements: - all_locals.extend(find_locals__walk_assignment(statement)) + def all_locals(): + for statement in statements: + yield from find_locals__walk_assignment(statement) - set_locals = [] - for local in all_locals: - if not any(s.token == local for s in set_locals): - set_locals.append(Identifier(line=-1, token=local)) + declared = dict() + set_locals = defaultdict(list) - if set_locals: + for name, tree in all_locals(): + if name in declared: + continue + identifier_type = guess_type(name, tree, declared) + declared[name] = identifier_type + set_locals[identifier_type].append(Identifier(line=-1, token=name)) + + 
for identifier_type, identifiers in set_locals.items(): yield Tree(operation="expression_statement", children=[Tree(operation="declaration", - children=[Identifier(line=-1, token="int64_t"), *set_locals])]) + children=[Identifier(line=-1, token=identifier_type), *identifiers])]) -def transform_identifiers(tree): +def transform_identifiers(tree, parent): if type(tree) is Tree: return Tree( operation=tree.operation, - children=[transform_identifiers(child) for child in tree.children] + children=[transform_identifiers(child, tree) for child in tree.children] ) elif type(tree) is Identifier: token = tree if token.token in identifier_substitution.mapping: + new_name = identifier_substitution.mapping[token.token] + if token.token == 'FPSCR': + assert type(parent) is Tree, parent + if parent.operation == 'member': + new_name = 'state->fpscr.bits' + return Identifier( line=token.line, - token=identifier_substitution.mapping[token.token] + token=new_name ) else: return token @@ -149,6 +194,8 @@ require_extra_arguments = { "ReadMemory8" : "map", "ReadMemory16" : "map", "ReadMemory32" : "map", + "WriteMemoryPair32": "map", + "ReadMemoryPair32" : "map", } def transform_function_arguments(tree): @@ -190,5 +237,5 @@ def transform_statements(statements): for statement in statements: statement = transform_assignment_list(statement) statement = transform_function_arguments(statement) - statement = transform_identifiers(statement) + statement = transform_identifiers(statement, None) yield statement diff --git a/c/Makefile b/c/Makefile index 3faa0d7..09abbcb 100644 --- a/c/Makefile +++ b/c/Makefile @@ -3,11 +3,16 @@ DEBUG = -g -gdwarf-4 AFLAGS += --fatal-warnings CFLAGS += -falign-functions=4 -ffunction-sections -fdata-sections -fshort-enums -CFLAGS += -Wall -Werror -Wfatal-errors -Wno-error=dangling-else +CFLAGS += -Wall -Werror -Wfatal-errors -Wno-dangling-else CFLAGS += -std=c2x DEPFLAGS = -MMD -MP +SOFTFLOAT ?= ../../SoftFloat-3e +SOFTFLOAT_A ?= $(SOFTFLOAT)/build/Linux-x86_64-GCC/softfloat.a +SOFTFLOAT_I ?= $(SOFTFLOAT)/source/include +CFLAGS += -I$(SOFTFLOAT_I) + CC = $(TARGET)gcc OBJS = \ @@ -17,7 +22,8 @@ OBJS = \ execute.o \ impl.o \ main.o \ - ram.o + ram.o \ + $(SOFTFLOAT_A) all: main diff --git a/c/decode_execute.c b/c/decode_execute.c index 17c721c..3fc969f 100644 --- a/c/decode_execute.c +++ b/c/decode_execute.c @@ -454,6 +454,104 @@ enum decode_status decode_and_execute_instruction(struct architectural_state * s exts_w__source_and_destination_operands(state, map, m, n); return DECODE__DEFINED; } + case 0b1111000000000000: // FADD FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fadd__source_and_destination_operands(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000000001: // FSUB FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fsub__source_and_destination_operands(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000000010: // FMUL FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fmul__source_and_destination_operands(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000000011: // FDIV FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fdiv__source_and_destination_operands(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000000100: // FCMP/EQ FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = 
(code >> 8) & ((1 << 4) - 1); + fcmp_eq__source_and_destination_operands(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000000101: // FCMP/GT FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fcmp_gt__source_and_destination_operands(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000000110: // FMOV.S @(R0,Rm),FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fmov_s__load_indexed_register_indirect(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000000111: // FMOV.S FRm,@(R0,Rn) + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fmov_s__store_indexed_register_indirect(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000001000: // FMOV.S @Rm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fmov_s__load_register_direct_data_transfer(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000001001: // FMOV.S @Rm+,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fmov_s__load_direct_data_transfer_from_register(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000001010: // FMOV.S FRm,@Rn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fmov_s__store_register_direct_data_transfer(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000001011: // FMOV.S FRm,@-Rn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fmov_s__store_direct_data_transfer_from_register(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000001100: // FMOV FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fmov__source_and_destination_operands(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000001110: // FMAC FR0,FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fmac__fr0_frm_frn(state, map, m, n); + return DECODE__DEFINED; + } } switch (code & 0b1111111100000000) { case 0b1000000000000000: // MOV.B R0,@(disp,Rn) @@ -1026,6 +1124,60 @@ enum decode_status decode_and_execute_instruction(struct architectural_state * s ldc__transfer_to_dbr(state, map, m); return DECODE__DEFINED; } + case 0b1111000000001101: // FSTS FPUL,FRn + { + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fsts__fpul_to_frn(state, map, n); + return DECODE__DEFINED; + } + case 0b1111000000011101: // FLDS FRm,FPUL + { + uint32_t m = (code >> 8) & ((1 << 4) - 1); + flds__frm_to_fpul(state, map, m); + return DECODE__DEFINED; + } + case 0b1111000000101101: // FLOAT FPUL,FRn + { + uint32_t n = (code >> 8) & ((1 << 4) - 1); + float__fpul_to_frn(state, map, n); + return DECODE__DEFINED; + } + case 0b1111000000111101: // FTRC FRm,FPUL + { + uint32_t m = (code >> 8) & ((1 << 4) - 1); + ftrc__frm_to_fpul(state, map, m); + return DECODE__DEFINED; + } + case 0b1111000001001101: // FNEG FRn + { + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fneg__destination_operand_only(state, map, n); + return DECODE__DEFINED; + } + case 0b1111000001011101: // FABS FRn + { + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fabs__destination_operand_only(state, map, n); + return DECODE__DEFINED; + } + case 0b1111000001101101: // FSQRT FRn + { + uint32_t n = (code >> 8) & ((1 << 4) - 1); + 
fsqrt__destination_operand_only(state, map, n); + return DECODE__DEFINED; + } + case 0b1111000010001101: // FLDI0 FRn + { + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fldi0__destination_operand_only(state, map, n); + return DECODE__DEFINED; + } + case 0b1111000010011101: // FLDI1 FRn + { + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fldi1__destination_operand_only(state, map, n); + return DECODE__DEFINED; + } } switch (code & 0b1111111111111111) { case 0b0000000000001000: // CLRT @@ -1078,6 +1230,16 @@ enum decode_status decode_and_execute_instruction(struct architectural_state * s sets__no_operand(state, map); return DECODE__DEFINED; } + case 0b1111001111111101: // FSCHG + { + fschg__no_operand(state, map); + return DECODE__DEFINED; + } + case 0b1111101111111101: // FRCHG + { + frchg__no_operand(state, map); + return DECODE__DEFINED; + } } switch (code & 0b1111000010001111) { case 0b0000000010000010: // STC Rm_BANK,Rn @@ -1109,5 +1271,209 @@ enum decode_status decode_and_execute_instruction(struct architectural_state * s return DECODE__DEFINED; } } + switch (code & 0b1111000100011111) { + case 0b1111000000000000: // FADD DRm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fadd__source_and_destination_operands_double(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000000001: // FSUB DRm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fsub__source_and_destination_operands_double(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000000010: // FMUL DRm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fmul__source_and_destination_operands_double(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000000011: // FDIV DRm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fdiv__source_and_destination_operands_double(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000000100: // FCMP/EQ DRm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fcmp_eq__source_and_destination_operands_double(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000000101: // FCMP/GT DRm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fcmp_gt__source_and_destination_operands_double(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000001100: // FMOV DRm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fmov__source_and_destination_operands_double(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000011100: // FMOV XDm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fmov__bank_to_double(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000100001100: // FMOV DRm,XDn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fmov__double_to_bank(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000100011100: // FMOV XDm,XDn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fmov__source_and_destination_operands_bank(state, map, m, n); + return DECODE__DEFINED; + } + } + switch (code & 0b1111000100001111) { + case 0b1111000000000110: // FMOV @(R0,Rm),DRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = 
(code >> 9) & ((1 << 3) - 1); + fmov__load_indexed_register_indirect_double(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000001000: // FMOV @Rm,DRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fmov__load_register_direct_data_transfer_double(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000001001: // FMOV @Rm+,DRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fmov__load_direct_data_transfer_from_register_double(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000100000110: // FMOV @(R0,Rm),XDn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fmov__load_indexed_register_indirect_bank(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000100001000: // FMOV @Rm,XDn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fmov__load_register_direct_data_transfer_bank(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000100001001: // FMOV @Rm+,XDn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fmov__load_direct_data_transfer_from_register_bank(state, map, m, n); + return DECODE__DEFINED; + } + } + switch (code & 0b1111000000011111) { + case 0b1111000000000111: // FMOV DRm,@(R0,Rn) + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fmov__store_indexed_register_indirect_double(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000001010: // FMOV DRm,@Rn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fmov__store_register_direct_data_transfer_double(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000001011: // FMOV DRm,@-Rn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fmov__store_direct_data_transfer_from_register_double(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000010111: // FMOV XDm,@(R0,Rn) + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fmov__store_indexed_register_indirect_bank(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000011010: // FMOV XDm,@Rn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fmov__store_register_direct_data_transfer_bank(state, map, m, n); + return DECODE__DEFINED; + } + case 0b1111000000011011: // FMOV XDm,@-Rn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + fmov__store_direct_data_transfer_from_register_bank(state, map, m, n); + return DECODE__DEFINED; + } + } + switch (code & 0b1111000111111111) { + case 0b1111000000101101: // FLOAT FPUL,DRn + { + uint32_t n = (code >> 9) & ((1 << 3) - 1); + float__fpul_to_drn(state, map, n); + return DECODE__DEFINED; + } + case 0b1111000000111101: // FTRC DRm,FPUL + { + uint32_t m = (code >> 9) & ((1 << 3) - 1); + ftrc__drm_to_fpul(state, map, m); + return DECODE__DEFINED; + } + case 0b1111000001001101: // FNEG DRn + { + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fneg__destination_operand_only_double(state, map, n); + return DECODE__DEFINED; + } + case 0b1111000001011101: // FABS DRn + { + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fabs__destination_operand_only_double(state, map, n); + return DECODE__DEFINED; + } + case 0b1111000001101101: // FSQRT DRn + { + uint32_t n = 
(code >> 9) & ((1 << 3) - 1); + fsqrt__destination_operand_only_double(state, map, n); + return DECODE__DEFINED; + } + case 0b1111000010101101: // FCNVSD FPUL,DRn + { + uint32_t n = (code >> 9) & ((1 << 3) - 1); + fcnvsd__fpul_to_drn(state, map, n); + return DECODE__DEFINED; + } + case 0b1111000010111101: // FCNVDS DRm,FPUL + { + uint32_t m = (code >> 9) & ((1 << 3) - 1); + fcnvds__drm_to_fpul(state, map, m); + return DECODE__DEFINED; + } + } return DECODE__UNDEFINED; } diff --git a/c/decode_print.c b/c/decode_print.c index 8433960..d25fda1 100644 --- a/c/decode_print.c +++ b/c/decode_print.c @@ -519,6 +519,118 @@ enum decode_status decode_and_print_instruction(struct architectural_state * sta *instruction_buf = "EXTS.W"; return DECODE__DEFINED; } + case 0b1111000000000000: // FADD FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d,FR%d", m, n); + *instruction_buf = "FADD"; + return DECODE__DEFINED; + } + case 0b1111000000000001: // FSUB FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d,FR%d", m, n); + *instruction_buf = "FSUB"; + return DECODE__DEFINED; + } + case 0b1111000000000010: // FMUL FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d,FR%d", m, n); + *instruction_buf = "FMUL"; + return DECODE__DEFINED; + } + case 0b1111000000000011: // FDIV FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d,FR%d", m, n); + *instruction_buf = "FDIV"; + return DECODE__DEFINED; + } + case 0b1111000000000100: // FCMP/EQ FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d,FR%d", m, n); + *instruction_buf = "FCMP/EQ"; + return DECODE__DEFINED; + } + case 0b1111000000000101: // FCMP/GT FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d,FR%d", m, n); + *instruction_buf = "FCMP/GT"; + return DECODE__DEFINED; + } + case 0b1111000000000110: // FMOV.S @(R0,Rm),FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "@(R0,R%d),FR%d", m, n); + *instruction_buf = "FMOV.S"; + return DECODE__DEFINED; + } + case 0b1111000000000111: // FMOV.S FRm,@(R0,Rn) + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d,@(R0,R%d)", m, n); + *instruction_buf = "FMOV.S"; + return DECODE__DEFINED; + } + case 0b1111000000001000: // FMOV.S @Rm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "@R%d,FR%d", m, n); + *instruction_buf = "FMOV.S"; + return DECODE__DEFINED; + } + case 0b1111000000001001: // FMOV.S @Rm+,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "@R%d+,FR%d", m, n); + *instruction_buf = "FMOV.S"; + return DECODE__DEFINED; + } + case 0b1111000000001010: // FMOV.S FRm,@Rn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d,@R%d", m, n); + *instruction_buf = "FMOV.S"; + return DECODE__DEFINED; + } + case 0b1111000000001011: // FMOV.S 
FRm,@-Rn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d,@-R%d", m, n); + *instruction_buf = "FMOV.S"; + return DECODE__DEFINED; + } + case 0b1111000000001100: // FMOV FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d,FR%d", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + case 0b1111000000001110: // FMAC FR0,FRm,FRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR0,FR%d,FR%d", m, n); + *instruction_buf = "FMAC"; + return DECODE__DEFINED; + } } switch (code & 0b1111111100000000) { case 0b1000000000000000: // MOV.B R0,@(disp,Rn) @@ -1185,6 +1297,69 @@ enum decode_status decode_and_print_instruction(struct architectural_state * sta *instruction_buf = "LDC"; return DECODE__DEFINED; } + case 0b1111000000001101: // FSTS FPUL,FRn + { + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FPUL,FR%d", n); + *instruction_buf = "FSTS"; + return DECODE__DEFINED; + } + case 0b1111000000011101: // FLDS FRm,FPUL + { + uint32_t m = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d,FPUL", m); + *instruction_buf = "FLDS"; + return DECODE__DEFINED; + } + case 0b1111000000101101: // FLOAT FPUL,FRn + { + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FPUL,FR%d", n); + *instruction_buf = "FLOAT"; + return DECODE__DEFINED; + } + case 0b1111000000111101: // FTRC FRm,FPUL + { + uint32_t m = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d,FPUL", m); + *instruction_buf = "FTRC"; + return DECODE__DEFINED; + } + case 0b1111000001001101: // FNEG FRn + { + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d", n); + *instruction_buf = "FNEG"; + return DECODE__DEFINED; + } + case 0b1111000001011101: // FABS FRn + { + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d", n); + *instruction_buf = "FABS"; + return DECODE__DEFINED; + } + case 0b1111000001101101: // FSQRT FRn + { + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d", n); + *instruction_buf = "FSQRT"; + return DECODE__DEFINED; + } + case 0b1111000010001101: // FLDI0 FRn + { + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d", n); + *instruction_buf = "FLDI0"; + return DECODE__DEFINED; + } + case 0b1111000010011101: // FLDI1 FRn + { + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "FR%d", n); + *instruction_buf = "FLDI1"; + return DECODE__DEFINED; + } } switch (code & 0b1111111111111111) { case 0b0000000000001000: // CLRT @@ -1247,6 +1422,18 @@ enum decode_status decode_and_print_instruction(struct architectural_state * sta *instruction_buf = "SETS"; return DECODE__DEFINED; } + case 0b1111001111111101: // FSCHG + { + operand_buf[0] = 0; + *instruction_buf = "FSCHG"; + return DECODE__DEFINED; + } + case 0b1111101111111101: // FRCHG + { + operand_buf[0] = 0; + *instruction_buf = "FRCHG"; + return DECODE__DEFINED; + } } switch (code & 0b1111000010001111) { case 0b0000000010000010: // STC Rm_BANK,Rn @@ -1282,5 +1469,238 @@ enum decode_status decode_and_print_instruction(struct architectural_state * sta return DECODE__DEFINED; } } + switch (code & 0b1111000100011111) { + case 0b1111000000000000: // FADD DRm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & 
((1 << 3) - 1); + snprintf(operand_buf, size, "DR%d,DR%d", m, n); + *instruction_buf = "FADD"; + return DECODE__DEFINED; + } + case 0b1111000000000001: // FSUB DRm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "DR%d,DR%d", m, n); + *instruction_buf = "FSUB"; + return DECODE__DEFINED; + } + case 0b1111000000000010: // FMUL DRm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "DR%d,DR%d", m, n); + *instruction_buf = "FMUL"; + return DECODE__DEFINED; + } + case 0b1111000000000011: // FDIV DRm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "DR%d,DR%d", m, n); + *instruction_buf = "FDIV"; + return DECODE__DEFINED; + } + case 0b1111000000000100: // FCMP/EQ DRm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "DR%d,DR%d", m, n); + *instruction_buf = "FCMP/EQ"; + return DECODE__DEFINED; + } + case 0b1111000000000101: // FCMP/GT DRm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "DR%d,DR%d", m, n); + *instruction_buf = "FCMP/GT"; + return DECODE__DEFINED; + } + case 0b1111000000001100: // FMOV DRm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "DR%d,DR%d", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + case 0b1111000000011100: // FMOV XDm,DRn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "XD%d,DR%d", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + case 0b1111000100001100: // FMOV DRm,XDn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "DR%d,XD%d", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + case 0b1111000100011100: // FMOV XDm,XDn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "XD%d,XD%d", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + } + switch (code & 0b1111000100001111) { + case 0b1111000000000110: // FMOV @(R0,Rm),DRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "@(R0,R%d),DR%d", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + case 0b1111000000001000: // FMOV @Rm,DRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "@R%d,DR%d", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + case 0b1111000000001001: // FMOV @Rm+,DRn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "@R%d+,DR%d", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + case 0b1111000100000110: // FMOV @(R0,Rm),XDn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "@(R0,R%d),XD%d", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + case 0b1111000100001000: // FMOV @Rm,XDn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + 
snprintf(operand_buf, size, "@R%d,XD%d", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + case 0b1111000100001001: // FMOV @Rm+,XDn + { + uint32_t m = (code >> 4) & ((1 << 4) - 1); + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "@R%d+,XD%d", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + } + switch (code & 0b1111000000011111) { + case 0b1111000000000111: // FMOV DRm,@(R0,Rn) + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "DR%d,@(R0,R%d)", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + case 0b1111000000001010: // FMOV DRm,@Rn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "DR%d,@R%d", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + case 0b1111000000001011: // FMOV DRm,@-Rn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "DR%d,@-R%d", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + case 0b1111000000010111: // FMOV XDm,@(R0,Rn) + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "XD%d,@(R0,R%d)", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + case 0b1111000000011010: // FMOV XDm,@Rn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "XD%d,@R%d", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + case 0b1111000000011011: // FMOV XDm,@-Rn + { + uint32_t m = (code >> 5) & ((1 << 3) - 1); + uint32_t n = (code >> 8) & ((1 << 4) - 1); + snprintf(operand_buf, size, "XD%d,@-R%d", m, n); + *instruction_buf = "FMOV"; + return DECODE__DEFINED; + } + } + switch (code & 0b1111000111111111) { + case 0b1111000000101101: // FLOAT FPUL,DRn + { + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "FPUL,DR%d", n); + *instruction_buf = "FLOAT"; + return DECODE__DEFINED; + } + case 0b1111000000111101: // FTRC DRm,FPUL + { + uint32_t m = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "DR%d,FPUL", m); + *instruction_buf = "FTRC"; + return DECODE__DEFINED; + } + case 0b1111000001001101: // FNEG DRn + { + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "DR%d", n); + *instruction_buf = "FNEG"; + return DECODE__DEFINED; + } + case 0b1111000001011101: // FABS DRn + { + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "DR%d", n); + *instruction_buf = "FABS"; + return DECODE__DEFINED; + } + case 0b1111000001101101: // FSQRT DRn + { + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "DR%d", n); + *instruction_buf = "FSQRT"; + return DECODE__DEFINED; + } + case 0b1111000010101101: // FCNVSD FPUL,DRn + { + uint32_t n = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "FPUL,DR%d", n); + *instruction_buf = "FCNVSD"; + return DECODE__DEFINED; + } + case 0b1111000010111101: // FCNVDS DRm,FPUL + { + uint32_t m = (code >> 9) & ((1 << 3) - 1); + snprintf(operand_buf, size, "DR%d,FPUL", m); + *instruction_buf = "FCNVDS"; + return DECODE__DEFINED; + } + } return DECODE__UNDEFINED; } diff --git a/c/exception.c b/c/exception.c index 470aa9b..7e5112f 100644 --- a/c/exception.c +++ b/c/exception.c @@ -349,7 +349,7 @@ void UBRKAFTER(struct architectural_state * state) //PC = (BRCR.UBDE==1 ? 
DBR : VBR + H00000100); } -void FPUEXC(struct architectural_state * state) +void FPUEXC(struct architectural_state * state, uint32_t fps) { exception(state, "FPUEXC"); SPC = PC; diff --git a/c/exception.h b/c/exception.h index 6a71a8e..6311e12 100644 --- a/c/exception.h +++ b/c/exception.h @@ -26,6 +26,6 @@ void FPUDIS(struct architectural_state * state); void SLOTFPUDIS(struct architectural_state * state); void UBRKBEFORE(struct architectural_state * state); void UBRKAFTER(struct architectural_state * state); -void FPUEXC(struct architectural_state * state); +void FPUEXC(struct architectural_state * state, uint32_t fps); void NMI(struct architectural_state * state); void IRLINT(struct architectural_state * state); diff --git a/c/fpu.h b/c/fpu.h new file mode 100644 index 0000000..417bec1 --- /dev/null +++ b/c/fpu.h @@ -0,0 +1,539 @@ +#pragma once + +#include +#include + +#include "status_bits.h" + +/* floating point */ +static inline struct fpscr_bits _fpscr_bits(uint32_t fpscr) +{ + union { + struct fpscr_bits bits; + uint32_t value; + } fpscr_union; + fpscr_union.value = fpscr; + return fpscr_union.bits; +} + +static inline bool fpu_flag_i(uint32_t fps) +{ + return _fpscr_bits(fps).flag_inexact; +} + +static inline bool fpu_flag_u(uint32_t fps) +{ + return _fpscr_bits(fps).flag_underflow; +} + +static inline bool fpu_flag_o(uint32_t fps) +{ + return _fpscr_bits(fps).flag_overflow; +} + +static inline bool fpu_flag_z(uint32_t fps) +{ + return _fpscr_bits(fps).flag_divide_by_zero; +} + +static inline bool fpu_flag_v(uint32_t fps) +{ + return _fpscr_bits(fps).flag_invalid; +} + +static inline bool fpu_cause_i(uint32_t fps) +{ + return _fpscr_bits(fps).cause_inexact; +} + +static inline bool fpu_cause_u(uint32_t fps) +{ + return _fpscr_bits(fps).cause_underflow; +} + +static inline bool fpu_cause_o(uint32_t fps) +{ + return _fpscr_bits(fps).cause_overflow; +} + +static inline bool fpu_cause_z(uint32_t fps) +{ + return _fpscr_bits(fps).cause_divide_by_zero; +} + +static inline bool fpu_cause_v(uint32_t fps) +{ + return _fpscr_bits(fps).cause_invalid; +} + +static inline bool fpu_cause_e(uint32_t fps) +{ + return _fpscr_bits(fps).cause_fpu_error; +} + +static inline bool fpu_enable_i(uint32_t fps) +{ + return _fpscr_bits(fps).enable_inexact; +} + +static inline bool fpu_enable_u(uint32_t fps) +{ + return _fpscr_bits(fps).enable_underflow; +} + +static inline bool fpu_enable_o(uint32_t fps) +{ + return _fpscr_bits(fps).enable_overflow; +} + +static inline bool fpu_enable_z(uint32_t fps) +{ + return _fpscr_bits(fps).enable_divide_by_zero; +} + +static inline bool fpu_enable_v(uint32_t fps) +{ + return _fpscr_bits(fps).enable_invalid; +} + +static inline void update_fpscr(uint32_t * fps) +{ + struct fpscr_bits * fpscr = (struct fpscr_bits *)fps; + bool inexact = (softfloat_exceptionFlags & softfloat_flag_inexact) != 0; + bool underflow = (softfloat_exceptionFlags & softfloat_flag_underflow) != 0; + bool overflow = (softfloat_exceptionFlags & softfloat_flag_overflow) != 0; + bool infinite = (softfloat_exceptionFlags & softfloat_flag_infinite) != 0; + bool invalid = (softfloat_exceptionFlags & softfloat_flag_invalid) != 0; + + fpscr->flag_inexact |= inexact; + fpscr->flag_underflow |= underflow; + fpscr->flag_overflow |= overflow; + fpscr->flag_divide_by_zero |= infinite; + fpscr->flag_invalid |= invalid; + + fpscr->cause_inexact = inexact; + fpscr->cause_underflow = underflow; + fpscr->cause_overflow = overflow; + fpscr->cause_divide_by_zero = infinite; + fpscr->cause_invalid = invalid; +} + 
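+/*
+ * FPSCR.RM selects the SoftFloat rounding mode used by the helpers below:
+ * RM = 00 is round-to-nearest-even and RM = 01 is round-toward-zero, the
+ * only two encodings defined for the SH-4 FPSCR; the remaining encodings
+ * are reserved, so the default case intentionally leaves the mode alone.
+ */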
+static inline void set_rounding_mode(uint32_t * fps)
+{
+  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;
+  switch (fpscr->rm) {
+  case 0b00:
+    softfloat_roundingMode = softfloat_round_near_even;
+    break;
+  case 0b01:
+    softfloat_roundingMode = softfloat_round_minMag;
+    break;
+  default:
+    // undefined rounding mode
+    break;
+  }
+}
+
+static inline bool is_nan_f32(float32_t a)
+{
+  bool exp = (a.v & 0x7f800000) == 0x7f800000;
+  bool sig = (a.v & 0x007fffff) != 0;
+  return exp & sig;
+}
+
+static inline bool is_denormal_f32(float32_t a)
+{
+  bool exp = (a.v & 0x7f800000) == 0x00000000;
+  bool sig = (a.v & 0x007fffff) != 0;
+  return exp & sig;
+}
+
+static inline float32_t flush_to_zero_f32(float32_t a)
+{
+  return (float32_t){ a.v & 0x80000000 };
+}
+
+/*
+ * The dn_* helpers pre-process denormal inputs: they return true when the
+ * operation must be suppressed because a denormal input raised an FPU error
+ * under FPSCR.DN = 0, and false when the computation may proceed.
+ */
+static inline bool dn_f32_f32_f32(float32_t * a, float32_t * b, float32_t * c, uint32_t * fps)
+{
+  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;
+  switch (fpscr->dn) {
+  case 0:
+    /*
+     * an FPU error is signaled if FPSCR.DN is zero, neither input is
+     * a NaN and either input is a denormalized number.
+     */
+    if ((!is_nan_f32(*a)) && (!is_nan_f32(*b)) && (!is_nan_f32(*c))) { // neither input is a NaN
+      if (is_denormal_f32(*a) || is_denormal_f32(*b) || is_denormal_f32(*c)) { // either input is denormalized
+        fpscr->cause_fpu_error = 1;
+        return true; // do not continue
+      }
+    }
+    return false;
+  case 1:
+    /*
+     * When FPSCR.DN is 1, a positive denormalized number is treated as
+     * +0 and a negative denormalized number as -0. This flush-to-zero
+     * treatment is applied before exception detection and special case
+     * handling.
+     */
+    if (is_denormal_f32(*a)) *a = flush_to_zero_f32(*a);
+    if (is_denormal_f32(*b)) *b = flush_to_zero_f32(*b);
+    if (is_denormal_f32(*c)) *c = flush_to_zero_f32(*c);
+    return false;
+  default:
+    assert(false);
+  }
+}
+
+static inline bool dn_f32_f32(float32_t * a, float32_t * b, uint32_t * fps)
+{
+  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;
+  switch (fpscr->dn) {
+  case 0:
+    /*
+     * an FPU error is signaled if FPSCR.DN is zero, neither input is
+     * a NaN and either input is a denormalized number.
+     */
+    if ((!is_nan_f32(*a)) && (!is_nan_f32(*b))) { // neither input is a NaN
+      if (is_denormal_f32(*a) || is_denormal_f32(*b)) { // either input is denormalized
+        fpscr->cause_fpu_error = 1;
+        return true; // do not continue
+      }
+    }
+    return false;
+  case 1:
+    /*
+     * When FPSCR.DN is 1, a positive denormalized number is treated as
+     * +0 and a negative denormalized number as -0. This flush-to-zero
+     * treatment is applied before exception detection and special case
+     * handling.
+     */
+    if (is_denormal_f32(*a)) *a = flush_to_zero_f32(*a);
+    if (is_denormal_f32(*b)) *b = flush_to_zero_f32(*b);
+    return false;
+  default:
+    assert(false);
+  }
+}
+
+static inline bool dn_f32(float32_t * a, uint32_t * fps)
+{
+  struct fpscr_bits * fpscr = (struct fpscr_bits *)fps;
+  switch (fpscr->dn) {
+  case 0:
+    /*
+     * an FPU error is signaled if FPSCR.DN is zero, neither input is
+     * a NaN and either input is a denormalized number.
+     */
+    if (is_denormal_f32(*a)) { // the input is denormalized
+      fpscr->cause_fpu_error = 1;
+      return true; // do not continue
+    }
+    return false;
+  case 1:
+    /*
+     * When FPSCR.DN is 1, a positive denormalized number is treated as
+     * +0 and a negative denormalized number as -0. This flush-to-zero
+     * treatment is applied before exception detection and special case
+     * handling.
+ */ + if (is_denormal_f32(*a)) *a = flush_to_zero_f32(*a); + return false; + default: + assert(false); + } +} + +static inline bool is_nan_f64(float64_t a) +{ + bool exp = (a.v & 0x7ff00000'00000000) == 0x7ff00000'00000000; + bool sig = (a.v & 0x000fffff'ffffffff) != 0; + return exp & sig; +} + +static inline bool is_denormal_f64(float64_t a) +{ + bool exp = (a.v & 0x7ff00000'00000000) == 0x00000000'00000000; + bool sig = (a.v & 0x000fffff'ffffffff) != 0; + return exp & sig; +} + +static inline float64_t flush_to_zero_f64(float64_t a) +{ + return (float64_t){ a.v & 0x80000000'00000000 }; +} + +static inline bool dn_f64_f64(float64_t * a, float64_t * b, uint32_t * fps) +{ + struct fpscr_bits * fpscr = (struct fpscr_bits *)fps; + switch (fpscr->dn) { + case 0: + /* + * an FPU error is signaled if FPSCR.DN is zero, neither input is + * a NaN and either input is a denormalized number. + */ + if ((!is_nan_f64(*a)) && (!is_nan_f64(*b))) { // neither input is a NaN + if (is_denormal_f64(*a) || is_denormal_f64(*b)) { // either input is denormalized + fpscr->cause_fpu_error = 1; + return true; // do not continue + } + } + return false; + case 1: + /* + * When FPSCR.DN is 1, a positive denormalized number is treated as + * +0 and a negative denormalized number as -0. This flush-to-zero + * treatment is applied before exception detection and special case + * handling. + */ + if (is_denormal_f64(*a)) *a = flush_to_zero_f64(*a); + if (is_denormal_f64(*b)) *b = flush_to_zero_f64(*b); + return false; + default: + assert(false); + } +} + +static inline bool dn_f64(float64_t * a, uint32_t * fps) +{ + struct fpscr_bits * fpscr = (struct fpscr_bits *)fps; + switch (fpscr->dn) { + case 0: + /* + * an FPU error is signaled if FPSCR.DN is zero, neither input is + * a NaN and either input is a denormalized number. + */ + if (is_denormal_f64(*a)) { // either input is denormalized + fpscr->cause_fpu_error = 1; + return true; // do not continue + } + return false; + case 1: + /* + * When FPSCR.DN is 1, a positive denormalized number is treated as + * +0 and a negative denormalized number as -0. This flush-to-zero + * treatment is applied before exception detection and special case + * handling. 
+ */ + if (is_denormal_f64(*a)) *a = flush_to_zero_f64(*a); + return false; + default: + assert(false); + } +} + +static inline void fadd_s(float32_t op1, float32_t * op2, uint32_t * fps) +{ + if (dn_f32_f32(&op1, op2, fps)) return; + + set_rounding_mode(fps); + *op2 = f32_add(op1, *op2); + update_fpscr(fps); +} + +static inline void fadd_d(float64_t op1, float64_t * op2, uint32_t * fps) +{ + if (dn_f64_f64(&op1, op2, fps)) return; + + set_rounding_mode(fps); + *op2 = f64_add(op1, *op2); + update_fpscr(fps); +} + +static inline void fsub_s(float32_t * op2, float32_t op1, uint32_t * fps) +{ + if (dn_f32_f32(op2, &op1, fps)) return; + + set_rounding_mode(fps); + *op2 = f32_sub(*op2, op1); + update_fpscr(fps); +} + +static inline void fsub_d(float64_t * op2, float64_t op1, uint32_t * fps) +{ + if (dn_f64_f64(op2, &op1, fps)) return; + + set_rounding_mode(fps); + *op2 = f64_sub(*op2, op1); + update_fpscr(fps); +} + +static inline void fmul_s(float32_t op1, float32_t * op2, uint32_t * fps) +{ + if (dn_f32_f32(&op1, op2, fps)) return; + + set_rounding_mode(fps); + *op2 = f32_mul(op1, *op2); + update_fpscr(fps); +} + +static inline void fmul_d(float64_t op1, float64_t * op2, uint32_t * fps) +{ + if (dn_f64_f64(&op1, op2, fps)) return; + + set_rounding_mode(fps); + *op2 = f64_mul(op1, *op2); + update_fpscr(fps); +} + +static inline void fdiv_s(float32_t * op2, float32_t op1, uint32_t * fps) +{ + if (dn_f32_f32(op2, &op1, fps)) return; + + set_rounding_mode(fps); + *op2 = f32_div(*op2, op1); + update_fpscr(fps); +} + +static inline void fdiv_d(float64_t * op2, float64_t op1, uint32_t * fps) +{ + if (dn_f64_f64(op2, &op1, fps)) return; + + set_rounding_mode(fps); + *op2 = f64_div(*op2, op1); + update_fpscr(fps); +} + +static inline float32_t float_ls(int32_t fpul, uint32_t * fps) +{ + set_rounding_mode(fps); + float32_t value = i32_to_f32(fpul); + update_fpscr(fps); + return value; +} + +static inline float64_t float_ld(int32_t fpul, uint32_t * fps) +{ + set_rounding_mode(fps); + float64_t value = i32_to_f64(fpul); + update_fpscr(fps); + return value; +} + +static inline int32_t ftrc_sl(float32_t op1, uint32_t * fps) +{ + set_rounding_mode(fps); + int32_t value = f32_to_i32(op1, softfloat_round_minMag, false); + update_fpscr(fps); + return value; +} + +static inline int32_t ftrc_dl(float64_t op1, uint32_t * fps) +{ + set_rounding_mode(fps); + int32_t value = f64_to_i32(op1, softfloat_round_minMag, false); + update_fpscr(fps); + return value; +} + +static inline float32_t fabs_s(float32_t op1) +{ + op1.v &= 0x7fffffff; + return op1; +} + +static inline float64_t fabs_d(float64_t op1) +{ + op1.v &= 0x7fffffff'ffffffff; + return op1; +} + +static inline float32_t fneg_s(float32_t op1) +{ + op1.v ^= 0x80000000'00000000; + return op1; +} + +static inline float64_t fneg_d(float64_t op1) +{ + op1.v ^= 0x80000000'00000000; + return op1; +} + +static inline uint32_t fcnv_ds(float64_t op1, uint32_t * fps) +{ + if (dn_f64(&op1, fps)) return 0; + + set_rounding_mode(fps); + float32_t result = f64_to_f32(op1); + update_fpscr(fps); + return result.v; +} + +static inline float64_t fcnv_sd(int32_t fpul, uint32_t * fps) +{ + float32_t a = { fpul }; + if (dn_f32(&a, fps)) return (float64_t){ 0 }; + + set_rounding_mode(fps); + float64_t result = f32_to_f64(a); + update_fpscr(fps); + return result; +} + +static inline bool fcmpeq_s(float32_t op1, float32_t op2, uint32_t * fps) +{ + if (dn_f32_f32(&op1, &op2, fps)) return false; + + set_rounding_mode(fps); + bool result = f32_eq(op1, op2); + update_fpscr(fps); + return 
result; +} + +static inline bool fcmpeq_d(float64_t op1, float64_t op2, uint32_t * fps) +{ + if (dn_f64_f64(&op1, &op2, fps)) return false; + + set_rounding_mode(fps); + bool result = f64_eq(op1, op2); + update_fpscr(fps); + return result; +} + +static inline bool fcmpgt_s(float32_t op2, float32_t op1, uint32_t * fps) +{ + if (dn_f32_f32(&op2, &op1, fps)) return false; + + set_rounding_mode(fps); + bool result = f32_le(op1, op2); + update_fpscr(fps); + return !result; +} + +static inline bool fcmpgt_d(float64_t op2, float64_t op1, uint32_t * fps) +{ + if (dn_f64_f64(&op2, &op1, fps)) return false; + + set_rounding_mode(fps); + bool result = f64_le(op1, op2); + update_fpscr(fps); + return !result; +} + +static inline void fmac_s(float32_t fr0, float32_t op1, float32_t * op2, uint32_t * fps) +{ + if (dn_f32_f32_f32(&fr0, &op1, op2, fps)) return; + + set_rounding_mode(fps); + *op2 = f32_mulAdd(fr0, op1, *op2); + update_fpscr(fps); +} + +static inline void fsqrt_s(float32_t * op1, uint32_t * fps) +{ + if (dn_f32(op1, fps)) return; + + set_rounding_mode(fps); + *op1 = f32_sqrt(*op1); + update_fpscr(fps); +} + +static inline void fsqrt_d(float64_t * op1, uint32_t * fps) +{ + if (dn_f64(op1, fps)) return; + + set_rounding_mode(fps); + *op1 = f64_sqrt(*op1); + update_fpscr(fps); +} diff --git a/c/impl.c b/c/impl.c index aaa872f..7280e87 100644 --- a/c/impl.c +++ b/c/impl.c @@ -2,6 +2,7 @@ #include "operations.h" #include "exception.h" #include "state_helpers.h" +#include "fpu.h" /* MOV #imm,Rn */ void mov__immediate(struct architectural_state * state, struct memory_map * map, const uint32_t i, const uint32_t n) @@ -1713,7 +1714,8 @@ void clrt__no_operand(struct architectural_state * state, struct memory_map * ma /* LDC Rm,SR */ void ldc__transfer_to_sr(struct architectural_state * state, struct memory_map * map, const uint32_t m) { - int64_t md, op1, sr; + int64_t md, op1; + uint32_t sr; md = zero_extend1(state->sr.bits.md); if (md == 0) return RESINST(state); op1 = sign_extend32(REG(state, m)); @@ -1809,7 +1811,8 @@ void ldc__transfer_to_rn_bank(struct architectural_state * state, struct memory_ /* LDC.L @Rm+,SR */ void ldc_l__load_to_sr(struct architectural_state * state, struct memory_map * map, const uint32_t m) { - int64_t md, op1, address, sr; + int64_t md, op1, address; + uint32_t sr; md = zero_extend1(state->sr.bits.md); if (md == 0) return RESINST(state); op1 = sign_extend32(REG(state, m)); @@ -2072,7 +2075,8 @@ void sleep__no_operand(struct architectural_state * state, struct memory_map * m /* STC SR,Rn */ void stc__transfer_from_sr(struct architectural_state * state, struct memory_map * map, const uint32_t n) { - int64_t md, sr, op1; + int64_t md; + uint32_t sr, op1; md = zero_extend1(state->sr.bits.md); if (md == 0) return RESINST(state); sr = sign_extend32(state->sr.value); @@ -2182,7 +2186,8 @@ void stc__transfer_from_rm_bank(struct architectural_state * state, struct memor /* STC.L SR,@-Rn */ void stc_l__store_from_sr(struct architectural_state * state, struct memory_map * map, const uint32_t n) { - int64_t md, sr, op1, address; + int64_t md, op1, address; + uint32_t sr; md = zero_extend1(state->sr.bits.md); if (md == 0) return RESINST(state); sr = sign_extend32(state->sr.value); @@ -2406,10 +2411,843 @@ void trapa__immediate(struct architectural_state * state, struct memory_map * ma } +/* FLDI0 FRn */ +void fldi0__destination_operand_only(struct architectural_state * state, struct memory_map * map, const uint32_t n) +{ + uint32_t sr; + float32_t op1; + sr = 
zero_extend32(state->sr.value); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + op1 = (float32_t){ 0x00000000 }; + FR_(state, n) = float_register32(op1); + + state->is_delay_slot = false; +} + + +/* FLDI1 FRn */ +void fldi1__destination_operand_only(struct architectural_state * state, struct memory_map * map, const uint32_t n) +{ + uint32_t sr; + float32_t op1; + sr = zero_extend32(state->sr.value); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + op1 = (float32_t){ 0x3f800000 }; + FR_(state, n) = float_register32(op1); + + state->is_delay_slot = false; +} + + +/* FMOV FRm,FRn */ +void fmov__source_and_destination_operands(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + float32_t op1, op2; + sr = zero_extend32(state->sr.value); + op1 = float_value32(FR_(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + op2 = op1; + FR_(state, n) = float_register32(op2); + + state->is_delay_slot = false; +} + + +/* FMOV.S @Rm,FRn */ +void fmov_s__load_register_direct_data_transfer(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t op1, address; + float32_t op2; + sr = zero_extend32(state->sr.value); + op1 = sign_extend32(REG(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(op1); + op2 = float_value32(read_memory32(map, address)); + FR_(state, n) = float_register32(op2); + + state->is_delay_slot = false; +} + + +/* FMOV.S @(R0,Rm),FRn */ +void fmov_s__load_indexed_register_indirect(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t r0, op1, address; + float32_t op2; + sr = zero_extend32(state->sr.value); + r0 = sign_extend32(REG(state, 0)); + op1 = sign_extend32(REG(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(r0 + op1); + op2 = float_value32(read_memory32(map, address)); + FR_(state, n) = float_register32(op2); + + state->is_delay_slot = false; +} + + +/* FMOV.S @Rm+,FRn */ +void fmov_s__load_direct_data_transfer_from_register(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t op1, address; + float32_t op2; + sr = zero_extend32(state->sr.value); + op1 = sign_extend32(REG(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(op1); + op2 = float_value32(read_memory32(map, address)); + op1 = op1 + 4; + REG(state, m) = _register(op1); + FR_(state, n) = float_register32(op2); + + state->is_delay_slot = false; +} + + +/* FMOV.S FRm,@Rn */ +void fmov_s__store_register_direct_data_transfer(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + float32_t op1; + int64_t op2, address, value; + sr = zero_extend32(state->sr.value); + op1 = float_value32(FR_(state, m)); + op2 = sign_extend32(REG(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if 
(fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(op2); + value = float_register32(op1); + write_memory32(map, address, value); + + state->is_delay_slot = false; +} + + +/* FMOV.S FRm,@-Rn */ +void fmov_s__store_direct_data_transfer_from_register(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + float32_t op1; + int64_t op2, address, value; + sr = zero_extend32(state->sr.value); + op1 = float_value32(FR_(state, m)); + op2 = sign_extend32(REG(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(op2 - 4); + value = float_register32(op1); + write_memory32(map, address, value); + op2 = address; + REG(state, n) = _register(op2); + + state->is_delay_slot = false; +} + + +/* FMOV.S FRm,@(R0,Rn) */ +void fmov_s__store_indexed_register_indirect(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t r0, op2, address, value; + float32_t op1; + sr = zero_extend32(state->sr.value); + r0 = sign_extend32(REG(state, 0)); + op1 = float_value32(FR_(state, m)); + op2 = sign_extend32(REG(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(r0 + op2); + value = float_register32(op1); + write_memory32(map, address, value); + + state->is_delay_slot = false; +} + + +/* FMOV DRm,DRn */ +void fmov__source_and_destination_operands_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t op1, op2; + sr = zero_extend32(state->sr.value); + op1 = float_value_pair32(FP2_(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + op2 = op1; + FP2_(state, n) = float_register_pair32(op2); + + state->is_delay_slot = false; +} + + +/* FMOV @Rm,DRn */ +void fmov__load_register_direct_data_transfer_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t op1, address, op2; + sr = zero_extend32(state->sr.value); + op1 = sign_extend32(REG(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(op1); + op2 = read_memory_pair32(map, address); + FP2_(state, n) = float_register_pair32(op2); + + state->is_delay_slot = false; +} + + +/* FMOV @(R0,Rm),DRn */ +void fmov__load_indexed_register_indirect_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t r0, op1, address, op2; + sr = zero_extend32(state->sr.value); + r0 = sign_extend32(REG(state, 0)); + op1 = sign_extend32(REG(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(r0 + op1); + op2 = read_memory_pair32(map, address); + FP2_(state, n) = float_register_pair32(op2); + + state->is_delay_slot = false; +} + + +/* FMOV @Rm+,DRn */ +void fmov__load_direct_data_transfer_from_register_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t op1, address, op2; + sr = zero_extend32(state->sr.value); + op1 = 
sign_extend32(REG(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(op1); + op2 = read_memory_pair32(map, address); + op1 = op1 + 8; + REG(state, m) = _register(op1); + FP2_(state, n) = float_register_pair32(op2); + + state->is_delay_slot = false; +} + + +/* FMOV DRm,@Rn */ +void fmov__store_register_direct_data_transfer_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t op1, op2, address; + sr = zero_extend32(state->sr.value); + op1 = float_value_pair32(FP2_(state, m)); + op2 = sign_extend32(REG(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(op2); + write_memory_pair32(map, address, op1); + + state->is_delay_slot = false; +} + + +/* FMOV DRm,@-Rn */ +void fmov__store_direct_data_transfer_from_register_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t op1, op2, address; + sr = zero_extend32(state->sr.value); + op1 = float_value_pair32(FP2_(state, m)); + op2 = sign_extend32(REG(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(op2 - 8); + write_memory_pair32(map, address, op1); + op2 = address; + REG(state, n) = _register(op2); + + state->is_delay_slot = false; +} + + +/* FMOV DRm,@(R0,Rn) */ +void fmov__store_indexed_register_indirect_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t r0, op1, op2, address; + sr = zero_extend32(state->sr.value); + r0 = sign_extend32(REG(state, 0)); + op1 = float_value_pair32(FP2_(state, m)); + op2 = sign_extend32(REG(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(r0 + op2); + write_memory_pair32(map, address, op1); + + state->is_delay_slot = false; +} + + +/* FLDS FRm,FPUL */ +void flds__frm_to_fpul(struct architectural_state * state, struct memory_map * map, const uint32_t m) +{ + uint32_t sr; + float32_t op1; + int64_t fpul; + sr = zero_extend32(state->sr.value); + op1 = float_value32(FR_(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fpul = float_register32(op1); + state->fpul = zero_extend32(fpul); + + state->is_delay_slot = false; +} + + +/* FSTS FPUL,FRn */ +void fsts__fpul_to_frn(struct architectural_state * state, struct memory_map * map, const uint32_t n) +{ + uint32_t sr; + int64_t fpul; + float32_t op1; + sr = zero_extend32(state->sr.value); + fpul = sign_extend32(state->fpul); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + op1 = float_value32(fpul); + FR_(state, n) = float_register32(op1); + + state->is_delay_slot = false; +} + + +/* FABS FRn */ +void fabs__destination_operand_only(struct architectural_state * state, struct memory_map * map, const uint32_t n) +{ + uint32_t sr; + float32_t op1; + sr = zero_extend32(state->sr.value); + op1 = float_value32(FR_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return 
FPUDIS(state); + op1 = fabs_s(op1); + FR_(state, n) = float_register32(op1); + + state->is_delay_slot = false; +} + + +/* FADD FRm,FRn */ +void fadd__source_and_destination_operands(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr, fps; + float32_t op1, op2; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value32(FR_(state, m)); + op2 = float_value32(FR_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fadd_s(op1, &op2, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + if (fpu_cause_e(fps)) return FPUEXC(state, fps); + if ((fpu_enable_i(fps) || fpu_enable_o(fps)) || fpu_enable_u(fps)) return FPUEXC(state, fps); + FR_(state, n) = float_register32(op2); + state->fpscr.value = zero_extend32(fps); + + state->is_delay_slot = false; +} + + +/* FCMP/EQ FRm,FRn */ +void fcmp_eq__source_and_destination_operands(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr, fps; + float32_t op1, op2; + int64_t t; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value32(FR_(state, m)); + op2 = float_value32(FR_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + t = fcmpeq_s(op1, op2, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + state->fpscr.value = zero_extend32(fps); + state->sr.bits.t = bit(t); + + state->is_delay_slot = false; +} + + +/* FCMP/GT FRm,FRn */ +void fcmp_gt__source_and_destination_operands(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr, fps; + float32_t op1, op2; + int64_t t; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value32(FR_(state, m)); + op2 = float_value32(FR_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + t = fcmpgt_s(op2, op1, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + state->fpscr.value = zero_extend32(fps); + state->sr.bits.t = bit(t); + + state->is_delay_slot = false; +} + + +/* FDIV FRm,FRn */ +void fdiv__source_and_destination_operands(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr, fps; + float32_t op1, op2; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value32(FR_(state, m)); + op2 = float_value32(FR_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fdiv_s(&op2, op1, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + if (fpu_enable_z(fps) && fpu_cause_z(fps)) return FPUEXC(state, fps); + if (fpu_cause_e(fps)) return FPUEXC(state, fps); + if ((fpu_enable_i(fps) || fpu_enable_o(fps)) || fpu_enable_u(fps)) return FPUEXC(state, fps); + FR_(state, n) = float_register32(op2); + state->fpscr.value = zero_extend32(fps); + + state->is_delay_slot = false; +} + + +/* FLOAT FPUL,FRn */ +void float__fpul_to_frn(struct architectural_state * state, struct memory_map * map, const uint32_t n) +{ + uint32_t sr, fps; + int64_t fpul; + float32_t op1; + sr = 
zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + fpul = sign_extend32(state->fpul); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + op1 = float_ls(fpul, &fps); + if (fpu_enable_i(fps)) return FPUEXC(state, fps); + FR_(state, n) = float_register32(op1); + state->fpscr.value = zero_extend32(fps); + + state->is_delay_slot = false; +} + + +/* FMAC FR0,FRm,FRn */ +void fmac__fr0_frm_frn(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr, fps; + float32_t fr0, op1, op2; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + fr0 = float_value32(FR_(state, 0)); + op1 = float_value32(FR_(state, m)); + op2 = float_value32(FR_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fmac_s(fr0, op1, &op2, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + if (fpu_cause_e(fps)) return FPUEXC(state, fps); + if ((fpu_enable_i(fps) || fpu_enable_o(fps)) || fpu_enable_u(fps)) return FPUEXC(state, fps); + FR_(state, n) = float_register32(op2); + state->fpscr.value = zero_extend32(fps); + + state->is_delay_slot = false; +} + + +/* FMUL FRm,FRn */ +void fmul__source_and_destination_operands(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr, fps; + float32_t op1, op2; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value32(FR_(state, m)); + op2 = float_value32(FR_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fmul_s(op1, &op2, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + if (fpu_cause_e(fps)) return FPUEXC(state, fps); + if ((fpu_enable_i(fps) || fpu_enable_o(fps)) || fpu_enable_u(fps)) return FPUEXC(state, fps); + FR_(state, n) = float_register32(op2); + state->fpscr.value = zero_extend32(fps); + + state->is_delay_slot = false; +} + + +/* FNEG FRn */ +void fneg__destination_operand_only(struct architectural_state * state, struct memory_map * map, const uint32_t n) +{ + uint32_t sr; + float32_t op1; + sr = zero_extend32(state->sr.value); + op1 = float_value32(FR_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + op1 = fneg_s(op1); + FR_(state, n) = float_register32(op1); + + state->is_delay_slot = false; +} + + +/* FSQRT FRn */ +void fsqrt__destination_operand_only(struct architectural_state * state, struct memory_map * map, const uint32_t n) +{ + uint32_t sr, fps; + float32_t op1; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value32(FR_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fsqrt_s(&op1, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + if (fpu_cause_e(fps)) return FPUEXC(state, fps); + if (fpu_enable_i(fps)) return FPUEXC(state, fps); + FR_(state, n) = float_register32(op1); + state->fpscr.value = zero_extend32(fps); + + state->is_delay_slot = false; +} + + +/* FSUB FRm,FRn */ +void fsub__source_and_destination_operands(struct architectural_state * state, struct memory_map * map, const uint32_t 
m, const uint32_t n) +{ + uint32_t sr, fps; + float32_t op1, op2; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value32(FR_(state, m)); + op2 = float_value32(FR_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fsub_s(&op2, op1, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + if (fpu_cause_e(fps)) return FPUEXC(state, fps); + if ((fpu_enable_i(fps) || fpu_enable_o(fps)) || fpu_enable_u(fps)) return FPUEXC(state, fps); + FR_(state, n) = float_register32(op2); + state->fpscr.value = zero_extend32(fps); + + state->is_delay_slot = false; +} + + +/* FTRC FRm,FPUL */ +void ftrc__frm_to_fpul(struct architectural_state * state, struct memory_map * map, const uint32_t m) +{ + uint32_t sr, fps; + float32_t op1; + int64_t fpul; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value32(FR_(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fpul = ftrc_sl(op1, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + state->fpscr.value = zero_extend32(fps); + state->fpul = zero_extend32(fpul); + + state->is_delay_slot = false; +} + + +/* FABS DRn */ +void fabs__destination_operand_only_double(struct architectural_state * state, struct memory_map * map, const uint32_t n) +{ + uint32_t sr; + float64_t op1; + sr = zero_extend32(state->sr.value); + op1 = float_value64(DR2_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + op1 = fabs_d(op1); + DR2_(state, n) = float_register64(op1); + + state->is_delay_slot = false; +} + + +/* FADD DRm,DRn */ +void fadd__source_and_destination_operands_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr, fps; + float64_t op1, op2; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value64(DR2_(state, m)); + op2 = float_value64(DR2_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fadd_d(op1, &op2, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + if (fpu_cause_e(fps)) return FPUEXC(state, fps); + if ((fpu_enable_i(fps) || fpu_enable_o(fps)) || fpu_enable_u(fps)) return FPUEXC(state, fps); + DR2_(state, n) = float_register64(op2); + state->fpscr.value = zero_extend32(fps); + + state->is_delay_slot = false; +} + + +/* FCMP/EQ DRm,DRn */ +void fcmp_eq__source_and_destination_operands_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr, fps; + float64_t op1, op2; + int64_t t; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value64(DR2_(state, m)); + op2 = float_value64(DR2_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + t = fcmpeq_d(op1, op2, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + state->fpscr.value = zero_extend32(fps); + state->sr.bits.t = bit(t); + + state->is_delay_slot = false; +} + + +/* FCMP/GT DRm,DRn */ +void fcmp_gt__source_and_destination_operands_double(struct 
architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr, fps; + float64_t op1, op2; + int64_t t; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value64(DR2_(state, m)); + op2 = float_value64(DR2_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + t = fcmpgt_d(op2, op1, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + state->fpscr.value = zero_extend32(fps); + state->sr.bits.t = bit(t); + + state->is_delay_slot = false; +} + + +/* FDIV DRm,DRn */ +void fdiv__source_and_destination_operands_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr, fps; + float64_t op1, op2; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value64(DR2_(state, m)); + op2 = float_value64(DR2_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fdiv_d(&op2, op1, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + if (fpu_enable_z(fps) && fpu_cause_z(fps)) return FPUEXC(state, fps); + if (fpu_cause_e(fps)) return FPUEXC(state, fps); + if ((fpu_enable_i(fps) || fpu_enable_o(fps)) || fpu_enable_u(fps)) return FPUEXC(state, fps); + DR2_(state, n) = float_register64(op2); + state->fpscr.value = zero_extend32(fps); + + state->is_delay_slot = false; +} + + +/* FCNVDS DRm,FPUL */ +void fcnvds__drm_to_fpul(struct architectural_state * state, struct memory_map * map, const uint32_t m) +{ + uint32_t sr, fps, fpul; + float64_t op1; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value64(DR2_(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fpul = fcnv_ds(op1, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + if (fpu_cause_e(fps)) return FPUEXC(state, fps); + if ((fpu_enable_i(fps) || fpu_enable_o(fps)) || fpu_enable_u(fps)) return FPUEXC(state, fps); + state->fpscr.value = zero_extend32(fps); + state->fpul = zero_extend32(fpul); + + state->is_delay_slot = false; +} + + +/* FCNVSD FPUL,DRn */ +void fcnvsd__fpul_to_drn(struct architectural_state * state, struct memory_map * map, const uint32_t n) +{ + uint32_t sr, fps; + int64_t fpul; + float64_t op1; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + fpul = sign_extend32(state->fpul); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + op1 = fcnv_sd(fpul, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + if (fpu_cause_e(fps)) return FPUEXC(state, fps); + DR2_(state, n) = float_register64(op1); + state->fpscr.value = zero_extend32(fps); + + state->is_delay_slot = false; +} + + +/* FLOAT FPUL,DRn */ +void float__fpul_to_drn(struct architectural_state * state, struct memory_map * map, const uint32_t n) +{ + int64_t fpul; + uint32_t sr, fps; + float64_t op1; + fpul = sign_extend32(state->fpul); + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + op1 = float_ld(fpul, &fps); + 
DR2_(state, n) = float_register64(op1); + + state->is_delay_slot = false; +} + + +/* FMUL DRm,DRn */ +void fmul__source_and_destination_operands_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr, fps; + float64_t op1, op2; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value64(DR2_(state, m)); + op2 = float_value64(DR2_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fmul_d(op1, &op2, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + if (fpu_cause_e(fps)) return FPUEXC(state, fps); + if ((fpu_enable_i(fps) || fpu_enable_o(fps)) || fpu_enable_u(fps)) return FPUEXC(state, fps); + DR2_(state, n) = float_register64(op2); + state->fpscr.value = zero_extend32(fps); + + state->is_delay_slot = false; +} + + +/* FNEG DRn */ +void fneg__destination_operand_only_double(struct architectural_state * state, struct memory_map * map, const uint32_t n) +{ + uint32_t sr; + float64_t op1; + sr = zero_extend32(state->sr.value); + op1 = float_value64(DR2_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + op1 = fneg_d(op1); + DR2_(state, n) = float_register64(op1); + + state->is_delay_slot = false; +} + + +/* FSQRT DRn */ +void fsqrt__destination_operand_only_double(struct architectural_state * state, struct memory_map * map, const uint32_t n) +{ + uint32_t sr, fps; + float64_t op1; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value64(DR2_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fsqrt_d(&op1, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + if (fpu_cause_e(fps)) return FPUEXC(state, fps); + if (fpu_enable_i(fps)) return FPUEXC(state, fps); + DR2_(state, n) = float_register64(op1); + state->fpscr.value = zero_extend32(fps); + + state->is_delay_slot = false; +} + + +/* FSUB DRm,DRn */ +void fsub__source_and_destination_operands_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr, fps; + float64_t op1, op2; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value64(DR2_(state, m)); + op2 = float_value64(DR2_(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fsub_d(&op2, op1, &fps); + if (fpu_enable_v(fps) && fpu_cause_v(fps)) return FPUEXC(state, fps); + if (fpu_cause_e(fps)) return FPUEXC(state, fps); + if ((fpu_enable_i(fps) || fpu_enable_o(fps)) || fpu_enable_u(fps)) return FPUEXC(state, fps); + DR2_(state, n) = float_register64(op2); + state->fpscr.value = zero_extend32(fps); + + state->is_delay_slot = false; +} + + +/* FTRC DRm,FPUL */ +void ftrc__drm_to_fpul(struct architectural_state * state, struct memory_map * map, const uint32_t m) +{ + uint32_t sr, fps; + float64_t op1; + int64_t fpul; + sr = zero_extend32(state->sr.value); + fps = zero_extend32(state->fpscr.value); + op1 = float_value64(DR2_(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fpul = ftrc_dl(op1, &fps); + if (fpu_enable_v(fps) && 
fpu_cause_v(fps)) return FPUEXC(state, fps); + state->fpul = zero_extend32(fpul); + state->fpscr.value = zero_extend32(fps); + + state->is_delay_slot = false; +} + + /* LDS Rm,FPSCR */ void lds__transfer_to_fpscr(struct architectural_state * state, struct memory_map * map, const uint32_t m) { - int64_t sr, op1; + uint32_t sr; + int64_t op1; sr = zero_extend32(state->sr.value); op1 = sign_extend32(REG(state, m)); if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); @@ -2423,7 +3261,8 @@ void lds__transfer_to_fpscr(struct architectural_state * state, struct memory_ma /* LDS Rm,FPUL */ void lds__transfer_to_fpul(struct architectural_state * state, struct memory_map * map, const uint32_t m) { - int64_t sr, op1, fpul; + uint32_t sr; + int64_t op1, fpul; sr = zero_extend32(state->sr.value); op1 = sign_extend32(REG(state, m)); if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); @@ -2438,7 +3277,8 @@ void lds__transfer_to_fpul(struct architectural_state * state, struct memory_map /* LDS.L @Rm+,FPSCR */ void lds_l__load_to_fpscr(struct architectural_state * state, struct memory_map * map, const uint32_t m) { - int64_t sr, op1, address, value; + uint32_t sr; + int64_t op1, address, value; sr = zero_extend32(state->sr.value); op1 = sign_extend32(REG(state, m)); if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); @@ -2456,7 +3296,8 @@ void lds_l__load_to_fpscr(struct architectural_state * state, struct memory_map /* LDS.L @Rm+,FPUL */ void lds_l__load_to_fpul(struct architectural_state * state, struct memory_map * map, const uint32_t m) { - int64_t sr, op1, address, fpul; + uint32_t sr; + int64_t op1, address, fpul; sr = zero_extend32(state->sr.value); op1 = sign_extend32(REG(state, m)); if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); @@ -2474,7 +3315,7 @@ void lds_l__load_to_fpul(struct architectural_state * state, struct memory_map * /* STS FPSCR,Rn */ void sts__transfer_from_fpscr(struct architectural_state * state, struct memory_map * map, const uint32_t n) { - int64_t sr, fps, op1; + uint32_t sr, fps, op1; sr = zero_extend32(state->sr.value); fps = zero_extend32(state->fpscr.value); if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); @@ -2489,7 +3330,8 @@ void sts__transfer_from_fpscr(struct architectural_state * state, struct memory_ /* STS FPUL,Rn */ void sts__transfer_from_fpul(struct architectural_state * state, struct memory_map * map, const uint32_t n) { - int64_t sr, fpul, op1; + uint32_t sr; + int64_t fpul, op1; sr = zero_extend32(state->sr.value); fpul = sign_extend32(state->fpul); if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); @@ -2504,7 +3346,8 @@ void sts__transfer_from_fpul(struct architectural_state * state, struct memory_m /* STS.L FPSCR,@-Rn */ void sts_l__store_from_fpscr(struct architectural_state * state, struct memory_map * map, const uint32_t n) { - int64_t sr, fps, op1, value, address; + uint32_t sr, fps, value; + int64_t op1, address; sr = zero_extend32(state->sr.value); fps = zero_extend32(state->fpscr.value); op1 = sign_extend32(REG(state, n)); @@ -2523,7 +3366,8 @@ void sts_l__store_from_fpscr(struct architectural_state * state, struct memory_m /* STS.L FPUL,@-Rn */ void sts_l__store_from_fpul(struct architectural_state * state, struct memory_map * map, const uint32_t n) { - int64_t sr, fpul, op1, address; + uint32_t sr; + int64_t fpul, op1, address; sr = zero_extend32(state->sr.value); fpul = sign_extend32(state->fpul); op1 = 
sign_extend32(REG(state, n)); @@ -2537,3 +3381,191 @@ void sts_l__store_from_fpul(struct architectural_state * state, struct memory_ma state->is_delay_slot = false; } + +/* FMOV DRm,XDn */ +void fmov__double_to_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t op1, op2; + sr = zero_extend32(state->sr.value); + op1 = float_value_pair32(DR2_(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + op2 = op1; + XD2_(state, n) = float_register_pair32(op2); + + state->is_delay_slot = false; +} + + +/* FMOV XDm,DRn */ +void fmov__bank_to_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t op1, op2; + sr = zero_extend32(state->sr.value); + op1 = float_value_pair32(XD2_(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + op2 = op1; + DR2_(state, n) = float_register_pair32(op2); + + state->is_delay_slot = false; +} + + +/* FMOV XDm,XDn */ +void fmov__source_and_destination_operands_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + float64_t op1, op2; + sr = zero_extend32(state->sr.value); + op1 = float_value64(XD2_(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + op2 = op1; + XD2_(state, n) = float_register64(op2); + + state->is_delay_slot = false; +} + + +/* FMOV @Rm,XDn */ +void fmov__load_register_direct_data_transfer_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t op1, address, op2; + sr = zero_extend32(state->sr.value); + op1 = sign_extend32(REG(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(op1); + op2 = read_memory_pair32(map, address); + XD2_(state, n) = float_register_pair32(op2); + + state->is_delay_slot = false; +} + + +/* FMOV @Rm+,XDn */ +void fmov__load_direct_data_transfer_from_register_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t op1, address, op2; + sr = zero_extend32(state->sr.value); + op1 = sign_extend32(REG(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(op1); + op2 = read_memory_pair32(map, address); + op1 = op1 + 8; + REG(state, m) = _register(op1); + XD2_(state, n) = float_register_pair32(op2); + + state->is_delay_slot = false; +} + + +/* FMOV @(R0,Rm),XDn */ +void fmov__load_indexed_register_indirect_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t r0, op1, address, op2; + sr = zero_extend32(state->sr.value); + r0 = sign_extend32(REG(state, 0)); + op1 = sign_extend32(REG(state, m)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(r0 + op1); + op2 = read_memory_pair32(map, address); + XD2_(state, n) = float_register_pair32(op2); + + state->is_delay_slot = false; +} + + +/* FMOV XDm,@Rn */ +void 
fmov__store_register_direct_data_transfer_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t op1, op2, address; + sr = zero_extend32(state->sr.value); + op1 = float_value_pair32(XD2_(state, m)); + op2 = sign_extend32(REG(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(op2); + write_memory_pair32(map, address, op1); + + state->is_delay_slot = false; +} + + +/* FMOV XDm,@-Rn */ +void fmov__store_direct_data_transfer_from_register_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t op1, op2, address; + sr = zero_extend32(state->sr.value); + op1 = float_value_pair32(XD2_(state, m)); + op2 = sign_extend32(REG(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(op2 - 8); + write_memory_pair32(map, address, op1); + op2 = address; + REG(state, n) = _register(op2); + + state->is_delay_slot = false; +} + + +/* FMOV XDm,@(R0,Rn) */ +void fmov__store_indexed_register_indirect_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n) +{ + uint32_t sr; + int64_t r0, op1, op2, address; + sr = zero_extend32(state->sr.value); + r0 = sign_extend32(REG(state, 0)); + op1 = float_value_pair32(XD2_(state, m)); + op2 = sign_extend32(REG(state, n)); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + address = zero_extend32(r0 + op2); + write_memory_pair32(map, address, op1); + + state->is_delay_slot = false; +} + + +/* FRCHG */ +void frchg__no_operand(struct architectural_state * state, struct memory_map * map) +{ + uint32_t sr; + int64_t fr; + sr = zero_extend32(state->sr.value); + fr = zero_extend1(state->fpscr.bits.fr); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + fr = fr ^ 1; + state->fpscr.bits.fr = bit(fr); + + state->is_delay_slot = false; +} + + +/* FSCHG */ +void fschg__no_operand(struct architectural_state * state, struct memory_map * map) +{ + uint32_t sr; + int64_t sz; + sr = zero_extend32(state->sr.value); + sz = zero_extend1(state->fpscr.bits.sz); + if (fpu_is_disabled(sr) && is_delay_slot(state)) return SLOTFPUDIS(state); + if (fpu_is_disabled(sr)) return FPUDIS(state); + sz = sz ^ 1; + state->fpscr.bits.sz = bit(sz); + + state->is_delay_slot = false; +} + diff --git a/c/impl.h b/c/impl.h index 17eb185..d5796b6 100644 --- a/c/impl.h +++ b/c/impl.h @@ -331,6 +331,92 @@ void sts_l__store_from_macl(struct architectural_state * state, struct memory_ma void sts_l__store_from_pr(struct architectural_state * state, struct memory_map * map, const uint32_t n); /* TRAPA #imm */ void trapa__immediate(struct architectural_state * state, struct memory_map * map, const uint32_t i); +/* FLDI0 FRn */ +void fldi0__destination_operand_only(struct architectural_state * state, struct memory_map * map, const uint32_t n); +/* FLDI1 FRn */ +void fldi1__destination_operand_only(struct architectural_state * state, struct memory_map * map, const uint32_t n); +/* FMOV FRm,FRn */ +void fmov__source_and_destination_operands(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV.S @Rm,FRn */ +void 
fmov_s__load_register_direct_data_transfer(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV.S @(R0,Rm),FRn */ +void fmov_s__load_indexed_register_indirect(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV.S @Rm+,FRn */ +void fmov_s__load_direct_data_transfer_from_register(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV.S FRm,@Rn */ +void fmov_s__store_register_direct_data_transfer(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV.S FRm,@-Rn */ +void fmov_s__store_direct_data_transfer_from_register(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV.S FRm,@(R0,Rn) */ +void fmov_s__store_indexed_register_indirect(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV DRm,DRn */ +void fmov__source_and_destination_operands_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV @Rm,DRn */ +void fmov__load_register_direct_data_transfer_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV @(R0,Rm),DRn */ +void fmov__load_indexed_register_indirect_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV @Rm+,DRn */ +void fmov__load_direct_data_transfer_from_register_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV DRm,@Rn */ +void fmov__store_register_direct_data_transfer_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV DRm,@-Rn */ +void fmov__store_direct_data_transfer_from_register_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV DRm,@(R0,Rn) */ +void fmov__store_indexed_register_indirect_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FLDS FRm,FPUL */ +void flds__frm_to_fpul(struct architectural_state * state, struct memory_map * map, const uint32_t m); +/* FSTS FPUL,FRn */ +void fsts__fpul_to_frn(struct architectural_state * state, struct memory_map * map, const uint32_t n); +/* FABS FRn */ +void fabs__destination_operand_only(struct architectural_state * state, struct memory_map * map, const uint32_t n); +/* FADD FRm,FRn */ +void fadd__source_and_destination_operands(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FCMP/EQ FRm,FRn */ +void fcmp_eq__source_and_destination_operands(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FCMP/GT FRm,FRn */ +void fcmp_gt__source_and_destination_operands(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FDIV FRm,FRn */ +void fdiv__source_and_destination_operands(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FLOAT FPUL,FRn */ +void float__fpul_to_frn(struct architectural_state * state, struct memory_map * map, const uint32_t n); +/* FMAC FR0,FRm,FRn */ +void fmac__fr0_frm_frn(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMUL FRm,FRn 
*/ +void fmul__source_and_destination_operands(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FNEG FRn */ +void fneg__destination_operand_only(struct architectural_state * state, struct memory_map * map, const uint32_t n); +/* FSQRT FRn */ +void fsqrt__destination_operand_only(struct architectural_state * state, struct memory_map * map, const uint32_t n); +/* FSUB FRm,FRn */ +void fsub__source_and_destination_operands(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FTRC FRm,FPUL */ +void ftrc__frm_to_fpul(struct architectural_state * state, struct memory_map * map, const uint32_t m); +/* FABS DRn */ +void fabs__destination_operand_only_double(struct architectural_state * state, struct memory_map * map, const uint32_t n); +/* FADD DRm,DRn */ +void fadd__source_and_destination_operands_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FCMP/EQ DRm,DRn */ +void fcmp_eq__source_and_destination_operands_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FCMP/GT DRm,DRn */ +void fcmp_gt__source_and_destination_operands_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FDIV DRm,DRn */ +void fdiv__source_and_destination_operands_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FCNVDS DRm,FPUL */ +void fcnvds__drm_to_fpul(struct architectural_state * state, struct memory_map * map, const uint32_t m); +/* FCNVSD FPUL,DRn */ +void fcnvsd__fpul_to_drn(struct architectural_state * state, struct memory_map * map, const uint32_t n); +/* FLOAT FPUL,DRn */ +void float__fpul_to_drn(struct architectural_state * state, struct memory_map * map, const uint32_t n); +/* FMUL DRm,DRn */ +void fmul__source_and_destination_operands_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FNEG DRn */ +void fneg__destination_operand_only_double(struct architectural_state * state, struct memory_map * map, const uint32_t n); +/* FSQRT DRn */ +void fsqrt__destination_operand_only_double(struct architectural_state * state, struct memory_map * map, const uint32_t n); +/* FSUB DRm,DRn */ +void fsub__source_and_destination_operands_double(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FTRC DRm,FPUL */ +void ftrc__drm_to_fpul(struct architectural_state * state, struct memory_map * map, const uint32_t m); /* LDS Rm,FPSCR */ void lds__transfer_to_fpscr(struct architectural_state * state, struct memory_map * map, const uint32_t m); /* LDS Rm,FPUL */ @@ -346,4 +432,26 @@ void sts__transfer_from_fpul(struct architectural_state * state, struct memory_m /* STS.L FPSCR,@-Rn */ void sts_l__store_from_fpscr(struct architectural_state * state, struct memory_map * map, const uint32_t n); /* STS.L FPUL,@-Rn */ -void sts_l__store_from_fpul(struct architectural_state * state, struct memory_map * map, const uint32_t n); \ No newline at end of file +void sts_l__store_from_fpul(struct architectural_state * state, struct memory_map * map, const uint32_t n); +/* FMOV DRm,XDn */ +void fmov__double_to_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV XDm,DRn */ +void fmov__bank_to_double(struct architectural_state * state, struct memory_map * 
map, const uint32_t m, const uint32_t n); +/* FMOV XDm,XDn */ +void fmov__source_and_destination_operands_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV @Rm,XDn */ +void fmov__load_register_direct_data_transfer_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV @Rm+,XDn */ +void fmov__load_direct_data_transfer_from_register_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV @(R0,Rm),XDn */ +void fmov__load_indexed_register_indirect_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV XDm,@Rn */ +void fmov__store_register_direct_data_transfer_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV XDm,@-Rn */ +void fmov__store_direct_data_transfer_from_register_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FMOV XDm,@(R0,Rn) */ +void fmov__store_indexed_register_indirect_bank(struct architectural_state * state, struct memory_map * map, const uint32_t m, const uint32_t n); +/* FRCHG */ +void frchg__no_operand(struct architectural_state * state, struct memory_map * map); +/* FSCHG */ +void fschg__no_operand(struct architectural_state * state, struct memory_map * map); \ No newline at end of file diff --git a/c/operations.h b/c/operations.h index 8cfbcff..7287c6b 100644 --- a/c/operations.h +++ b/c/operations.h @@ -2,6 +2,8 @@ #include #include +#include + // // sign_extend // @@ -107,6 +109,41 @@ static inline uint32_t signed_saturate32(uint32_t x) return signed_saturate(x, 32); } +// +// float functions +// +static inline float32_t float_value32(uint32_t x) +{ + float32_t f = { x }; + return f; +} + +static inline float64_t float_value64(uint64_t x) +{ + float64_t f = { x }; + return f; +} + +static inline uint64_t float_value_pair32(uint64_t x) +{ + return x; +} + +static inline uint32_t float_register32(float32_t x) +{ + return x.v; +} + +static inline uint64_t float_register64(float64_t x) +{ + return x.v; +} + +static inline uint64_t float_register_pair32(uint64_t x) +{ + return x; +} + // // "convenience" functions // diff --git a/c/state.h b/c/state.h index b731587..7c1d3e8 100644 --- a/c/state.h +++ b/c/state.h @@ -35,7 +35,6 @@ static_assert(REGN_BANK(SR__MD | SR__RB, 15) == 15); union floating_point_registers { uint32_t fr[32]; - uint32_t fp[16][2]; uint64_t dr[16]; uint32_t fv[8][4]; uint32_t fm[2][16]; @@ -43,15 +42,14 @@ union floating_point_registers { static_assert((sizeof (union floating_point_registers)) == 32 * 4); -#define FR_N(state, x) ((x) ^ ((state)->fpscr.fr << 4)) +#define FR_N(state, x) ((x) ^ ((state)->fpscr.bits.fr << 4)) #define FR_(state, x) ((state)->floating_point_register.fr[FR_N(state, x)]) -#define FP_N(state, x) ((x) ^ ((state)->fpscr.fr << 3)) -#define FP_(state, x) ((state)->floating_point_register.fp[FP_N(state, x)]) -#define DR2_N(state, x) ((x) ^ ((state)->fpscr.fr << 3)) +#define DR2_N(state, x) ((x) ^ ((state)->fpscr.bits.fr << 3)) #define DR2_(state, x) ((state)->floating_point_register.dr[DR2_N(state, x)]) -#define XD2_N(state, x) ((x) ^ ((!(state)->fpscr.fr) << 3)) +#define FP2_ DR2_ +#define XD2_N(state, x) ((x) ^ ((!(state)->fpscr.bits.fr) << 3)) #define XD2_(state, x) ((state)->floating_point_register.dr[XD2_N(state, x)]) -#define FV4_N(state, x) ((x) ^ ((state)->fpscr.fr << 2)) +#define 
FV4_N(state, x) ((x) ^ ((state)->fpscr.bits.fr << 2)) #define FV4_(state, x) ((state)->floating_point_register.dr[FV4_N(state, x)]) #define XMTRX_N(state) (!(state)->fpscr.fr) #define XMTRX(state) ((state)->floating_point_register.fm[XMTRX_N(state)]) diff --git a/c/state_helpers.h b/c/state_helpers.h index ccc08e4..0287b1c 100644 --- a/c/state_helpers.h +++ b/c/state_helpers.h @@ -61,6 +61,17 @@ static inline uint32_t read_memory32(struct memory_map * map, uint32_t address) return entry->access.read_memory32(entry->mem, relative_address); } +static inline uint64_t read_memory_pair32(struct memory_map * map, uint32_t address) +{ + assert((address & 0b111) == 0); + struct memory_map_entry * entry = find_entry(map, address); + if (entry == NULL) return 0; + uint32_t relative_address = physical_address(address) - entry->start; + uint64_t low = entry->access.read_memory32(entry->mem, relative_address); + uint64_t high = entry->access.read_memory32(entry->mem, relative_address+4); + return (high << 32) | (low << 0); +} + static inline void write_memory8(struct memory_map * map, uint32_t address, uint8_t value) { struct memory_map_entry * entry = find_entry(map, address); @@ -86,3 +97,13 @@ static inline void write_memory32(struct memory_map * map, uint32_t address, uin uint32_t relative_address = physical_address(address) - entry->start; entry->access.write_memory32(entry->mem, relative_address, value); } + +static inline void write_memory_pair32(struct memory_map * map, uint32_t address, uint64_t value) +{ + assert((address & 0b111) == 0); + struct memory_map_entry * entry = find_entry(map, address); + if (entry == NULL) return; + uint32_t relative_address = physical_address(address) - entry->start; + entry->access.write_memory32(entry->mem, relative_address, (value >> 0 )); + entry->access.write_memory32(entry->mem, relative_address+4, (value >> 32)); +} diff --git a/c/status_bits.h b/c/status_bits.h index f2db6d8..3d61c6a 100644 --- a/c/status_bits.h +++ b/c/status_bits.h @@ -44,17 +44,17 @@ struct fpscr_bits { uint32_t flag_inexact : 1; uint32_t flag_underflow : 1; uint32_t flag_overflow : 1; - uint32_t flag_division_by_zero : 1; - uint32_t flag_invalid_operation : 1; + uint32_t flag_divide_by_zero : 1; + uint32_t flag_invalid : 1; uint32_t enable_inexact : 1; uint32_t enable_underflow : 1; uint32_t enable_overflow : 1; - uint32_t enable_division_by_zero : 1; + uint32_t enable_divide_by_zero : 1; uint32_t enable_invalid : 1; uint32_t cause_inexact : 1; uint32_t cause_underflow : 1; uint32_t cause_overflow : 1; - uint32_t cause_division_by_zero : 1; + uint32_t cause_divide_by_zero : 1; uint32_t cause_invalid : 1; uint32_t cause_fpu_error : 1; uint32_t dn : 1; @@ -70,17 +70,17 @@ struct fpscr_bits { uint32_t dn : 1; uint32_t cause_fpu_error : 1; uint32_t cause_invalid : 1; - uint32_t cause_division_by_zero : 1; + uint32_t cause_divide_by_zero : 1; uint32_t cause_overflow : 1; uint32_t cause_underflow : 1; uint32_t cause_inexact : 1; uint32_t enable_invalid : 1; - uint32_t enable_division_by_zero : 1; + uint32_t enable_divide_by_zero : 1; uint32_t enable_overflow : 1; uint32_t enable_underflow : 1; uint32_t enable_inexact : 1; - uint32_t flag_invalid_operation : 1; - uint32_t flag_division_by_zero : 1; + uint32_t flag_invalid : 1; + uint32_t flag_divide_by_zero : 1; uint32_t flag_overflow : 1; uint32_t flag_underflow : 1; uint32_t flag_inexact : 1; diff --git a/disabled_instructions.py b/disabled_instructions.py index 136e36b..371c4bb 100644 --- a/disabled_instructions.py +++ 
b/disabled_instructions.py @@ -1,32 +1,9 @@ disabled_instructions = [ - "FLDS", - "FSTS", - "FABS", - "FADD", - "FCMP/EQ", - "FCMP/GT", - "FDIV", - "FLOAT", - "FMAC", - "FMUL", - "FNEG", - "FSQRT", - "FSUB", - "FTRC", - "FCNVDS", - "FCNVSD", - "FRCHG", - "FSCHG", "FCSA", "FSRRA", "FIPR", "FTRV", - "FLDI0", - "FLDI1", - "FMOV", - "FMOV.S", - "LDTLB", "OCBI", "OCBP", diff --git a/generate_bits.py b/generate_bits.py index db0fff0..3e8d917 100644 --- a/generate_bits.py +++ b/generate_bits.py @@ -15,17 +15,17 @@ fpscr_bits = ( ("FLAG_INEXACT" , 2, 1), ("FLAG_UNDERFLOW" , 3, 1), ("FLAG_OVERFLOW" , 4, 1), - ("FLAG_DIVISION_BY_ZERO" , 5, 1), - ("FLAG_INVALID_OPERATION" , 6, 1), + ("FLAG_DIVIDE_BY_ZERO" , 5, 1), + ("FLAG_INVALID" , 6, 1), ("ENABLE_INEXACT" , 7, 1), ("ENABLE_UNDERFLOW" , 8, 1), ("ENABLE_OVERFLOW" , 9, 1), - ("ENABLE_DIVISION_BY_ZERO", 10, 1), + ("ENABLE_DIVIDE_BY_ZERO" , 10, 1), ("ENABLE_INVALID" , 11, 1), ("CAUSE_INEXACT" , 12, 1), ("CAUSE_UNDERFLOW" , 13, 1), ("CAUSE_OVERFLOW" , 14, 1), - ("CAUSE_DIVISION_BY_ZERO" , 15, 1), + ("CAUSE_DIVIDE_BY_ZERO" , 15, 1), ("CAUSE_INVALID" , 16, 1), ("CAUSE_FPU_ERROR" , 17, 1), ("DN" , 18, 1), # Denormalization mode diff --git a/generate_impl.py b/generate_impl.py index 910b841..89eff1e 100644 --- a/generate_impl.py +++ b/generate_impl.py @@ -85,6 +85,7 @@ def main(): '#include "operations.h"', '#include "exception.h"', '#include "state_helpers.h"', + '#include "fpu.h"', '', ] diff --git a/identifier_substitution.py b/identifier_substitution.py index 898a6be..3656e5f 100644 --- a/identifier_substitution.py +++ b/identifier_substitution.py @@ -75,7 +75,6 @@ mapping = { "ASID" : "ASID", "VPN" : "VPN", "PPN" : "PPN", - "SZ" : "SZ", "SZ0" : "SZ0", "SZ1" : "SZ1", "SH" : "SH", @@ -115,6 +114,7 @@ mapping = { "XMTRX" : "XMTRX(state)", "FR" : "fr", + "SZ" : "sz", "ReadMemoryPair32" : "read_memory_pair32", "WriteMemoryPair32" : "write_memory_pair32", @@ -162,20 +162,20 @@ mapping = { "FTRV_S" : "ftrv_s", "FpuIsDisabled" : "fpu_is_disabled", - "FpuFlagI" : "fpu_flag_I", - "FpuFlagU" : "fpu_flag_U", - "FpuFlagO" : "fpu_flag_O", - "FpuFlagZ" : "fpu_flag_Z", - "FpuFlagV" : "fpu_flag_V", - "FpuCauseI" : "fpu_cause_I", - "FpuCauseU" : "fpu_cause_U", - "FpuCauseO" : "fpu_cause_O", - "FpuCauseZ" : "fpu_cause_Z", - "FpuCauseV" : "fpu_cause_V", - "FpuCauseE" : "fpu_cause_E", - "FpuEnableI" : "fpu_enable_I", - "FpuEnableU" : "fpu_enable_U", - "FpuEnableO" : "fpu_enable_O", - "FpuEnableZ" : "fpu_enable_Z", - "FpuEnableV" : "fpu_enable_V", + "FpuFlagI" : "fpu_flag_i", + "FpuFlagU" : "fpu_flag_u", + "FpuFlagO" : "fpu_flag_o", + "FpuFlagZ" : "fpu_flag_z", + "FpuFlagV" : "fpu_flag_v", + "FpuCauseI" : "fpu_cause_i", + "FpuCauseU" : "fpu_cause_u", + "FpuCauseO" : "fpu_cause_o", + "FpuCauseZ" : "fpu_cause_z", + "FpuCauseV" : "fpu_cause_v", + "FpuCauseE" : "fpu_cause_e", + "FpuEnableI" : "fpu_enable_i", + "FpuEnableU" : "fpu_enable_u", + "FpuEnableO" : "fpu_enable_o", + "FpuEnableZ" : "fpu_enable_z", + "FpuEnableV" : "fpu_enable_v", }
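
A note for reviewers on the reworked register-bank macros in c/state.h: FR_N, DR2_N and XD2_N pick a bank by XOR-ing the register index with FPSCR.FR (or its complement, for the XD pairs), shifted to match the width of the array being indexed. The standalone sketch below models only that indexing, with plain variables standing in for the architectural state; the names fr_bit, fr_n, dr_n and xd_n are illustrative and do not appear in the patch.

#include <assert.h>

/* In the patch, fr[] has 32 single-precision slots and dr[] views the
 * same storage as 16 pairs.  FPSCR.FR selects which half FR0..FR15 and
 * DR0..DR14 currently name; XD always names the opposite half. */
static unsigned fr_bit = 0;                     /* state->fpscr.bits.fr */

static unsigned fr_n(unsigned x) { return x ^ (fr_bit << 4); }    /* FR_N  */
static unsigned dr_n(unsigned x) { return x ^ (fr_bit << 3); }    /* DR2_N */
static unsigned xd_n(unsigned x) { return x ^ ((!fr_bit) << 3); } /* XD2_N */

int main(void)
{
  fr_bit = 0;
  assert(fr_n(2) == 2  && dr_n(1) == 1 && xd_n(1) == 9);
  fr_bit = 1;                                   /* effect of FRCHG */
  assert(fr_n(2) == 18 && dr_n(1) == 9 && xd_n(1) == 1);
  return 0;
}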
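
Similarly, the new read_memory_pair32/write_memory_pair32 helpers in c/state_helpers.h move a 64-bit register pair through the existing 32-bit read/write callbacks, low word at the 8-byte-aligned address and high word at address + 4. The sketch below models that round-trip against a flat word array instead of a memory_map entry; mem, read32 and write32 are stand-ins, not names from the patch.

#include <assert.h>
#include <stdint.h>

/* Flat array of 32-bit words standing in for one memory_map entry. */
static uint32_t mem[16];

static uint32_t read32(uint32_t address)              { return mem[address / 4]; }
static void     write32(uint32_t address, uint32_t v) { mem[address / 4] = v; }

/* Mirrors write_memory_pair32: low word first, high word at address + 4. */
static void write_pair32(uint32_t address, uint64_t value)
{
  assert((address & 0b111) == 0);
  write32(address,     (uint32_t)(value >>  0));
  write32(address + 4, (uint32_t)(value >> 32));
}

/* Mirrors read_memory_pair32. */
static uint64_t read_pair32(uint32_t address)
{
  assert((address & 0b111) == 0);
  uint64_t low  = read32(address);
  uint64_t high = read32(address + 4);
  return (high << 32) | low;
}

int main(void)
{
  uint64_t dr = 0x3ff0000000000000ull;  /* bit pattern of 1.0 as a float64 */
  write_pair32(8, dr);                  /* as in FMOV DRm,@Rn */
  assert(read_pair32(8) == dr);         /* as in FMOV @Rm,DRn */
  return 0;
}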