From 118942521e9fc06d37e0bebeae36101c01991b6c Mon Sep 17 00:00:00 2001
From: Zack Buhman <zack@buhman.org>
Date: Wed, 23 Aug 2023 19:18:17 -0700
Subject: [PATCH] test: add initial test makefile

This also adds support for "#" characters prior to immediates.

nop may now also appear alongside other ops. The parser no longer generates
nop_t--a lone nop is instead represented as a control_word_t with a zero-length
ops vector.
---
 .gitignore       |  3 ++-
 ast_emitter.cpp  | 14 ++++++-------
 ast_emitter.hpp  |  5 ++---
 ast_resolver.cpp |  4 ++--
 ast_resolver.hpp |  6 +++---
 lexer.cpp        |  1 +
 main.cpp         |  7 +++----
 parser.cpp       | 20 +++++++++++++++----
 parser.hpp       |  1 +
 stmt.hpp         | 10 +++++++++-
 test/Makefile    | 52 ++++++++++++++++++++++++++++++++++++++++++++++++
 test/bin-dump.py | 12 +++++++++++
 token.hpp        |  2 ++
 13 files changed, 112 insertions(+), 25 deletions(-)
 create mode 100644 test/Makefile
 create mode 100644 test/bin-dump.py

diff --git a/.gitignore b/.gitignore
index b5e0755..215d0b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 main
 *.o
 *.gch
-*.d
\ No newline at end of file
+*.d
+test/*.s
\ No newline at end of file
diff --git a/ast_emitter.cpp b/ast_emitter.cpp
index 1b804de..c75881d 100644
--- a/ast_emitter.cpp
+++ b/ast_emitter.cpp
@@ -131,7 +131,7 @@ uint32_t emitter_t::visit(const op::mov_ram_a_t * mov_ram_a) const
 
 uint32_t emitter_t::visit(const op::mov_imm_d1_t * mov_imm_d1) const
 {
-  num_t value = mov_imm_d1->imm.expr->accept(this);
+  num_t value = mov_imm_d1->imm.normalize(mov_imm_d1->imm.expr->accept(this));
   if (mov_imm_d1->imm.in_range(value))
     return mov_imm_d1->code() | mov_imm_d1->bits() | value;
   else
@@ -152,7 +152,7 @@ uint32_t emitter_t::visit(const op::control_word_t * control_word) const
 
 uint32_t emitter_t::visit(const load::mvi_t * mvi) const
 {
-  num_t value = mvi->imm.expr->accept(this);
+  num_t value = mvi->imm.normalize(mvi->imm.expr->accept(this));
   if (mvi->imm.in_range(value))
     return mvi->code() | mvi->bits() | value;
   else
@@ -161,7 +161,7 @@ uint32_t emitter_t::visit(const load::mvi_t * mvi) const
 
 uint32_t emitter_t::visit(const load::mvi_cond_t * mvi_cond) const
 {
-  num_t value = mvi_cond->imm.expr->accept(this);
+  num_t value = mvi_cond->imm.normalize(mvi_cond->imm.expr->accept(this));
   if (mvi_cond->imm.in_range(value))
     return mvi_cond->code() | mvi_cond->bits() | value;
   else
@@ -170,7 +170,7 @@ uint32_t emitter_t::visit(const load::mvi_cond_t * mvi_cond) const
 
 uint32_t emitter_t::visit(const dma::src_d0_imm_t * src_d0_imm) const
 {
-  num_t value = src_d0_imm->imm.expr->accept(this);
+  num_t value = src_d0_imm->imm.normalize(src_d0_imm->imm.expr->accept(this));
   if (src_d0_imm->imm.in_range(value))
     return src_d0_imm->code() | src_d0_imm->bits() | value;
   else
@@ -179,7 +179,7 @@ uint32_t emitter_t::visit(const dma::src_d0_imm_t * src_d0_imm) const
 
 uint32_t emitter_t::visit(const dma::d0_dst_imm_t * d0_dst_imm) const
 {
-  num_t value = d0_dst_imm->imm.expr->accept(this);
+  num_t value = d0_dst_imm->imm.normalize(d0_dst_imm->imm.expr->accept(this));
   if (d0_dst_imm->imm.in_range(value))
     return d0_dst_imm->code() | d0_dst_imm->bits() | value;
   else
@@ -198,7 +198,7 @@ uint32_t emitter_t::visit(const dma::d0_dst_ram_t * d0_dst_ram) const
 
 uint32_t emitter_t::visit(const jump::jmp_t * jmp) const
 {
-  num_t value = jmp->imm.expr->accept(this);
+  num_t value = jmp->imm.normalize(jmp->imm.expr->accept(this));
   if (jmp->imm.in_range(value))
     return jmp->code() | jmp->bits() | value;
   else
@@ -207,7 +207,7 @@ uint32_t emitter_t::visit(const jump::jmp_t * jmp) const
 
 uint32_t emitter_t::visit(const jump::jmp_cond_t * jmp_cond) const
 {
-  num_t value = jmp_cond->imm.expr->accept(this);
+  num_t value = jmp_cond->imm.normalize(jmp_cond->imm.expr->accept(this));
   if (jmp_cond->imm.in_range(value))
     return jmp_cond->code() | jmp_cond->bits() | value;
   else
diff --git a/ast_emitter.hpp b/ast_emitter.hpp
index 988041a..61a179a 100644
--- a/ast_emitter.hpp
+++ b/ast_emitter.hpp
@@ -22,11 +22,10 @@ struct emitter_error_t : std::runtime_error
 
 struct emitter_t : visitor_t<uint32_t>
 {
-  emitter_t(variables_t& variables, const addresses_t& addresses)
-    : variables(variables), addresses(addresses) {}
+  emitter_t(variables_t& variables)
+    : variables(variables) {}
 
   variables_t& variables;
-  const addresses_t& addresses;
 
   uint32_t visit(const binary_t * binary) const;
   uint32_t visit(const grouping_t * grouping) const;
diff --git a/ast_resolver.cpp b/ast_resolver.cpp
index c4f6db8..e9ace90 100644
--- a/ast_resolver.cpp
+++ b/ast_resolver.cpp
@@ -174,10 +174,10 @@ void resolver_t::visit(const assign_t * assign) const
 
 void resolver_t::visit(const label_t * label) const
 {
-  if (addresses.contains(label->name.lexeme)) {
+  if (variables.contains(label->name.lexeme)) {
     throw std::runtime_error("label redefinition is not allowed");
   } else {
-    addresses.insert({label->name.lexeme, pc.value});
+    variables.insert({label->name.lexeme, pc.value});
   }
 }
 
diff --git a/ast_resolver.hpp b/ast_resolver.hpp
index 5959b33..82cb7ae 100644
--- a/ast_resolver.hpp
+++ b/ast_resolver.hpp
@@ -25,11 +25,11 @@ struct pc_t
 
 struct resolver_t : visitor_t<void>
 {
-  resolver_t(pc_t& pc, addresses_t& addresses)
-    : pc(pc), addresses(addresses) {}
+  resolver_t(pc_t& pc, variables_t& variables)
+    : pc(pc), variables(variables) {}
 
   pc_t& pc;
-  addresses_t& addresses;
+  variables_t& variables;
 
   void visit(const binary_t * binary) const;
   void visit(const grouping_t * grouping) const;
diff --git a/lexer.cpp b/lexer.cpp
index 387f85c..cbc28f9 100644
--- a/lexer.cpp
+++ b/lexer.cpp
@@ -166,6 +166,7 @@ std::optional<token_t> lexer_t::lex_token()
     case '^': return {{pos, carot, lexeme()}};
     case '=': return {{pos, equal, lexeme()}};
     case ':': return {{pos, colon, lexeme()}};
+    case '#': return {{pos, hash, lexeme()}};
     case '<':
       if (match('<')) return {{pos, left_shift, lexeme()}};
       break;
diff --git a/main.cpp b/main.cpp
index 0efa78a..d9aba38 100644
--- a/main.cpp
+++ b/main.cpp
@@ -40,15 +40,14 @@ static void run(std::ostream& os, std::string source)
   parser_t pass2(tokens);
   ast::printer_t printer(std::cout);
   ast::pc_t pc;
-  ast::addresses_t addresses;
-  ast::resolver_t resolver(pc, addresses);
+  ast::variables_t variables;
+  ast::resolver_t resolver(pc, variables);
   while (auto stmt_o = pass1.statement()) {
     (*stmt_o)->accept(&printer);
     std::cout << std::endl << std::flush;
     (*stmt_o)->accept(&resolver);
   }
-  ast::variables_t variables;
-  ast::emitter_t emitter(variables, addresses);
+  ast::emitter_t emitter(variables);
   while (auto stmt_o = pass2.statement()) {
     uint32_t output = (*stmt_o)->accept(&emitter);
     if (output != 0xffff'ffff) {
diff --git a/parser.cpp b/parser.cpp
index 1d8cda0..07c582e 100644
--- a/parser.cpp
+++ b/parser.cpp
@@ -31,6 +31,11 @@ const token_t& parser_t::peek()
   return tokens[current_ix];
 }
 
+const token_t& parser_t::peek(int n) // lookahead: the token n positions past the current index
+{
+  return tokens[current_ix + n]; // NOTE(review): no bounds check here -- caller must keep current_ix + n in range
+}
+
 const token_t& parser_t::advance()
 {
   if (!at_end_p()) current_ix++;
@@ -267,6 +272,7 @@ std::optional<op::op_t *> parser_t::xyd1_bus()
       else
 	throw error(peek(), "expected x-bus, y-bus, or d-bus destination operand");
     } else {
+      match(hash); // optionally consume a hash
       uimm_t<8> imm = uimm_t<8>(peek(), expression());
       consume(comma, "expected `,`");
       if (auto dest_o = d1_dest())
@@ -284,6 +290,7 @@ std::optional<op::op_t *> parser_t::xyd1_bus()
 
 std::optional<stmt_t *> parser_t::op()
 {
+  bool saw_nop = false;
   std::vector<const op::op_t *> ops;
   std::vector<const token_t *> tokens;
 
@@ -302,11 +309,12 @@ std::optional<stmt_t *> parser_t::op()
   while (true) {
     // fixme: check for emplacement here
     const token_t& token = peek();
-    if      (auto op_o = alu()     ) emplace_op(token, *op_o);
+    if      (match(_nop))            saw_nop = 1;
+    else if (auto op_o = alu()     ) emplace_op(token, *op_o);
     else if (auto op_o = xyd1_bus()) emplace_op(token, *op_o);
     else                               break;
   }
-  if (ops.size() != 0)
+  if (ops.size() != 0 || saw_nop)
     return {new op::control_word_t(ops)};
   else
     return {};
@@ -354,6 +362,7 @@ std::optional<stmt_t *> parser_t::load()
 {
   if (match(_mvi)) {
     const token_t& expr_token = peek();
+    match(hash); // optionally consume a hash
     expr_t * expr = expression();
     consume(comma, "expected `,`");
     load::dest_t dest = parser_t::load_dest();
@@ -499,6 +508,7 @@ std::optional<stmt_t *> parser_t::dma()
       if (auto length_ram_o = dma_length_ram()) {
 	return {new dma::d0_dst_ram_t(hold, add, dst, *length_ram_o)};
       } else {
+        match(hash); // optionally consume a hash
 	uimm_t<8> imm = uimm_t<8>(peek(), expression());
 	return {new dma::d0_dst_imm_t(hold, add, dst, imm)};
       }
@@ -510,6 +520,7 @@ std::optional<stmt_t *> parser_t::dma()
       if (auto length_ram_o = dma_length_ram()) {
 	return {new dma::src_d0_ram_t(hold, add, src, *length_ram_o)};
       } else {
+        match(hash); // optionally consume a hash
 	uimm_t<8> imm = uimm_t<8>(peek(), expression());
 	return {new dma::src_d0_imm_t(hold, add, src, imm)};
       }
@@ -540,9 +551,11 @@ std::optional<stmt_t *> parser_t::jump()
   if (match(_jmp)) {
     if (auto cond_o = jump_cond()) {
       consume(comma, "expected `,` after jump condition");
+      match(hash); // optionally consume a hash
       uimm_t<8> imm = uimm_t<8>(peek(), expression());
       return {new jump::jmp_cond_t(*cond_o, imm)};
     } else {
+      match(hash); // optionally consume a hash
       uimm_t<8> imm = uimm_t<8>(peek(), expression());
       return {new jump::jmp_t(imm)};
     }
@@ -566,8 +579,7 @@ std::optional<stmt_t *> parser_t::end()
 
 std::optional<stmt_t *> parser_t::instruction()
 {
-  if      (match(_nop))          return {new nop::nop_t()};
-  else if (auto op_o = op())     return op_o;
+  if      (auto op_o = op())     return op_o;
   else if (auto load_o = load()) return load_o;
   else if (auto dma_o = dma())   return dma_o;
   else if (auto jump_o = jump()) return jump_o;
diff --git a/parser.hpp b/parser.hpp
index 58e24ff..cf43082 100644
--- a/parser.hpp
+++ b/parser.hpp
@@ -31,6 +31,7 @@ struct parser_t
 
   const token_t& previous();
   const token_t& peek();
+  const token_t& peek(int n);
   const token_t& advance();
   bool check(enum token_t::type_t token_type);
   bool match(enum token_t::type_t token_type);
diff --git a/stmt.hpp b/stmt.hpp
index 191e142..7d024f8 100644
--- a/stmt.hpp
+++ b/stmt.hpp
@@ -40,6 +40,15 @@ struct imm_t {
   static constexpr num_t max = (1L << (bits - static_cast<num_t>(sign))) - 1;
   static constexpr num_t min = sign ? -(max + 1) : 0;
 
+  num_t normalize(num_t value) const
+  {
+    // fixme: hack -- masking with `max` only happens when S is false and value exceeds 2147483648 (2^31)
+    const bool keep_as_is = S || value <= 2147483648;
+    if (keep_as_is)
+      return value;
+    return value & max;
+  }
+
   bool in_range(num_t value) const
   {
     return value <= max && value >= min;
@@ -257,7 +266,6 @@ struct control_word_t : stmt_accept_t<control_word_t>
   control_word_t(std::vector<const op::op_t *> ops)
     : ops(ops)
   {
-    if (ops.size() == 0) throw std::runtime_error("zero-length ops");
   }
 
   const std::vector<const op_t *> ops;
diff --git a/test/Makefile b/test/Makefile
new file mode 100644
index 0000000..f1e94e8
--- /dev/null
+++ b/test/Makefile
@@ -0,0 +1,52 @@
+# Assembler executable run under DOSBox; override with `make DSPASM=/path/to/dspasm.exe`.
+DSPASM = dspasm.exe
+
+SRC = sample1.asm sample2a.asm sample2b.asm sample3.asm
+SRC += cmpnm.asm fbtrans.asm loop_pr.asm udiv.asm
+EXPECT = $(patsubst %.asm,expect/%.bin,$(SRC))
+ACTUAL = $(patsubst %.asm,actual/%.bin,$(SRC))
+ALL = $(EXPECT) $(ACTUAL)
+ALL_TXT = $(patsubst %.bin,%.txt,$(ALL))
+
+all: $(ALL)
+all-txt: $(ALL_TXT)
+
+# Assemble with $(DSPASM) inside DOSBox; the output is expected under an upper-cased name, hence the final mv.
+%.s: %.asm
+	@test -f $(DSPASM) || (echo $(DSPASM) does not exist--set the DSPASM make variable; exit 1)
+	@rm -f $@
+	echo '[autoexec]' > $@.conf
+	echo 'mount C $(dir $(DSPASM))' >> $@.conf
+	echo 'mount D $(dir $<)' >> $@.conf
+	echo 'D:' >> $@.conf
+	echo 'C:\$(notdir $(DSPASM)) $(notdir $<) $(notdir $@)' >> $@.conf
+	echo 'exit' >> $@.conf
+	dosbox -conf $@.conf
+	@rm $@.conf
+	mv $(shell echo '$@' | tr '[:lower:]' '[:upper:]') $@
+
+%.txt: %.bin
+	python bin-dump.py $< > $@
+
+expect/%.bin: %.s
+	@mkdir -p $(dir $@)
+	srec_cat -Output $@ -Binary $<
+
+actual/%.bin: %.asm
+	@mkdir -p $(dir $@)
+	../main $< $@
+
+clean:
+	rm -f expect/*.bin expect/*.txt actual/*.bin actual/*.txt *.s
+
+.SUFFIXES:
+.INTERMEDIATE:
+.SECONDARY:
+.PHONY: all all-txt clean
+
+# Recipe-less pattern rules: cancel make's built-in RCS/SCCS implicit rules.
+%: RCS/%,v
+%: RCS/%
+%: %,v
+%: s.%
+%: SCCS/s.%
diff --git a/test/bin-dump.py b/test/bin-dump.py
new file mode 100644
index 0000000..4923f34
--- /dev/null
+++ b/test/bin-dump.py
@@ -0,0 +1,12 @@
+"""Print each big-endian 32-bit word of the file named by argv[1] as 32 binary digits."""
+import struct
+import sys
+
+with open(sys.argv[1], 'rb') as stream:
+    data = stream.read()
+
+assert len(data) % 4 == 0, len(data)
+
+for offset in range(0, len(data) - len(data) % 4, 4):
+    (value,) = struct.unpack_from('>I', data, offset)
+    print(f'{value:>032b}')
diff --git a/token.hpp b/token.hpp
index 636d464..d6bf8c8 100644
--- a/token.hpp
+++ b/token.hpp
@@ -25,6 +25,7 @@ struct token_t {
 
     comma,
     dot,
+    hash,
 
     // operators
     plus,
@@ -81,6 +82,7 @@ struct token_t {
 
     case comma        : return os << "COMMA";
     case dot          : return os << "DOT";
+    case hash         : return os << "HASH";
 
     // operators
     case plus         : return os << "PLUS";