From 29428c7a92356c33c02392b8e60139c43b9dfb6b Mon Sep 17 00:00:00 2001
From: Zack Buhman <zack@buhman.org>
Date: Mon, 14 Aug 2023 16:52:56 +0000
Subject: [PATCH] initial

---
 .gitignore          |   4 +
 Makefile            |  33 ++++
 build_radix_tree.py |  99 ++++++++++++
 keyword.hpp         | 386 ++++++++++++++++++++++++++++++++++++++++++++
 lexer.cpp           | 218 +++++++++++++++++++++++++
 lexer.hpp           |   1 +
 main.cpp            |  64 ++++++++
 num.hpp             |   5 +
 token.hpp           | 194 ++++++++++++++++++++++
 9 files changed, 1004 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Makefile
 create mode 100644 build_radix_tree.py
 create mode 100644 keyword.hpp
 create mode 100644 lexer.cpp
 create mode 100644 lexer.hpp
 create mode 100644 main.cpp
 create mode 100644 num.hpp
 create mode 100644 token.hpp
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b5e0755
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+main
+*.o
+*.gch
+*.d
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..c4011ba
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,33 @@
+CXXFLAGS = -Og -g -Wall -Wextra -Werror -Wfatal-errors -Wpedantic -std=c++20
+LDFLAGS =
+# TARGET is prepended to "g++" to form CXX; it is empty here.
+TARGET =
+CXX = $(TARGET)g++
+# Only the sources listed in SRC are compiled and linked (lexer.cpp is not listed).
+SRC = main.cpp
+OBJ = $(patsubst %.cpp,%.o,$(SRC))
+DEP = $(patsubst %.cpp,%.d,$(SRC))
+
+all: main
+# Include the .d files written by the compile rule below; the leading '-' ignores missing ones.
+-include $(DEP)
+# Compile one object; -MMD -MF also writes its header dependencies to <name>.d.
+%.o: %.cpp
+	$(CXX) $(CXXFLAGS) -MMD -MF $(basename $<).d -c $< -o $@
+
+main: $(OBJ)
+	$(CXX) $(LDFLAGS) $^ -o $@
+# clean deletes objects, dependency files and *.gch files; the main binary is left in place.
+clean:
+	rm -f *.o *.d *.gch
+# A prerequisite-less .SUFFIXES clears the suffix list, disabling make's suffix rules.
+.SUFFIXES:
+.INTERMEDIATE:
+.SECONDARY:
+.PHONY: all clean
+# Recipe-less pattern rules: they cancel make's built-in RCS/SCCS checkout rules.
+%: RCS/%,v
+%: RCS/%
+%: %,v
+%: s.%
+%: SCCS/s.%
diff --git a/build_radix_tree.py b/build_radix_tree.py
new file mode 100644
index 0000000..3efc81e
--- /dev/null
+++ b/build_radix_tree.py
@@ -0,0 +1,99 @@
+def build_radix_tree(ops: list[str]) -> dict:
+    """Build a per-character trie: node = {char: (keyword-or-None, child node)}."""
+    trie = {}
+    for word in ops:
+        node = trie
+        for pos, ch in enumerate(word, start=1):
+            _, kids = node.setdefault(ch, (None, {}))
+            if pos < len(word):
+                node = kids
+            else:  # last character: tag this edge with the complete keyword
+                node[ch] = (word, kids)
+    return trie
+
+def indent(i):
+    return "  " * i  # two spaces per nesting level
+
+def print_switch(d, level=0):
+    """Print nested C++ `switch (s[ix++])` statements that walk the trie `d`.
+
+    Letters get both an upper- and a lower-case label; every case ends in `break`.
+    """
+    outer, inner = indent(level), indent(level + 1)
+    print(outer + "switch (s[ix++]) {")
+    for key, (terminal, children) in d.items():
+        upper, lower = key.upper(), key.lower()
+        if upper != lower:
+            print(f"{outer}case '{upper}': [[fallthrough]];")
+        print(f"{outer}case '{lower}':")
+        if terminal is not None:
+            print(f"{inner}if (ix == s.length()) return {{ token::type_t::_{terminal} }};")
+        if children:
+            # The nested switch is only entered when another character is left.
+            guard = "else {" if terminal is not None else "if (ix < s.length()) {"
+            print(inner + guard)
+            print_switch(children, level + 2)
+            print(inner + "}")
+        print(inner + "break;")
+    print(outer + "}")
+
+def print_keyword_func(root):
+    """Print the complete keyword header: includes, namespace dsp, and
+    struct keyword with its static find() built from the trie `root`."""
+    pad = indent(1)
+    prologue = [
+        "#include <optional>",
+        '#include "token.hpp"',
+        "",
+        "namespace dsp {",
+        "",
+        "struct keyword {",
+        "",
+        "inline static constexpr std::optional<enum token::type_t>",
+        "find(const std::string_view s)",
+        "{",
+        pad + "if (s.length() == 0) { return {}; }",
+        "",
+        pad + "std::string_view::size_type ix = 0;",
+        "",
+    ]
+    print("\n".join(prologue))
+    print_switch(root, level=1)
+    # Close find(), struct keyword and namespace dsp.
+    print("\n".join([pad + "return {};", "}", "", "};", "", "}"]))
+
+from pprint import pprint
+d = build_radix_tree([  # each name is emitted as token::type_t::_<name> by print_switch
+    "alh",
+    "all",
+    "alu",
+    "m0",  "m1",  "m2", "m3",
+    "mc0", "mc1", "mc2", "mc3",
+    "mul",
+    "nop",
+    "and",
+    "or",
+    "xor",
+    "add",
+    "sub",
+    "ad2",
+    "sr",
+    "rr",
+    "sl",
+    "rl",
+    "rl8",
+    "clr",
+    "mov",
+    "mvi",
+    "dma",
+    "dmah",
+    "jmp",
+    "btm",
+    "lps",
+    "end",
+    "endi",
+    "equ",
+    "org",
+    "ends",
+])
+print_keyword_func(d)  # the generated header text is written to stdout
diff --git a/keyword.hpp b/keyword.hpp
new file mode 100644
index 0000000..90c16ee
--- /dev/null
+++ b/keyword.hpp
@@ -0,0 +1,386 @@
+#include <optional>
+#include "token.hpp"
+
+namespace dsp {
+
+struct keyword {
+// find() matches s as a whole, ignoring letter case, against the keywords spelled out below.
+inline static constexpr std::optional<enum token::type_t>
+find(const std::string_view s)
+{
+  if (s.length() == 0) { return {}; }
+  // Layout mirrors the output of build_radix_tree.py; ix indexes the next unread character.
+  std::string_view::size_type ix = 0;
+
+  switch (s[ix++]) {
+  case 'A': [[fallthrough]];
+  case 'a':
+    if (ix < s.length()) {
+      switch (s[ix++]) {
+      case 'L': [[fallthrough]];
+      case 'l':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'H': [[fallthrough]];
+          case 'h':
+            if (ix == s.length()) return { token::type_t::_alh };
+            break;
+          case 'L': [[fallthrough]];
+          case 'l':
+            if (ix == s.length()) return { token::type_t::_all };
+            break;
+          case 'U': [[fallthrough]];
+          case 'u':
+            if (ix == s.length()) return { token::type_t::_alu };
+            break;
+          }
+        }
+        break;
+      case 'N': [[fallthrough]];
+      case 'n':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'D': [[fallthrough]];
+          case 'd':
+            if (ix == s.length()) return { token::type_t::_and };
+            break;
+          }
+        }
+        break;
+      case 'D': [[fallthrough]];
+      case 'd':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'D': [[fallthrough]];
+          case 'd':
+            if (ix == s.length()) return { token::type_t::_add };
+            break;
+          case '2':
+            if (ix == s.length()) return { token::type_t::_ad2 };
+            break;
+          }
+        }
+        break;
+      }
+    }
+    break;
+  case 'M': [[fallthrough]];
+  case 'm':
+    if (ix < s.length()) {
+      switch (s[ix++]) {
+      case '0':
+        if (ix == s.length()) return { token::type_t::_m0 };
+        break;
+      case '1':
+        if (ix == s.length()) return { token::type_t::_m1 };
+        break;
+      case '2':
+        if (ix == s.length()) return { token::type_t::_m2 };
+        break;
+      case '3':
+        if (ix == s.length()) return { token::type_t::_m3 };
+        break;
+      case 'C': [[fallthrough]];
+      case 'c':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case '0':
+            if (ix == s.length()) return { token::type_t::_mc0 };
+            break;
+          case '1':
+            if (ix == s.length()) return { token::type_t::_mc1 };
+            break;
+          case '2':
+            if (ix == s.length()) return { token::type_t::_mc2 };
+            break;
+          case '3':
+            if (ix == s.length()) return { token::type_t::_mc3 };
+            break;
+          }
+        }
+        break;
+      case 'U': [[fallthrough]];
+      case 'u':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'L': [[fallthrough]];
+          case 'l':
+            if (ix == s.length()) return { token::type_t::_mul };
+            break;
+          }
+        }
+        break;
+      case 'O': [[fallthrough]];
+      case 'o':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'V': [[fallthrough]];
+          case 'v':
+            if (ix == s.length()) return { token::type_t::_mov };
+            break;
+          }
+        }
+        break;
+      case 'V': [[fallthrough]];
+      case 'v':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'I': [[fallthrough]];
+          case 'i':
+            if (ix == s.length()) return { token::type_t::_mvi };
+            break;
+          }
+        }
+        break;
+      }
+    }
+    break;
+  case 'N': [[fallthrough]];
+  case 'n':
+    if (ix < s.length()) {
+      switch (s[ix++]) {
+      case 'O': [[fallthrough]];
+      case 'o':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'P': [[fallthrough]];
+          case 'p':
+            if (ix == s.length()) return { token::type_t::_nop };
+            break;
+          }
+        }
+        break;
+      }
+    }
+    break;
+  case 'O': [[fallthrough]];
+  case 'o':
+    if (ix < s.length()) {
+      switch (s[ix++]) {
+      case 'R': [[fallthrough]];
+      case 'r':
+        if (ix == s.length()) return { token::type_t::_or };
+        else { // ix < s.length() here, so s[ix] is in bounds
+          switch (s[ix++]) {
+          case 'G': [[fallthrough]];
+          case 'g':
+            if (ix == s.length()) return { token::type_t::_org };
+            break;
+          }
+        }
+        break;
+      }
+    }
+    break;
+  case 'X': [[fallthrough]];
+  case 'x':
+    if (ix < s.length()) {
+      switch (s[ix++]) {
+      case 'O': [[fallthrough]];
+      case 'o':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'R': [[fallthrough]];
+          case 'r':
+            if (ix == s.length()) return { token::type_t::_xor };
+            break;
+          }
+        }
+        break;
+      }
+    }
+    break;
+  case 'S': [[fallthrough]];
+  case 's':
+    if (ix < s.length()) {
+      switch (s[ix++]) {
+      case 'U': [[fallthrough]];
+      case 'u':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'B': [[fallthrough]];
+          case 'b':
+            if (ix == s.length()) return { token::type_t::_sub };
+            break;
+          }
+        }
+        break;
+      case 'R': [[fallthrough]];
+      case 'r':
+        if (ix == s.length()) return { token::type_t::_sr };
+        break;
+      case 'L': [[fallthrough]];
+      case 'l':
+        if (ix == s.length()) return { token::type_t::_sl };
+        break;
+      }
+    }
+    break;
+  case 'R': [[fallthrough]];
+  case 'r':
+    if (ix < s.length()) {
+      switch (s[ix++]) {
+      case 'R': [[fallthrough]];
+      case 'r':
+        if (ix == s.length()) return { token::type_t::_rr };
+        break;
+      case 'L': [[fallthrough]];
+      case 'l':
+        if (ix == s.length()) return { token::type_t::_rl };
+        else { // ix < s.length() here, so s[ix] is in bounds
+          switch (s[ix++]) {
+          case '8':
+            if (ix == s.length()) return { token::type_t::_rl8 };
+            break;
+          }
+        }
+        break;
+      }
+    }
+    break;
+  case 'C': [[fallthrough]];
+  case 'c':
+    if (ix < s.length()) {
+      switch (s[ix++]) {
+      case 'L': [[fallthrough]];
+      case 'l':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'R': [[fallthrough]];
+          case 'r':
+            if (ix == s.length()) return { token::type_t::_clr };
+            break;
+          }
+        }
+        break;
+      }
+    }
+    break;
+  case 'D': [[fallthrough]];
+  case 'd':
+    if (ix < s.length()) {
+      switch (s[ix++]) {
+      case 'M': [[fallthrough]];
+      case 'm':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'A': [[fallthrough]];
+          case 'a':
+            if (ix == s.length()) return { token::type_t::_dma };
+            else { // ix < s.length() here, so s[ix] is in bounds
+              switch (s[ix++]) {
+              case 'H': [[fallthrough]];
+              case 'h':
+                if (ix == s.length()) return { token::type_t::_dmah };
+                break;
+              }
+            }
+            break;
+          }
+        }
+        break;
+      }
+    }
+    break;
+  case 'J': [[fallthrough]];
+  case 'j':
+    if (ix < s.length()) {
+      switch (s[ix++]) {
+      case 'M': [[fallthrough]];
+      case 'm':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'P': [[fallthrough]];
+          case 'p':
+            if (ix == s.length()) return { token::type_t::_jmp };
+            break;
+          }
+        }
+        break;
+      }
+    }
+    break;
+  case 'B': [[fallthrough]];
+  case 'b':
+    if (ix < s.length()) {
+      switch (s[ix++]) {
+      case 'T': [[fallthrough]];
+      case 't':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'M': [[fallthrough]];
+          case 'm':
+            if (ix == s.length()) return { token::type_t::_btm };
+            break;
+          }
+        }
+        break;
+      }
+    }
+    break;
+  case 'L': [[fallthrough]];
+  case 'l':
+    if (ix < s.length()) {
+      switch (s[ix++]) {
+      case 'P': [[fallthrough]];
+      case 'p':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'S': [[fallthrough]];
+          case 's':
+            if (ix == s.length()) return { token::type_t::_lps };
+            break;
+          }
+        }
+        break;
+      }
+    }
+    break;
+  case 'E': [[fallthrough]];
+  case 'e':
+    if (ix < s.length()) {
+      switch (s[ix++]) {
+      case 'N': [[fallthrough]];
+      case 'n':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'D': [[fallthrough]];
+          case 'd':
+            if (ix == s.length()) return { token::type_t::_end };
+            else { // ix < s.length() here, so s[ix] is in bounds
+              switch (s[ix++]) {
+              case 'I': [[fallthrough]];
+              case 'i':
+                if (ix == s.length()) return { token::type_t::_endi };
+                break;
+              case 'S': [[fallthrough]];
+              case 's':
+                if (ix == s.length()) return { token::type_t::_ends };
+                break;
+              }
+            }
+            break;
+          }
+        }
+        break;
+      case 'Q': [[fallthrough]];
+      case 'q':
+        if (ix < s.length()) {
+          switch (s[ix++]) {
+          case 'U': [[fallthrough]];
+          case 'u':
+            if (ix == s.length()) return { token::type_t::_equ };
+            break;
+          }
+        }
+        break;
+      }
+    }
+    break;
+  }
+  return {}; // no keyword matched: empty optional
+}
+
+};
+
+}
diff --git a/lexer.cpp b/lexer.cpp
new file mode 100644
index 0000000..a9fe88c
--- /dev/null
+++ b/lexer.cpp
@@ -0,0 +1,218 @@
+#include <string_view>
+#include <functional>
+#include <cstdint>
+#include <optional>
+
+#include "token.hpp"
+#include "num.hpp"
+#include "lexer.hpp"
+#include "keyword.hpp"
+
+namespace dsp {
+
+// Convert a single digit character to its numeric value as an N.
+//
+// Decimal digits map to 0..9 and the hex letters a..f / A..F map to
+// 10..15. Every other character maps to 0, the same value as '0'.
+template <typename N>
+constexpr static N parse_digit(const char c)
+{
+  switch (c) {
+  case '1': return 1;
+  case '2': return 2;
+  case '3': return 3;
+  case '4': return 4;
+  case '5': return 5;
+  case '6': return 6;
+  case '7': return 7;
+  case '8': return 8;
+  case '9': return 9;
+  case 'a': case 'A': return 10;
+  case 'b': case 'B': return 11;
+  case 'c': case 'C': return 12;
+  case 'd': case 'D': return 13;
+  case 'e': case 'E': return 14;
+  case 'f': case 'F': return 15;
+  // '0' and any unrecognised character share the zero result.
+  case '0':
+  default:
+    return 0;
+  }
+}
+
+// Fold the characters of s, most significant first, into a base-`base` value.
+// An empty view yields 0; digit validity and overflow are not checked here.
+template <typename N, int base>
+constexpr static N parse_number(const std::string_view s)
+{
+  N value = 0;
+  for (const char digit : s) {
+    value = value * base + parse_digit<N>(digit);
+  }
+  return value;
+}
+
+// Decimal literals: digit predicate plus base-10 conversion.
+struct dec_t {
+  constexpr static bool pred(const char c)
+  {
+    return c >= '0' && c <= '9';
+  }
+  // Returns the bare value of type N; the lexer builds the token around it.
+  template <typename N>
+  constexpr static N parse(const std::string_view s)
+  {
+    return parse_number<N, 10>(s);
+  }
+};
+
+// Hexadecimal literals: 0-9, a-f and A-F; base-16 conversion to a bare N.
+struct hex_t {
+  constexpr static bool pred(const char c)
+  {
+    return dec_t::pred(c)
+      || (c >= 'a' && c <= 'f')
+      || (c >= 'A' && c <= 'F');
+  }
+
+  template <typename N>
+  constexpr static N parse(const std::string_view s)
+  {
+    return parse_number<N, 16>(s);
+  }
+};
+
+// True for the ASCII letters a-z and A-Z.
+constexpr bool alpha_p(const char c)
+{
+  return (c >= 'a' && c <= 'z')
+    || (c >= 'A' && c <= 'Z');
+}
+
+// True for letters, decimal digits and '_'.
+constexpr bool alpha_numeric_p(const char c)
+{
+  return alpha_p(c) || dec_t::pred(c) || (c == '_');
+}
+
+// Scanner over `source`. Token lexemes are views into it; `pos` is sampled after a lexeme is read.
+struct lexer_t {
+  const std::string_view source;
+  std::string_view::size_type start_ix;   // first character of the current lexeme
+  std::string_view::size_type current_ix; // next character to consume
+  token_pos_t pos;
+
+  lexer_t() = delete;
+  constexpr lexer_t(const std::string_view source)
+    : source(source), start_ix(0), current_ix(0), pos{ .line = 1, .col = 0}
+  { }
+
+  bool at_end_p() { return current_ix >= source.length(); }
+
+  // Next unread character, or '\0' at end of input.
+  char peek()
+  {
+    if (at_end_p()) return '\0';
+    return source[current_ix];
+  }
+
+  // Consume the next character only when it equals `expected`.
+  bool match(const char expected)
+  {
+    if (at_end_p()) return false;
+    else if (source[current_ix] != expected) return false;
+    pos.col++;
+    current_ix++;
+    return true;
+  }
+
+  // Consume one character; the caller must ensure input remains.
+  char advance()
+  {
+    pos.col++;
+    return source[current_ix++];
+  }
+
+  // Text of the current lexeme; substr() takes (offset, count), not an end index.
+  const std::string_view lexeme()
+  {
+    return source.substr(start_ix, current_ix - start_ix);
+  }
+
+  // Consume the remaining digits accepted by T and emit a number token.
+  template <typename T>
+  token _number()
+  {
+    while (T::pred(peek())) advance();
+    return {pos, token::number, lexeme(), T::template parse<num_t>(lexeme())};
+  }
+
+  // Consume the rest of a word and emit a keyword or identifier token.
+  token _identifier()
+  {
+    while (alpha_numeric_p(peek())) advance();
+    const std::optional<enum token::type_t> found = keyword::find(lexeme());
+    if (found) return {pos, *found, lexeme()};
+    else       return {pos, token::identifier, lexeme()};
+  }
+
+  // Return the next token. Whitespace, ';' comments and characters that start
+  // no token are skipped; an eof token is returned once input is exhausted.
+  token scan_token()
+  {
+    using enum token::type_t;
+    while (!at_end_p()) {
+      start_ix = current_ix;
+      const char c = advance();
+      switch (c) {
+      case '(': return {pos, left_paren, lexeme()};
+      case ')': return {pos, right_paren, lexeme()};
+      case ',': return {pos, comma, lexeme()};
+      case '.': return {pos, dot, lexeme()};
+      case '+': return {pos, plus, lexeme()};
+      case '-': return {pos, minus, lexeme()};
+      case '*': return {pos, star, lexeme()};
+      case '/': return {pos, slash, lexeme()};
+      case '%': return {pos, percent, lexeme()};
+      case '~': return {pos, tilde, lexeme()};
+      case '&': return {pos, ampersand, lexeme()};
+      case '|': return {pos, bar, lexeme()};
+      case '^': return {pos, carot, lexeme()};
+      case '<':
+        if (match('<')) return {pos, left_shift, lexeme()};
+        break;
+      case '>':
+        if (match('>')) return {pos, right_shift, lexeme()};
+        break;
+      case ';':
+        while (!at_end_p() && peek() != '\n') advance();
+        break;
+      case ' ': case '\r': case '\t': break;
+      case '\n':
+        pos.line++;
+        pos.col = 0;
+        break;
+      case '$':
+        if (hex_t::pred(peek())) {
+          start_ix += 1; // keep the '$' prefix out of the lexeme
+          return _number<hex_t>();
+        }
+        break;
+      default:
+        // "0x" is a hex prefix only when a hex digit follows the 'x'.
+        if (c == '0' && peek() == 'x' && current_ix + 1 < source.length()
+            && hex_t::pred(source[current_ix + 1])) {
+          advance();
+          start_ix += 2; // keep the "0x" prefix out of the lexeme
+          return _number<hex_t>();
+        }
+        if (dec_t::pred(c)) return _number<dec_t>();
+        if (alpha_p(c)) return _identifier();
+        break; // TODO: report "Unexpected character." (also for a lone '<', '>' or '$')
+      }
+    }
+    return {pos, eof, source.substr(current_ix)};
+  }
+};
+
+}
diff --git a/lexer.hpp b/lexer.hpp
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/lexer.hpp
@@ -0,0 +1 @@
+
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..475a1f3
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,64 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+
+#include "token.hpp"
+
+static bool had_error = false;
+
+static void report(int line, std::string where, std::string message)
+{ // Writes "[line N] Error<where>: <message>" as one complete line to stderr and sets had_error.
+  std::cerr << "[line " << line << "] Error" << where << ": " << message << '\n';
+  had_error = true;
+}
+
+void error(int line, std::string message) // forwards to report() with an empty "where" part
+{
+  report(line, std::string{}, message);
+}
+
+static void run(std::string source)
+{
+  // Only wraps the text in a view for now; the view is not used any further.
+  [[maybe_unused]] const std::string_view buf {source};
+}
+
+static void run_prompt()
+{
+  // Show "> ", run each line read from stdin, and stop when getline fails.
+  constexpr auto prompt = "> ";
+  for (std::string line;;) {
+    std::cout << prompt << std::flush;
+    if (!std::getline(std::cin, line)) break;
+    run(line);
+  }
+}
+
+static int run_file(char const * const filename)
+{ // Reads the whole file and runs it; returns -1 on an I/O failure, otherwise had_error.
+  std::ifstream is {filename, std::ios::binary | std::ios::ate};
+  if (!is.is_open()) {
+    std::cerr << "failed to open " << filename << std::endl;
+    return -1;
+  }
+  // tellg() yields -1 on failure (e.g. unseekable input); never size the buffer from that.
+  const std::streamoff size = is.tellg();
+  std::string buf(size < 0 ? 0 : static_cast<std::string::size_type>(size), '\0');
+  if (size < 0 || !is.seekg(0) || !is.read(buf.data(), size)) {
+    std::cerr << "read failed" << std::endl;
+    return -1;
+  }
+  run(buf);
+  return had_error;
+}
+
+int main(const int argc, char const * const argv[])
+{
+  if (argc == 2) return run_file(argv[1]); // one argument: run that file
+  if (argc != 1) {
+    std::cerr << "Usage: " << argv[0] << " [filename]" << std::endl;
+    return -1;
+  }
+  run_prompt(); // no arguments: interactive prompt
+  return had_error;
+}
diff --git a/num.hpp b/num.hpp
new file mode 100644
index 0000000..e52fb35
--- /dev/null
+++ b/num.hpp
@@ -0,0 +1,5 @@
+#pragma once
+
+#include <cstdint>
+
+using num_t = int64_t;
diff --git a/token.hpp b/token.hpp
new file mode 100644
index 0000000..91edfd3
--- /dev/null
+++ b/token.hpp
@@ -0,0 +1,194 @@
+#pragma once
+
+#include <string_view>
+#include <ostream>
+#include <variant>
+
+#include "num.hpp"
+
+namespace dsp {
+
+struct object_t { // empty: no members, and nothing else in this header refers to it
+};
+
+struct token_pos_t { // source position carried by every token
+  int line; // lexer_t starts this at 1 and increments it on each '\n'
+  int col;  // lexer_t starts this at 0, bumps it per consumed character, resets it on '\n'
+};
+
+template <typename N> // N: type of the optional numeric payload stored in `literal`
+struct token_t {
+  enum type_t {
+    left_paren,
+    right_paren,
+
+    comma,
+    dot,
+
+    // operators
+    plus,
+    minus,
+    star,
+    slash,
+    percent,
+    tilde,
+    ampersand,
+    bar,
+    carot,
+    left_shift,
+    right_shift,
+    equal,
+
+    // literals
+    identifier,
+    string,
+    number,
+
+    // keywords
+    _alh,
+    _all,
+    _alu,
+    _m0,
+    _m1,
+    _m2,
+    _m3,
+    _mc0,
+    _mc1,
+    _mc2,
+    _mc3,
+    _mul,
+    _nop,
+    _and,
+    _or,
+    _xor,
+    _add,
+    _sub,
+    _ad2,
+    _sr,
+    _rr,
+    _sl,
+    _rl,
+    _rl8,
+    _clr,
+    _mov,
+    _mvi,
+    _dma,
+    _dmah,
+    _jmp,
+    _btm,
+    _lps,
+    _end,
+    _endi,
+    _equ,
+    _org,
+    _ends,
+
+    eof,
+  };
+  // Payload: std::monostate when no value was supplied, otherwise the N given at construction.
+  using literal_t = std::variant<std::monostate, N>;
+
+  const token_pos_t pos;
+  const type_t type;
+  const std::string_view lexeme;
+  const literal_t literal;
+
+  token_t() = delete;
+  // Token with a numeric payload.
+  constexpr token_t(token_pos_t pos, type_t type, const std::string_view lexeme, N number)
+    : pos(pos), type(type), lexeme(lexeme), literal(number)
+  { }
+  // Token without a payload: `literal` is value-initialised and holds std::monostate.
+  constexpr token_t(token_pos_t pos, type_t type, const std::string_view lexeme)
+    : pos(pos), type(type), lexeme(lexeme), literal()
+  { }
+  // Streams the upper-case display name of a token type (keywords without the leading '_').
+  friend std::ostream& operator<<(std::ostream& os, const enum token_t<N>::type_t type)
+  {
+    switch (type) {
+    case left_paren   : return os << "LEFT_PAREN";
+    case right_paren  : return os << "RIGHT_PAREN";
+
+    case comma        : return os << "COMMA";
+    case dot          : return os << "DOT";
+
+      // operators
+    case plus         : return os << "PLUS";
+    case minus        : return os << "MINUS";
+    case star         : return os << "STAR";
+    case slash        : return os << "SLASH";
+    case percent      : return os << "PERCENT";
+    case tilde        : return os << "TILDE";
+    case ampersand    : return os << "AMPERSAND";
+    case bar          : return os << "BAR";
+    case carot        : return os << "CAROT";
+    case left_shift   : return os << "LEFT_SHIFT";
+    case right_shift  : return os << "RIGHT_SHIFT";
+    case equal        : return os << "EQUAL";
+
+      // literals
+    case identifier   : return os << "IDENTIFIER";
+    case string       : return os << "STRING";
+    case number       : return os << "NUMBER";
+
+      // keywords
+    case _alh         : return os << "ALH";
+    case _all         : return os << "ALL";
+    case _alu         : return os << "ALU";
+    case _m0          : return os << "M0";
+    case _m1          : return os << "M1";
+    case _m2          : return os << "M2";
+    case _m3          : return os << "M3";
+    case _mc0         : return os << "MC0";
+    case _mc1         : return os << "MC1";
+    case _mc2         : return os << "MC2";
+    case _mc3         : return os << "MC3";
+    case _mul         : return os << "MUL";
+    case _nop         : return os << "NOP";
+    case _and         : return os << "AND";
+    case _or          : return os << "OR";
+    case _xor         : return os << "XOR";
+    case _add         : return os << "ADD";
+    case _sub         : return os << "SUB";
+    case _ad2         : return os << "AD2";
+    case _sr          : return os << "SR";
+    case _rr          : return os << "RR";
+    case _sl          : return os << "SL";
+    case _rl          : return os << "RL";
+    case _rl8         : return os << "RL8";
+    case _clr         : return os << "CLR";
+    case _mov         : return os << "MOV";
+    case _mvi         : return os << "MVI";
+    case _dma         : return os << "DMA";
+    case _dmah        : return os << "DMAH";
+    case _jmp         : return os << "JMP";
+    case _btm         : return os << "BTM";
+    case _lps         : return os << "LPS";
+    case _end         : return os << "END";
+    case _endi        : return os << "ENDI";
+    case _equ         : return os << "EQU";
+    case _org         : return os << "ORG";
+    case _ends        : return os << "ENDS";
+
+    case eof          : return os << "EOF";
+    }
+    __builtin_unreachable(); // every enumerator returns from the switch above
+  }
+  // Streams "<TYPE> <lexeme>", followed by "/<value>" when `literal` holds an N.
+  friend std::ostream& operator<<(std::ostream& os, const token_t& token)
+  {
+    os << token.type << ' ' << token.lexeme;
+
+    if (auto* v = std::get_if<N>(&token.literal)) {
+      os << '/' << *v;
+    } else { // std::monostate
+    }
+
+    return os;
+  }
+
+};
+
+}
+
+using token = dsp::token_t<num_t>;