initial

2023-08-14 16:52:56 +00:00 · 2023-08-14 16:52:56 +00:00 · 29428c7a92
commit 29428c7a92
9 changed files with 1004 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,4 @@
 main
 *.o
 *.gch
 *.d
--- a/33
+++ b/33
@ -0,0 +1,33 @@
 # Build rules for the `main` executable (GNU make syntax).
 # Warnings are errors (-Werror) and compilation stops at the first one (-Wfatal-errors).
 CXXFLAGS = -Og -g -Wall -Wextra -Werror -Wfatal-errors -Wpedantic -std=c++20
 LDFLAGS =
 # TARGET is prepended to the compiler name; empty selects plain `g++`.
 # NOTE(review): presumably a cross-toolchain prefix such as `arch-vendor-` -- confirm.
 TARGET =
 CXX = $(TARGET)g++
 # Only the sources listed here are compiled and linked; other .cpp files are not built.
 SRC = main.cpp
 OBJ = $(patsubst %.cpp,%.o,$(SRC))
 DEP = $(patsubst %.cpp,%.d,$(SRC))
 all: main
 # Pull in the compiler-generated header dependencies; the leading '-' ignores missing .d files.
 -include $(DEP)
 # Compile one translation unit; -MMD -MF writes its dependency list to <name>.d as a side effect.
 %.o: %.cpp
 	$(CXX) $(CXXFLAGS) -MMD -MF $(basename $<).d -c $< -o $@
 main: $(OBJ)
 	$(CXX) $(LDFLAGS) $^ -o $@
 # Removes objects, dependency files and precompiled headers; the `main` binary is left in place.
 clean:
 	rm -f *.o *.d *.gch
 # An empty prerequisite list clears the suffix list, disabling make's old-style suffix rules.
 .SUFFIXES:
 .INTERMEDIATE:
 # With no prerequisites, every target is treated as secondary (never auto-deleted as intermediate).
 .SECONDARY:
 .PHONY: all clean
 # Recipe-less pattern rules: cancel make's built-in RCS/SCCS checkout rules.
 %: RCS/%,v
 %: RCS/%
 %: %,v
 %: s.%
 %: SCCS/s.%
--- a/build_radix_tree.py
+++ b/build_radix_tree.py
@ -0,0 +1,99 @@
 def build_radix_tree(ops: list[str]) -> dict:
    """Build a character-per-edge trie from the keyword strings in ``ops``.

    Every node is a dict mapping a single character to a
    ``(terminal, children)`` tuple. ``terminal`` is the full keyword that ends
    on that character, or ``None`` when no keyword ends there; ``children`` is
    the node holding the characters that may follow. Insertion order of the
    keys follows the order in which keywords first introduce them.
    """
    root = {}
    for op in ops:
        node = root
        final = len(op) - 1
        for pos, ch in enumerate(op):
            _, children = node.setdefault(ch, (None, {}))
            if pos == final:
                # Record the keyword here but keep any longer keywords below it.
                node[ch] = (op, children)
            else:
                node = children
    return root
 def indent(i):
    """Return the leading whitespace for nesting depth ``i`` (two spaces per level)."""
    return "  " * i
 def print_switch(d, level=0):
    """Print a nested C++ ``switch`` over ``s[ix++]`` for trie node ``d``.

    ``d`` maps a character to ``(terminal, children)`` as produced by
    ``build_radix_tree``. Alphabetic characters get an upper-case and a
    lower-case ``case`` label so the generated match is case-insensitive.
    ``level`` is the nesting depth used for indentation; each recursion into a
    child node goes two levels deeper.
    """
    outer = indent(level)
    inner = indent(level + 1)
    print(f"{outer}switch (s[ix++]) {{")
    for ch, (keyword, subtree) in d.items():
        upper, lower = ch.upper(), ch.lower()
        if upper != lower:
            print(f"{outer}case '{upper}': [[fallthrough]];")
        print(f"{outer}case '{lower}':")
        if keyword is not None:
            # A keyword ends here: accept it only when the whole input is consumed.
            print(f"{inner}if (ix == s.length()) return {{ token::type_t::_{keyword} }};")
        if subtree:
            # Longer keywords continue below; only descend while input remains.
            opener = "else {" if keyword is not None else "if (ix < s.length()) {"
            print(inner + opener)
            print_switch(subtree, level + 2)
            print(inner + "}")
        print(inner + "break;")
    print(outer + "}")
 def print_keyword_func(root):
    """Print a complete C++ header defining ``dsp::keyword::find`` to stdout.

    ``root`` is the trie returned by ``build_radix_tree``. The emitted header
    starts with a "generated" banner and an include guard, includes the
    standard headers the generated code names directly, and wraps the switch
    produced by ``print_switch`` in a ``constexpr`` lookup that returns an
    empty optional when the input is not a keyword.
    """
    p = print
    inden1 = indent(1)
    # The banner and guard keep the generated header safe to include more than
    # once and make it obvious that edits belong in this script.
    p("// Generated by build_radix_tree.py; edit the script, not this file.")
    p("#pragma once")
    p()
    p("#include <optional>")
    p("#include <string_view>")
    p('#include "token.hpp"')
    p()
    p("namespace dsp {")
    p()
    p("struct keyword {")
    p()
    p("inline static constexpr std::optional<enum token::type_t>")
    p("find(const std::string_view s)")
    p("{")
    # The generated switch reads s[0] unconditionally, so reject empty input first.
    p(inden1 + "if (s.length() == 0) { return {}; }")
    p()
    p(inden1 + "std::string_view::size_type ix = 0;")
    p()
    print_switch(root, level=1)
    p(inden1 + "return {};")
    p("}")
    p()
    p("};")
    p()
    p("}")
 from pprint import pprint
 # Keywords recognised by the generated lookup. The order matters: it decides
 # the order of the `case` labels in the emitted switch.
 keywords = [
    "alh", "all", "alu",
    "m0", "m1", "m2", "m3",
    "mc0", "mc1", "mc2", "mc3",
    "mul",
    "nop",
    "and", "or", "xor",
    "add", "sub", "ad2",
    "sr", "rr", "sl", "rl", "rl8",
    "clr",
    "mov", "mvi",
    "dma", "dmah",
    "jmp", "btm", "lps",
    "end", "endi",
    "equ", "org",
    "ends",
 ]
 d = build_radix_tree(keywords)
 # Running the script writes the generated header text to stdout.
 print_keyword_func(d)
--- a/keyword.hpp
+++ b/keyword.hpp
@ -0,0 +1,386 @@
 // Generated by build_radix_tree.py; edit the script, not this file.
 #pragma once
 #include <optional>
 #include <string_view>
 #include "token.hpp"
 namespace dsp {
 /// Case-insensitive keyword lookup implemented as a nested switch over the
 /// characters of the candidate string.
 struct keyword {
 /// Returns the keyword token type spelled by `s` (ASCII letters compared
 /// case-insensitively), or an empty optional when `s` is not a keyword.
 /// The whole of `s` must match: prefixes and longer strings are rejected.
 inline static constexpr std::optional<enum token::type_t>
 find(const std::string_view s)
 {
  if (s.length() == 0) { return {}; }
  std::string_view::size_type ix = 0;
  switch (s[ix++]) {
  case 'A': [[fallthrough]];
  case 'a':
    if (ix < s.length()) {
      switch (s[ix++]) {
      case 'L': [[fallthrough]];
      case 'l':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'H': [[fallthrough]];
          case 'h':
            if (ix == s.length()) return { token::type_t::_alh };
            break;
          case 'L': [[fallthrough]];
          case 'l':
            if (ix == s.length()) return { token::type_t::_all };
            break;
          case 'U': [[fallthrough]];
          case 'u':
            if (ix == s.length()) return { token::type_t::_alu };
            break;
          }
        }
        break;
      case 'N': [[fallthrough]];
      case 'n':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'D': [[fallthrough]];
          case 'd':
            if (ix == s.length()) return { token::type_t::_and };
            break;
          }
        }
        break;
      case 'D': [[fallthrough]];
      case 'd':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'D': [[fallthrough]];
          case 'd':
            if (ix == s.length()) return { token::type_t::_add };
            break;
          case '2':
            if (ix == s.length()) return { token::type_t::_ad2 };
            break;
          }
        }
        break;
      }
    }
    break;
  case 'M': [[fallthrough]];
  case 'm':
    if (ix < s.length()) {
      switch (s[ix++]) {
      case '0':
        if (ix == s.length()) return { token::type_t::_m0 };
        break;
      case '1':
        if (ix == s.length()) return { token::type_t::_m1 };
        break;
      case '2':
        if (ix == s.length()) return { token::type_t::_m2 };
        break;
      case '3':
        if (ix == s.length()) return { token::type_t::_m3 };
        break;
      case 'C': [[fallthrough]];
      case 'c':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case '0':
            if (ix == s.length()) return { token::type_t::_mc0 };
            break;
          case '1':
            if (ix == s.length()) return { token::type_t::_mc1 };
            break;
          case '2':
            if (ix == s.length()) return { token::type_t::_mc2 };
            break;
          case '3':
            if (ix == s.length()) return { token::type_t::_mc3 };
            break;
          }
        }
        break;
      case 'U': [[fallthrough]];
      case 'u':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'L': [[fallthrough]];
          case 'l':
            if (ix == s.length()) return { token::type_t::_mul };
            break;
          }
        }
        break;
      case 'O': [[fallthrough]];
      case 'o':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'V': [[fallthrough]];
          case 'v':
            if (ix == s.length()) return { token::type_t::_mov };
            break;
          }
        }
        break;
      case 'V': [[fallthrough]];
      case 'v':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'I': [[fallthrough]];
          case 'i':
            if (ix == s.length()) return { token::type_t::_mvi };
            break;
          }
        }
        break;
      }
    }
    break;
  case 'N': [[fallthrough]];
  case 'n':
    if (ix < s.length()) {
      switch (s[ix++]) {
      case 'O': [[fallthrough]];
      case 'o':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'P': [[fallthrough]];
          case 'p':
            if (ix == s.length()) return { token::type_t::_nop };
            break;
          }
        }
        break;
      }
    }
    break;
  case 'O': [[fallthrough]];
  case 'o':
    if (ix < s.length()) {
      switch (s[ix++]) {
      case 'R': [[fallthrough]];
      case 'r':
        if (ix == s.length()) return { token::type_t::_or };
        else {
          switch (s[ix++]) {
          case 'G': [[fallthrough]];
          case 'g':
            if (ix == s.length()) return { token::type_t::_org };
            break;
          }
        }
        break;
      }
    }
    break;
  case 'X': [[fallthrough]];
  case 'x':
    if (ix < s.length()) {
      switch (s[ix++]) {
      case 'O': [[fallthrough]];
      case 'o':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'R': [[fallthrough]];
          case 'r':
            if (ix == s.length()) return { token::type_t::_xor };
            break;
          }
        }
        break;
      }
    }
    break;
  case 'S': [[fallthrough]];
  case 's':
    if (ix < s.length()) {
      switch (s[ix++]) {
      case 'U': [[fallthrough]];
      case 'u':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'B': [[fallthrough]];
          case 'b':
            if (ix == s.length()) return { token::type_t::_sub };
            break;
          }
        }
        break;
      case 'R': [[fallthrough]];
      case 'r':
        if (ix == s.length()) return { token::type_t::_sr };
        break;
      case 'L': [[fallthrough]];
      case 'l':
        if (ix == s.length()) return { token::type_t::_sl };
        break;
      }
    }
    break;
  case 'R': [[fallthrough]];
  case 'r':
    if (ix < s.length()) {
      switch (s[ix++]) {
      case 'R': [[fallthrough]];
      case 'r':
        if (ix == s.length()) return { token::type_t::_rr };
        break;
      case 'L': [[fallthrough]];
      case 'l':
        if (ix == s.length()) return { token::type_t::_rl };
        else {
          switch (s[ix++]) {
          case '8':
            if (ix == s.length()) return { token::type_t::_rl8 };
            break;
          }
        }
        break;
      }
    }
    break;
  case 'C': [[fallthrough]];
  case 'c':
    if (ix < s.length()) {
      switch (s[ix++]) {
      case 'L': [[fallthrough]];
      case 'l':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'R': [[fallthrough]];
          case 'r':
            if (ix == s.length()) return { token::type_t::_clr };
            break;
          }
        }
        break;
      }
    }
    break;
  case 'D': [[fallthrough]];
  case 'd':
    if (ix < s.length()) {
      switch (s[ix++]) {
      case 'M': [[fallthrough]];
      case 'm':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'A': [[fallthrough]];
          case 'a':
            if (ix == s.length()) return { token::type_t::_dma };
            else {
              switch (s[ix++]) {
              case 'H': [[fallthrough]];
              case 'h':
                if (ix == s.length()) return { token::type_t::_dmah };
                break;
              }
            }
            break;
          }
        }
        break;
      }
    }
    break;
  case 'J': [[fallthrough]];
  case 'j':
    if (ix < s.length()) {
      switch (s[ix++]) {
      case 'M': [[fallthrough]];
      case 'm':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'P': [[fallthrough]];
          case 'p':
            if (ix == s.length()) return { token::type_t::_jmp };
            break;
          }
        }
        break;
      }
    }
    break;
  case 'B': [[fallthrough]];
  case 'b':
    if (ix < s.length()) {
      switch (s[ix++]) {
      case 'T': [[fallthrough]];
      case 't':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'M': [[fallthrough]];
          case 'm':
            if (ix == s.length()) return { token::type_t::_btm };
            break;
          }
        }
        break;
      }
    }
    break;
  case 'L': [[fallthrough]];
  case 'l':
    if (ix < s.length()) {
      switch (s[ix++]) {
      case 'P': [[fallthrough]];
      case 'p':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'S': [[fallthrough]];
          case 's':
            if (ix == s.length()) return { token::type_t::_lps };
            break;
          }
        }
        break;
      }
    }
    break;
  case 'E': [[fallthrough]];
  case 'e':
    if (ix < s.length()) {
      switch (s[ix++]) {
      case 'N': [[fallthrough]];
      case 'n':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'D': [[fallthrough]];
          case 'd':
            if (ix == s.length()) return { token::type_t::_end };
            else {
              switch (s[ix++]) {
              case 'I': [[fallthrough]];
              case 'i':
                if (ix == s.length()) return { token::type_t::_endi };
                break;
              case 'S': [[fallthrough]];
              case 's':
                if (ix == s.length()) return { token::type_t::_ends };
                break;
              }
            }
            break;
          }
        }
        break;
      case 'Q': [[fallthrough]];
      case 'q':
        if (ix < s.length()) {
          switch (s[ix++]) {
          case 'U': [[fallthrough]];
          case 'u':
            if (ix == s.length()) return { token::type_t::_equ };
            break;
          }
        }
        break;
      }
    }
    break;
  }
  return {};
 }
 };
 }
--- a/lexer.cpp
+++ b/lexer.cpp
@ -0,0 +1,218 @@
 #include <string_view>
 #include <functional>
 #include <cstdint>
 #include <optional>
 #include "token.hpp"
 #include "num.hpp"
 #include "lexer.hpp"
 #include "keyword.hpp"
 namespace dsp {
 /// Converts one digit character ('0'-'9', 'a'-'f', 'A'-'F') to its numeric
 /// value as an N. Any other character yields 0.
 template <typename N>
 constexpr static N parse_digit(const char c)
 {
  if (c >= '0' && c <= '9') return static_cast<N>(c - '0');
  if (c >= 'a' && c <= 'f') return static_cast<N>(c - 'a' + 10);
  if (c >= 'A' && c <= 'F') return static_cast<N>(c - 'A' + 10);
  // Not a digit in any supported base: mirror the silent zero of the lookup.
  return 0;
 }
 /// Accumulates the characters of `s` as digits in the given `base`, most
 /// significant first, and returns the result as an N. An empty `s` yields 0.
 /// Characters are converted with parse_digit and are not checked against
 /// `base` here.
 template <typename N, int base>
 constexpr static N parse_number(const std::string_view s)
 {
  N value = 0;
  for (const char digit : s) {
    value *= base;
    value += parse_digit<N>(digit);
  }
  return value;
 }
 struct dec_t {
  constexpr static bool pred(const char c)
  {
    return c >= '0' && c <= '9';
  }
  template <typename N>
  constexpr static token_t<N> parse(const std::string_view s)
  {
    return parse_number<N, 10>(s);
  }
 };
 struct hex_t {
  constexpr static bool pred(const char c)
  {
    return dec_t::pred(c)
      || (c >= 'a' && c <= 'f')
      || (c >= 'A' && c <= 'F');
  }
  template <typename N>
  constexpr static token_t<N> parse(const std::string_view s)
  {
    return parse_number<N, 16>(s);
  }
 };
 constexpr bool alpha_p(const char c)
 {
  return (c >= 'a' && c <= 'z')
    || (c >= 'A' && c <= 'Z');
 }
 constexpr bool alpha_numeric_p(const char c)
 {
  return alpha_p(c) || dec_t::pred(c) || (c == '_');
 }
 struct lexer_t {
  const std::string_view source;
  std::string_view::size_type start_ix;
  std::string_view::size_type current_ix;
  token_pos_t pos;
  lexer_t() = delete;
  constexpr lexer_t(const std::string_view source)
    : source(source), start_ix(0), pos{ .line = 1, .col = 0}
  { }
  bool at_end_p()
  {
    return current_ix >= source.length();
  }
  char peek()
  {
    if (at_end_p()) return '\0';
    return source[current_ix];
  }
  bool match(const char expected)
  {
    if (at_end_p()) return false;
    else if (source[current_ix] != expected) return false;
    pos.col++;
    current_ix++;
    return true;
  }
  char advance()
  {
    pos.col++;
    return source[current_ix++];
  }
  const std::string_view lexeme()
  {
    return source.substr(start_ix, current_ix);
  }
  template <typename T>
  token _number()
  {
    while (T::pred(peek())) advance();
    return {pos, token::number, lexeme(), T::parse(lexeme())};
  }
  token _identifier()
  {
    while (alpha_numeric_p(peek())) advance();
    std::optional<enum token::type_t> keyword = keyword::find(lexeme());
    if (keyword) return {pos, *keyword, lexeme()};
    else         return {pos, token::identifier, lexeme()};
  }
  token scan_token()
  {
    using enum token::type_t;
    start_ix = current_ix;
    const char c = advance();
    switch (c) {
    case '(': return {pos, left_paren, lexeme()};
    case ')': return {pos, right_paren, lexeme()};
    case ',': return {pos, comma, lexeme()};
    case '.': return {pos, dot, lexeme()};
    case '+': return {pos, plus, lexeme()};
    case '-': return {pos, minus, lexeme()};
    case '*': return {pos, star, lexeme()};
    case '/': return {pos, slash, lexeme()};
    case '%': return {pos, percent, lexeme()};
    case '~': return {pos, tilde, lexeme()};
    case '&': return {pos, ampersand, lexeme()};
    case '|': return {pos, bar, lexeme()};
    case '^': return {pos, carot, lexeme()};
    case '<':
      if (match('<')) return {pos, left_shift, lexeme()};
      break;
    case '>':
      if (match('>')) return {pos, right_shift, lexeme()};
      break;
    case ';':
      while (!at_end_p() && peek() != '\n') advance();
      break;
    case ' ':
    case '\r':
    case '\t':
      break;
    case '\n':
      pos.line++;
      pos.col = 0;
      break;
    case '$':
      if (hex_t::pred(peek())) {
 	start_ix += 1;
 	return _number<hex_t>();
      }
      [[fallthrough]];
    case '0':
      if (match('x')) {
 	if (hex_t::pred(peek())) {
 	  start_ix += 2;
 	  return _number<hex_t>();
 	}
      }
      [[fallthrough]];
    default:
      if (dec_t::pred(c)) {
 	return _number<dec_t>();
      } else if (alpha_p(c)) {
 	return _identifier();
      } else {
 	//error(pos.line, "Unexpected character.");
      }
      break;
    }
  }
 };
 }
--- a/lexer.hpp
+++ b/lexer.hpp
@ -0,0 +1 @@
--- a/main.cpp
+++ b/main.cpp
@ -0,0 +1,64 @@
 #include <iostream>
 #include <fstream>
 #include <string>
 #include "token.hpp"
 /// Set to true by report() and never cleared; used to derive the exit status.
 static bool had_error = false;
 /// Writes "[line N] Error<where>: <message>" as one line on stderr and
 /// records that an error occurred.
 /// @param line    source line the error refers to
 /// @param where   text inserted directly after "Error" (may be empty)
 /// @param message description of the problem
 static void report(int line, const std::string& where, const std::string& message)
 {
  // End the line so consecutive reports do not run together.
  std::cerr << "[line " << line << "] Error" << where << ": " << message << '\n';
  had_error = true;
 }
 /// Reports `message` for source line `line` with no location suffix.
 void error(int line, std::string message)
 {
  const std::string no_location;
  report(line, no_location, message);
 }
 /// Placeholder entry point for processing one chunk of source text.
 /// It currently only forms a view over `source` and does nothing with it.
 static void run(std::string source)
 {
  [[maybe_unused]] const std::string_view buf{source};
 }
 /// Interactive loop: prints "> ", reads one line from stdin, passes it to
 /// run(), and repeats until reading fails (for example at end of input).
 static void run_prompt()
 {
  constexpr auto prompt = "> ";
  std::string line;
  for (;;) {
    std::cout << prompt << std::flush;
    if (!std::getline(std::cin, line)) break;
    run(line);
  }
 }
 /// Reads the whole of `filename` into memory and passes it to run().
 /// @return -1 if the file cannot be opened, sized or read; otherwise 1 if
 ///         any error was reported and 0 if none was.
 static int run_file(char const * const filename)
 {
  // Open positioned at the end so tellg() yields the file size.
  std::ifstream is {filename, std::ios::binary | std::ios::ate};
  if (!is.is_open()) {
    std::cerr << "failed to open " << filename << '\n';
    return -1;
  }
  // tellg() reports failure as -1, which must not be used as a buffer size.
  const std::streamoff size = static_cast<std::streamoff>(is.tellg());
  if (size < 0) {
    std::cerr << "failed to determine size of " << filename << '\n';
    return -1;
  }
  std::string buf(static_cast<std::string::size_type>(size), '\0');
  is.seekg(0);
  if (!is.read(buf.data(), static_cast<std::streamsize>(size))) {
    std::cerr << "read failed" << '\n';
    return -1;
  }
  run(buf);
  return had_error ? 1 : 0;
 }
 /// With no arguments runs the interactive prompt; with one argument runs
 /// that file; otherwise prints a usage line to stderr and returns -1.
 int main(const int argc, char const * const argv[])
 {
  if (argc == 1) {
    run_prompt();
    return had_error;
  }
  if (argc == 2) {
    return run_file(argv[1]);
  }
  std::cerr << "Usage: " << argv[0] << " [filename]" << std::endl;
  return -1;
 }
--- a/num.hpp
+++ b/num.hpp
@ -0,0 +1,5 @@
 #pragma once
 #include <cstdint>
 // Signed 64-bit integer type; token.hpp uses it as the number type of dsp::token_t.
 using num_t = int64_t;
--- a/token.hpp
+++ b/token.hpp
@ -0,0 +1,194 @@
 #pragma once
 #include <string_view>
 #include <ostream>
 #include <variant>
 #include "num.hpp"
 namespace dsp {
 // Empty placeholder type: it declares no members.
 // NOTE(review): nothing in the visible sources uses it yet -- confirm intended purpose.
 struct object_t {
 };
 // Source position attached to a token.
 // NOTE(review): the lexer starts at line 1 / col 0 and bumps col once per consumed
 // character, so col presumably refers to the last character of the token -- confirm.
 struct token_pos_t {
  int line;
  int col;
 };
 // A single lexical token: its position, kind, source text and, for number
 // tokens, the parsed value. N is the numeric type stored in the literal.
 // All members are const, so a token cannot be assigned to after construction.
 template <typename N>
 struct token_t {
  // Kind of token. Keyword enumerators carry a leading underscore followed by
  // the keyword's lower-case spelling.
  enum type_t {
    left_paren,
    right_paren,
    comma,
    dot,
    // operators
    plus,
    minus,
    star,
    slash,
    percent,
    tilde,
    ampersand,
    bar,
    carot,
    left_shift,
    right_shift,
    equal,
    // literals
    identifier,
    string,
    number,
    // keywords
    _alh,
    _all,
    _alu,
    _m0,
    _m1,
    _m2,
    _m3,
    _mc0,
    _mc1,
    _mc2,
    _mc3,
    _mul,
    _nop,
    _and,
    _or,
    _xor,
    _add,
    _sub,
    _ad2,
    _sr,
    _rr,
    _sl,
    _rl,
    _rl8,
    _clr,
    _mov,
    _mvi,
    _dma,
    _dmah,
    _jmp,
    _btm,
    _lps,
    _end,
    _endi,
    _equ,
    _org,
    _ends,
    eof,
  };
  // Either no value (std::monostate) or a number of type N.
  using literal_t = std::variant<std::monostate, N>;
  const token_pos_t pos;
  const type_t type;
  // View into the source text; the token does not own these characters.
  const std::string_view lexeme;
  const literal_t literal;
  token_t() = delete;
  // Builds a token that carries a numeric literal value.
  constexpr token_t(token_pos_t pos, type_t type, const std::string_view lexeme, N number)
    : pos(pos), type(type), lexeme(lexeme), literal(number)
  { }
  // Builds a token without a literal value (literal holds std::monostate).
  constexpr token_t(token_pos_t pos, type_t type, const std::string_view lexeme)
    : pos(pos), type(type), lexeme(lexeme), literal()
  { }
  // Streams the upper-case name of a token type. Every enumerator has a case,
  // so reaching the end of the switch is declared unreachable.
  friend std::ostream& operator<<(std::ostream& os, const enum token_t<N>::type_t type)
  {
    switch (type) {
    case left_paren   : return os << "LEFT_PAREN";
    case right_paren  : return os << "RIGHT_PAREN";
    case comma        : return os << "COMMA";
    case dot          : return os << "DOT";
      // operators
    case plus         : return os << "PLUS";
    case minus        : return os << "MINUS";
    case star         : return os << "STAR";
    case slash        : return os << "SLASH";
    case percent      : return os << "PERCENT";
    case tilde        : return os << "TILDE";
    case ampersand    : return os << "AMPERSAND";
    case bar          : return os << "BAR";
    case carot        : return os << "CAROT";
    case left_shift   : return os << "LEFT_SHIFT";
    case right_shift  : return os << "RIGHT_SHIFT";
    case equal        : return os << "EQUAL";
      // literals
    case identifier   : return os << "IDENTIFIER";
    case string       : return os << "STRING";
    case number       : return os << "NUMBER";
      // keywords
    case _alh         : return os << "ALH";
    case _all         : return os << "ALL";
    case _alu         : return os << "ALU";
    case _m0          : return os << "M0";
    case _m1          : return os << "M1";
    case _m2          : return os << "M2";
    case _m3          : return os << "M3";
    case _mc0         : return os << "MC0";
    case _mc1         : return os << "MC1";
    case _mc2         : return os << "MC2";
    case _mc3         : return os << "MC3";
    case _mul         : return os << "MUL";
    case _nop         : return os << "NOP";
    case _and         : return os << "AND";
    case _or          : return os << "OR";
    case _xor         : return os << "XOR";
    case _add         : return os << "ADD";
    case _sub         : return os << "SUB";
    case _ad2         : return os << "AD2";
    case _sr          : return os << "SR";
    case _rr          : return os << "RR";
    case _sl          : return os << "SL";
    case _rl          : return os << "RL";
    case _rl8         : return os << "RL8";
    case _clr         : return os << "CLR";
    case _mov         : return os << "MOV";
    case _mvi         : return os << "MVI";
    case _dma         : return os << "DMA";
    case _dmah        : return os << "DMAH";
    case _jmp         : return os << "JMP";
    case _btm         : return os << "BTM";
    case _lps         : return os << "LPS";
    case _end         : return os << "END";
    case _endi        : return os << "ENDI";
    case _equ         : return os << "EQU";
    case _org         : return os << "ORG";
    case _ends        : return os << "ENDS";
    case eof          : return os << "EOF";
    }
    __builtin_unreachable();
  }
  // Streams "<TYPE> <lexeme>", followed by "/<value>" when the token holds a number.
  friend std::ostream& operator<<(std::ostream& os, const token_t& token)
  {
    os << token.type << ' ' << token.lexeme;
    if (auto* v = std::get_if<N>(&token.literal)) {
      os << '/' << *v;
    } else { // std::monostate
    }
    return os;
  }
 };
 }
 // Global-scope shorthand for the token type instantiated with num_t as its number type.
 using token = dsp::token_t<num_t>;