This commit is contained in:
Zack Buhman 2023-08-14 16:52:56 +00:00
commit 29428c7a92
9 changed files with 1004 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
main
*.o
*.gch
*.d

33
Makefile Normal file
View File

@ -0,0 +1,33 @@
# Build configuration for the `main` executable.
#
# NOTE: every recipe line below must start with a literal TAB character.

# Debug-friendly optimisation, all warnings enabled and treated as errors.
CXXFLAGS = -Og -g -Wall -Wextra -Werror -Wfatal-errors -Wpedantic -std=c++20
LDFLAGS =
# Optional toolchain prefix that is prepended to g++ (empty = native compiler).
TARGET =
CXX = $(TARGET)g++

SRC = main.cpp
OBJ = $(patsubst %.cpp,%.o,$(SRC))
DEP = $(patsubst %.cpp,%.d,$(SRC))

all: main

# Pull in the header dependencies written by -MMD; the leading '-' keeps make
# quiet when the .d files do not exist yet.
-include $(DEP)

%.o: %.cpp
	$(CXX) $(CXXFLAGS) -MMD -MF $(basename $<).d -c $< -o $@

main: $(OBJ)
	$(CXX) $(LDFLAGS) $^ -o $@

# Remove every build product, including the linked executable.
clean:
	rm -f main *.o *.d *.gch

# Clear the suffix list and the special target lists.
.SUFFIXES:
.INTERMEDIATE:
.SECONDARY:
.PHONY: all clean

# Recipe-less pattern rules: cancel make's built-in RCS/SCCS checkout rules.
%: RCS/%,v
%: RCS/%
%: %,v
%: s.%
%: SCCS/s.%

99
build_radix_tree.py Normal file
View File

@ -0,0 +1,99 @@
def build_radix_tree(ops: list[str]) -> dict:
    """Build a character-by-character lookup tree for the given words.

    Each node is a dict mapping one character to a ``(terminal, children)``
    tuple. ``terminal`` is the full word that ends at that character, or
    ``None`` when no word ends there; ``children`` is the node for the
    characters that may follow.

    Keys are inserted in first-seen order, so the order of ``ops`` determines
    the iteration order of every node.
    """
    tree = {}
    for word in ops:
        node = tree
        final = len(word) - 1
        for pos, ch in enumerate(word):
            if ch not in node:
                node[ch] = (None, {})
            if pos == final:
                # Mark the end of a word but keep any children already present.
                _, kids = node[ch]
                node[ch] = (word, kids)
            else:
                node = node[ch][1]
    return tree
def indent(i):
    """Return the leading whitespace for nesting depth ``i`` (``2 * i`` repeats of the pad string)."""
    width = 2 * i
    return " " * width
def print_switch(d, level=0):
    """Print a C++ ``switch (s[ix++])`` statement for one tree node.

    ``d`` maps a character to ``(terminal, children)`` as produced by
    ``build_radix_tree``. Letters get two case labels (upper and lower case)
    so the generated match ignores case. A node that ends a keyword returns
    its token type when the whole input has been consumed; a node with
    children recurses two indentation levels deeper.
    """
    case_pad = indent(level)
    body_pad = indent(level + 1)

    print(case_pad + "switch (s[ix++]) {")
    for key, (terminal, children) in d.items():
        upper, lower = key.upper(), key.lower()
        if upper != lower:
            print(case_pad + f"case '{upper}': [[fallthrough]];")
        print(case_pad + f"case '{lower}':")

        ends_keyword = terminal is not None
        if ends_keyword:
            print(body_pad + f"if (ix == s.length()) return {{ token::type_t::_{terminal} }};")

        if children:
            # After a keyword check the nested switch hangs off the `else`;
            # otherwise it needs its own bounds check before reading s[ix].
            opener = "else {" if ends_keyword else "if (ix < s.length()) {"
            print(body_pad + opener)
            print_switch(children, level + 2)
            print(body_pad + "}")

        print(body_pad + "break;")
    print(case_pad + "}")
def print_keyword_func(root):
    """Print the complete C++ keyword-lookup header to stdout.

    The output declares ``dsp::keyword::find``, whose body is the nested
    switch printed by ``print_switch`` for ``root`` (a tree built by
    ``build_radix_tree``).

    The header starts with ``#pragma once`` and includes ``<string_view>``
    itself, so it can be included more than once and does not depend on
    another header to provide ``std::string_view``.
    """
    p = print
    inden1 = indent(1)

    # Preamble: banner, include guard and the headers the function needs.
    p("// Generated by build_radix_tree.py; do not edit by hand.")
    p("#pragma once")
    p()
    p("#include <optional>")
    p("#include <string_view>")
    p()
    p('#include "token.hpp"')
    p()
    p("namespace dsp {")
    p()
    p("struct keyword {")
    p()
    p("inline static constexpr std::optional<enum token::type_t>")
    p("find(const std::string_view s)")
    p("{")
    # The first switch reads s[0] unconditionally, so reject empty input first.
    p(inden1 + "if (s.length() == 0) { return {}; }")
    p()
    p(inden1 + "std::string_view::size_type ix = 0;")
    p()
    print_switch(root, level=1)
    # Reached when no case returned: the input is not a keyword.
    p(inden1 + "return {};")
    p("}")
    p()
    p("};")
    p()
    p("}")
from pprint import pprint
# Keyword spellings. The order is significant: tree nodes are created in
# first-seen order, and that is the order in which the case labels are printed.
keywords = [
    "alh", "all", "alu",
    "m0", "m1", "m2", "m3",
    "mc0", "mc1", "mc2", "mc3",
    "mul",
    "nop",
    "and", "or", "xor",
    "add", "sub", "ad2",
    "sr", "rr", "sl", "rl", "rl8",
    "clr",
    "mov", "mvi",
    "dma", "dmah",
    "jmp", "btm", "lps",
    "end", "endi",
    "equ", "org",
    "ends",
]

# Build the lookup tree and print the generated header to stdout.
d = build_radix_tree(keywords)
print_keyword_func(d)

386
keyword.hpp Normal file
View File

@ -0,0 +1,386 @@
#include <optional>
#include "token.hpp"
// The code below matches, line for line, what print_keyword_func() in
// build_radix_tree.py prints; change the generator and regenerate rather than
// editing the switch by hand.
//
// NOTE(review): this header has no include guard / #pragma once and relies on
// "token.hpp" to provide <string_view> -- confirm whether that is intended.
namespace dsp {
struct keyword {
// Looks up `s` as a keyword, ignoring letter case.
//
// Returns the matching token::type_t enumerator when `s` is exactly one of
// the keywords below, and an empty optional otherwise (including for the
// empty string and for any string that merely starts with a keyword).
//
// `ix` is always the index of the next unread character. A keyword is only
// accepted when `ix == s.length()`, i.e. the whole input has been consumed.
inline static constexpr std::optional<enum token::type_t>
find(const std::string_view s)
{
// s[0] is read unconditionally below, so reject empty input first.
if (s.length() == 0) { return {}; }
std::string_view::size_type ix = 0;
switch (s[ix++]) {
// a: alh all alu and add ad2
case 'A': [[fallthrough]];
case 'a':
if (ix < s.length()) {
switch (s[ix++]) {
case 'L': [[fallthrough]];
case 'l':
if (ix < s.length()) {
switch (s[ix++]) {
case 'H': [[fallthrough]];
case 'h':
if (ix == s.length()) return { token::type_t::_alh };
break;
case 'L': [[fallthrough]];
case 'l':
if (ix == s.length()) return { token::type_t::_all };
break;
case 'U': [[fallthrough]];
case 'u':
if (ix == s.length()) return { token::type_t::_alu };
break;
}
}
break;
case 'N': [[fallthrough]];
case 'n':
if (ix < s.length()) {
switch (s[ix++]) {
case 'D': [[fallthrough]];
case 'd':
if (ix == s.length()) return { token::type_t::_and };
break;
}
}
break;
case 'D': [[fallthrough]];
case 'd':
if (ix < s.length()) {
switch (s[ix++]) {
case 'D': [[fallthrough]];
case 'd':
if (ix == s.length()) return { token::type_t::_add };
break;
case '2':
if (ix == s.length()) return { token::type_t::_ad2 };
break;
}
}
break;
}
}
break;
// m: m0 m1 m2 m3 mc0 mc1 mc2 mc3 mul mov mvi
case 'M': [[fallthrough]];
case 'm':
if (ix < s.length()) {
switch (s[ix++]) {
case '0':
if (ix == s.length()) return { token::type_t::_m0 };
break;
case '1':
if (ix == s.length()) return { token::type_t::_m1 };
break;
case '2':
if (ix == s.length()) return { token::type_t::_m2 };
break;
case '3':
if (ix == s.length()) return { token::type_t::_m3 };
break;
case 'C': [[fallthrough]];
case 'c':
if (ix < s.length()) {
switch (s[ix++]) {
case '0':
if (ix == s.length()) return { token::type_t::_mc0 };
break;
case '1':
if (ix == s.length()) return { token::type_t::_mc1 };
break;
case '2':
if (ix == s.length()) return { token::type_t::_mc2 };
break;
case '3':
if (ix == s.length()) return { token::type_t::_mc3 };
break;
}
}
break;
case 'U': [[fallthrough]];
case 'u':
if (ix < s.length()) {
switch (s[ix++]) {
case 'L': [[fallthrough]];
case 'l':
if (ix == s.length()) return { token::type_t::_mul };
break;
}
}
break;
case 'O': [[fallthrough]];
case 'o':
if (ix < s.length()) {
switch (s[ix++]) {
case 'V': [[fallthrough]];
case 'v':
if (ix == s.length()) return { token::type_t::_mov };
break;
}
}
break;
case 'V': [[fallthrough]];
case 'v':
if (ix < s.length()) {
switch (s[ix++]) {
case 'I': [[fallthrough]];
case 'i':
if (ix == s.length()) return { token::type_t::_mvi };
break;
}
}
break;
}
}
break;
// n: nop
case 'N': [[fallthrough]];
case 'n':
if (ix < s.length()) {
switch (s[ix++]) {
case 'O': [[fallthrough]];
case 'o':
if (ix < s.length()) {
switch (s[ix++]) {
case 'P': [[fallthrough]];
case 'p':
if (ix == s.length()) return { token::type_t::_nop };
break;
}
}
break;
}
}
break;
// o: or org
case 'O': [[fallthrough]];
case 'o':
if (ix < s.length()) {
switch (s[ix++]) {
case 'R': [[fallthrough]];
case 'r':
if (ix == s.length()) return { token::type_t::_or };
// In the else branch ix != s.length(); ix never exceeds the length here,
// so s[ix] is in range without a separate bounds check.
else {
switch (s[ix++]) {
case 'G': [[fallthrough]];
case 'g':
if (ix == s.length()) return { token::type_t::_org };
break;
}
}
break;
}
}
break;
// x: xor
case 'X': [[fallthrough]];
case 'x':
if (ix < s.length()) {
switch (s[ix++]) {
case 'O': [[fallthrough]];
case 'o':
if (ix < s.length()) {
switch (s[ix++]) {
case 'R': [[fallthrough]];
case 'r':
if (ix == s.length()) return { token::type_t::_xor };
break;
}
}
break;
}
}
break;
// s: sub sr sl
case 'S': [[fallthrough]];
case 's':
if (ix < s.length()) {
switch (s[ix++]) {
case 'U': [[fallthrough]];
case 'u':
if (ix < s.length()) {
switch (s[ix++]) {
case 'B': [[fallthrough]];
case 'b':
if (ix == s.length()) return { token::type_t::_sub };
break;
}
}
break;
case 'R': [[fallthrough]];
case 'r':
if (ix == s.length()) return { token::type_t::_sr };
break;
case 'L': [[fallthrough]];
case 'l':
if (ix == s.length()) return { token::type_t::_sl };
break;
}
}
break;
// r: rr rl rl8
case 'R': [[fallthrough]];
case 'r':
if (ix < s.length()) {
switch (s[ix++]) {
case 'R': [[fallthrough]];
case 'r':
if (ix == s.length()) return { token::type_t::_rr };
break;
case 'L': [[fallthrough]];
case 'l':
if (ix == s.length()) return { token::type_t::_rl };
else {
switch (s[ix++]) {
case '8':
if (ix == s.length()) return { token::type_t::_rl8 };
break;
}
}
break;
}
}
break;
// c: clr
case 'C': [[fallthrough]];
case 'c':
if (ix < s.length()) {
switch (s[ix++]) {
case 'L': [[fallthrough]];
case 'l':
if (ix < s.length()) {
switch (s[ix++]) {
case 'R': [[fallthrough]];
case 'r':
if (ix == s.length()) return { token::type_t::_clr };
break;
}
}
break;
}
}
break;
// d: dma dmah
case 'D': [[fallthrough]];
case 'd':
if (ix < s.length()) {
switch (s[ix++]) {
case 'M': [[fallthrough]];
case 'm':
if (ix < s.length()) {
switch (s[ix++]) {
case 'A': [[fallthrough]];
case 'a':
if (ix == s.length()) return { token::type_t::_dma };
else {
switch (s[ix++]) {
case 'H': [[fallthrough]];
case 'h':
if (ix == s.length()) return { token::type_t::_dmah };
break;
}
}
break;
}
}
break;
}
}
break;
// j: jmp
case 'J': [[fallthrough]];
case 'j':
if (ix < s.length()) {
switch (s[ix++]) {
case 'M': [[fallthrough]];
case 'm':
if (ix < s.length()) {
switch (s[ix++]) {
case 'P': [[fallthrough]];
case 'p':
if (ix == s.length()) return { token::type_t::_jmp };
break;
}
}
break;
}
}
break;
// b: btm
case 'B': [[fallthrough]];
case 'b':
if (ix < s.length()) {
switch (s[ix++]) {
case 'T': [[fallthrough]];
case 't':
if (ix < s.length()) {
switch (s[ix++]) {
case 'M': [[fallthrough]];
case 'm':
if (ix == s.length()) return { token::type_t::_btm };
break;
}
}
break;
}
}
break;
// l: lps
case 'L': [[fallthrough]];
case 'l':
if (ix < s.length()) {
switch (s[ix++]) {
case 'P': [[fallthrough]];
case 'p':
if (ix < s.length()) {
switch (s[ix++]) {
case 'S': [[fallthrough]];
case 's':
if (ix == s.length()) return { token::type_t::_lps };
break;
}
}
break;
}
}
break;
// e: end endi ends equ
case 'E': [[fallthrough]];
case 'e':
if (ix < s.length()) {
switch (s[ix++]) {
case 'N': [[fallthrough]];
case 'n':
if (ix < s.length()) {
switch (s[ix++]) {
case 'D': [[fallthrough]];
case 'd':
if (ix == s.length()) return { token::type_t::_end };
else {
switch (s[ix++]) {
case 'I': [[fallthrough]];
case 'i':
if (ix == s.length()) return { token::type_t::_endi };
break;
case 'S': [[fallthrough]];
case 's':
if (ix == s.length()) return { token::type_t::_ends };
break;
}
}
break;
}
}
break;
case 'Q': [[fallthrough]];
case 'q':
if (ix < s.length()) {
switch (s[ix++]) {
case 'U': [[fallthrough]];
case 'u':
if (ix == s.length()) return { token::type_t::_equ };
break;
}
}
break;
}
}
break;
}
// No case returned: `s` is not a keyword.
return {};
}
};
}

218
lexer.cpp Normal file
View File

@ -0,0 +1,218 @@
#include <string_view>
#include <functional>
#include <cstdint>
#include <optional>
#include "token.hpp"
#include "num.hpp"
#include "lexer.hpp"
#include "keyword.hpp"
namespace dsp {
// Converts one digit character to its numeric value.
//
// '0'-'9' map to 0-9 and 'a'-'f' / 'A'-'F' map to 10-15. Any other character
// maps to 0; no error is reported, so callers should only pass characters
// that their digit predicate accepted.
template <typename N>
constexpr static N parse_digit(const char c)
{
  if (c >= '0' && c <= '9') return static_cast<N>(c - '0');
  if (c >= 'a' && c <= 'f') return static_cast<N>(c - 'a' + 10);
  if (c >= 'A' && c <= 'F') return static_cast<N>(c - 'A' + 10);
  return 0;
}
// Parses `s` as an unsigned number in the given base, most significant digit
// first, and returns it as N. An empty string yields 0.
//
// No validation or overflow checking is performed: each character is
// converted with parse_digit<N>() and accumulated as-is.
template <typename N, int base>
constexpr static N parse_number(const std::string_view s)
{
  N value = 0;
  for (const char digit : s) {
    value *= base;
    value += parse_digit<N>(digit);
  }
  return value;
}
struct dec_t {
constexpr static bool pred(const char c)
{
return c >= '0' && c <= '9';
}
template <typename N>
constexpr static token_t<N> parse(const std::string_view s)
{
return parse_number<N, 10>(s);
}
};
struct hex_t {
constexpr static bool pred(const char c)
{
return dec_t::pred(c)
|| (c >= 'a' && c <= 'f')
|| (c >= 'A' && c <= 'F');
}
template <typename N>
constexpr static token_t<N> parse(const std::string_view s)
{
return parse_number<N, 16>(s);
}
};
// True when `c` is an ASCII letter ('a'..'z' or 'A'..'Z').
constexpr bool alpha_p(const char c)
{
  const bool lower = 'a' <= c && c <= 'z';
  const bool upper = 'A' <= c && c <= 'Z';
  return lower || upper;
}
// True when `c` may appear inside an identifier: a letter, a decimal digit,
// or an underscore.
constexpr bool alpha_numeric_p(const char c)
{
  if (c == '_') return true;
  return dec_t::pred(c) || alpha_p(c);
}
// Scanner that turns source text into tokens, one scan_token() call at a time.
//
// State:
//   source     - the text being scanned. It is a view, not a copy, and every
//                lexeme handed out is a view into it, so the underlying
//                buffer must outlive the lexer and its tokens.
//   start_ix   - index of the first character of the token being scanned.
//   current_ix - index of the next character to consume.
//   pos        - line/column bookkeeping. It is copied into a token when the
//                token is created, i.e. after its characters were consumed.
struct lexer_t {
  const std::string_view source;
  std::string_view::size_type start_ix;
  std::string_view::size_type current_ix;
  token_pos_t pos;

  lexer_t() = delete;

  // Starts scanning at the beginning of `source`, on line 1, column 0.
  constexpr lexer_t(const std::string_view source)
    : source(source), start_ix(0), current_ix(0), pos{ .line = 1, .col = 0}
  { }

  // True once every character of `source` has been consumed.
  bool at_end_p() const
  {
    return current_ix >= source.length();
  }

  // Returns the next character without consuming it, or '\0' at the end.
  char peek() const
  {
    if (at_end_p()) return '\0';
    return source[current_ix];
  }

  // Consumes the next character only if it equals `expected`.
  // Returns whether a character was consumed.
  bool match(const char expected)
  {
    if (at_end_p()) return false;
    else if (source[current_ix] != expected) return false;
    pos.col++;
    current_ix++;
    return true;
  }

  // Consumes and returns the next character. At the end of input nothing is
  // consumed and '\0' is returned, so `source` is never indexed out of range.
  char advance()
  {
    if (at_end_p()) return '\0';
    pos.col++;
    return source[current_ix++];
  }

  // Text of the token scanned so far: [start_ix, current_ix).
  // substr() takes (position, count), hence the subtraction.
  const std::string_view lexeme() const
  {
    return source.substr(start_ix, current_ix - start_ix);
  }

  // Consumes the remaining digits accepted by T::pred and returns a number
  // token whose literal is the value computed by T::parse. T is a digit
  // policy such as dec_t or hex_t.
  template <typename T>
  token _number()
  {
    while (T::pred(peek())) advance();
    // The value type appears only in parse's return type, so it cannot be
    // deduced and has to be named explicitly.
    return {pos, token::number, lexeme(), T::template parse<num_t>(lexeme())};
  }

  // Consumes the rest of an identifier and returns either the matching
  // keyword token or a plain identifier token.
  token _identifier()
  {
    while (alpha_numeric_p(peek())) advance();
    const std::optional<enum token::type_t> kw = keyword::find(lexeme());
    if (kw) return {pos, *kw, lexeme()};
    else return {pos, token::identifier, lexeme()};
  }

  // Scans and returns the next token.
  //
  // Whitespace, newlines and ';' comments (which run to the end of the line)
  // produce no token; scanning simply continues with the next character.
  // Once the input is exhausted an `eof` token with an empty lexeme is
  // returned, and every further call returns `eof` again.
  //
  // Characters that start no token (including a lone '<', '>' or '$') are
  // currently skipped without a diagnostic; see the TODO in the default case.
  token scan_token()
  {
    using enum token::type_t;

    for (;;) {
      start_ix = current_ix;
      if (at_end_p()) return {pos, eof, lexeme()};

      const char c = advance();
      switch (c) {
      case '(': return {pos, left_paren, lexeme()};
      case ')': return {pos, right_paren, lexeme()};
      case ',': return {pos, comma, lexeme()};
      case '.': return {pos, dot, lexeme()};
      case '+': return {pos, plus, lexeme()};
      case '-': return {pos, minus, lexeme()};
      case '*': return {pos, star, lexeme()};
      case '/': return {pos, slash, lexeme()};
      case '%': return {pos, percent, lexeme()};
      case '~': return {pos, tilde, lexeme()};
      case '&': return {pos, ampersand, lexeme()};
      case '|': return {pos, bar, lexeme()};
      case '^': return {pos, carot, lexeme()};
      case '<':
        if (match('<')) return {pos, left_shift, lexeme()};
        break;
      case '>':
        if (match('>')) return {pos, right_shift, lexeme()};
        break;
      case ';':
        // Comment: discard everything up to, but not including, the newline.
        while (!at_end_p() && peek() != '\n') advance();
        break;
      case ' ':
      case '\r':
      case '\t':
        break;
      case '\n':
        pos.line++;
        pos.col = 0;
        break;
      case '$':
        // "$" followed by a hex digit introduces a hex literal.
        if (hex_t::pred(peek())) {
          start_ix += 1;   // drop the "$" prefix from the lexeme
          return _number<hex_t>();
        }
        // A '$' with no digit after it starts no token. It must not share
        // the "0x" logic below, or "$x1" would be read as a hex literal.
        break;
      case '0':
        // "0x" introduces a hex literal only when a hex digit follows. The
        // 'x' is consumed only in that case; otherwise the '0' is scanned as
        // a decimal number and the 'x' is left for the next token.
        if (peek() == 'x'
            && current_ix + 1 < source.length()
            && hex_t::pred(source[current_ix + 1])) {
          advance();       // consume 'x'
          start_ix += 2;   // drop the "0x" prefix from the lexeme
          return _number<hex_t>();
        }
        [[fallthrough]];
      default:
        if (dec_t::pred(c)) {
          return _number<dec_t>();
        } else if (alpha_p(c)) {
          return _identifier();
        } else {
          // TODO: report the character once error reporting is available here.
          //error(pos.line, "Unexpected character.");
        }
        break;
      }
      // Nothing to emit for this character; keep scanning.
    }
  }
};
}

1
lexer.hpp Normal file
View File

@ -0,0 +1 @@

64
main.cpp Normal file
View File

@ -0,0 +1,64 @@
#include <iostream>
#include <fstream>
#include <string>
#include "token.hpp"
// Set as soon as any error has been reported; used for the process exit status.
static bool had_error = false;

// Writes one diagnostic line to std::cerr and records that an error occurred.
//
//   line    - source line number the error refers to
//   where   - text inserted directly after "Error"; may be empty
//   message - human-readable description of the problem
//
// Output format: "[line N] Error<where>: <message>" followed by a newline, so
// that consecutive diagnostics do not run together on one line.
static void report(int line, const std::string& where, const std::string& message)
{
  std::cerr << "[line " << line << "] Error" << where << ": " << message << '\n';
  had_error = true;
}
// Reports `message` for the given source line, with no extra location text.
void error(int line, std::string message)
{
  const std::string no_location;
  report(line, no_location, message);
}
// Processes one chunk of source text. For now this only wraps the text in a
// string_view; the view is deliberately unused.
static void run(std::string source)
{
  const std::string_view buf = source;
  static_cast<void>(buf);
}
// Interactive mode: prints a "> " prompt, passes each line read from standard
// input to run(), and stops when reading a line fails (e.g. end of input).
static void run_prompt()
{
  const char* const prompt = "> ";

  std::cout << prompt << std::flush;
  for (std::string line; std::getline(std::cin, line); ) {
    run(line);
    std::cout << prompt << std::flush;
  }
}
// Reads the whole file `filename` into memory and passes it to run().
//
// Returns -1 when the file cannot be opened or read (after printing a message
// to std::cerr); otherwise returns had_error converted to int, i.e. 1 if an
// error has been reported and 0 if not.
static int run_file(char const * const filename)
{
  // Open positioned at the end so that tellg() yields the file size.
  std::ifstream is {filename, std::ios::binary | std::ios::ate};
  if (!is.is_open()) {
    std::cerr << "failed to open " << filename << std::endl;
    return -1;
  }

  // tellg() signals failure with -1; converting that to a string length would
  // request an enormous allocation, so check before using it as a size.
  const std::streamoff size = is.tellg();
  if (size < 0) {
    std::cerr << "read failed" << std::endl;
    return -1;
  }

  std::string buf(static_cast<std::string::size_type>(size), '\0');
  is.seekg(0);
  if (!is.read(buf.data(), size)) {
    std::cerr << "read failed" << std::endl;
    return -1;
  }

  run(buf);
  return had_error;
}
// Entry point.
//   no arguments  - interactive prompt; exit status is had_error (0 or 1)
//   one argument  - run that file; exit status comes from run_file()
//   anything else - print a usage line to std::cerr and return -1
int main(const int argc, char const * const argv[])
{
  if (argc == 1) {
    run_prompt();
    return had_error;
  }
  if (argc == 2) {
    return run_file(argv[1]);
  }

  std::cerr << "Usage: " << argv[0] << " [filename]" << std::endl;
  return -1;
}

5
num.hpp Normal file
View File

@ -0,0 +1,5 @@
#pragma once
#include <cstdint>
// Numeric value type used for number literals: a signed 64-bit integer.
// Spelled std::int64_t because <cstdint> only guarantees the name in
// namespace std; the global-namespace alias is optional.
using num_t = std::int64_t;

194
token.hpp Normal file
View File

@ -0,0 +1,194 @@
#pragma once
#include <string_view>
#include <ostream>
#include <variant>
#include "num.hpp"
namespace dsp {
// Empty type with no members.
// NOTE(review): looks like a placeholder for later work -- confirm intent.
struct object_t {
};
// Position information carried by every token. It has to stay a plain
// aggregate: the lexer initialises it with `{ .line = 1, .col = 0 }`.
struct token_pos_t {
// Line number; the lexer starts at 1 and increments it on each '\n'.
int line;
// Column counter; the lexer increments it per consumed character and resets
// it to 0 on each '\n'.
int col;
};
// One lexical token: its kind, its position, the source text it was scanned
// from and, for tokens built with a number, the value of type N.
//
// All members are const, so a token cannot be modified or assigned to after
// construction. `lexeme` is a view; the text it refers to is not owned.
template <typename N>
struct token_t {
  // Every kind of token. The keyword enumerators are the keyword spelling
  // with a leading underscore.
  enum type_t {
    // punctuation
    left_paren, right_paren, comma, dot,

    // operators
    plus, minus, star, slash, percent,
    tilde, ampersand, bar, carot,
    left_shift, right_shift,
    equal,

    // literals
    identifier, string, number,

    // keywords
    _alh, _all, _alu,
    _m0, _m1, _m2, _m3,
    _mc0, _mc1, _mc2, _mc3,
    _mul,
    _nop,
    _and, _or, _xor,
    _add, _sub, _ad2,
    _sr, _rr, _sl, _rl, _rl8,
    _clr,
    _mov, _mvi,
    _dma, _dmah,
    _jmp, _btm, _lps,
    _end, _endi,
    _equ, _org,
    _ends,

    eof,
  };

  // Literal payload: std::monostate when there is no value, otherwise an N.
  using literal_t = std::variant<std::monostate, N>;

  const token_pos_t pos;
  const type_t type;
  const std::string_view lexeme;
  const literal_t literal;

  token_t() = delete;

  // Token that carries a numeric value.
  constexpr token_t(token_pos_t pos, type_t type, const std::string_view lexeme, N number)
    : pos(pos), type(type), lexeme(lexeme), literal(number)
  { }

  // Token without a value; `literal` holds std::monostate.
  constexpr token_t(token_pos_t pos, type_t type, const std::string_view lexeme)
    : pos(pos), type(type), lexeme(lexeme), literal()
  { }

  // Streams the upper-case name of a token type, e.g. "LEFT_PAREN" or "DMAH".
  // A value that is not one of the enumerators is treated as unreachable.
  friend std::ostream& operator<<(std::ostream& os, const enum token_t<N>::type_t type)
  {
    const char* name = nullptr;

    switch (type) {
    // punctuation
    case left_paren:  name = "LEFT_PAREN";  break;
    case right_paren: name = "RIGHT_PAREN"; break;
    case comma:       name = "COMMA";       break;
    case dot:         name = "DOT";         break;

    // operators
    case plus:        name = "PLUS";        break;
    case minus:       name = "MINUS";       break;
    case star:        name = "STAR";        break;
    case slash:       name = "SLASH";       break;
    case percent:     name = "PERCENT";     break;
    case tilde:       name = "TILDE";       break;
    case ampersand:   name = "AMPERSAND";   break;
    case bar:         name = "BAR";         break;
    case carot:       name = "CAROT";       break;
    case left_shift:  name = "LEFT_SHIFT";  break;
    case right_shift: name = "RIGHT_SHIFT"; break;
    case equal:       name = "EQUAL";       break;

    // literals
    case identifier:  name = "IDENTIFIER";  break;
    case string:      name = "STRING";      break;
    case number:      name = "NUMBER";      break;

    // keywords
    case _alh:        name = "ALH";         break;
    case _all:        name = "ALL";         break;
    case _alu:        name = "ALU";         break;
    case _m0:         name = "M0";          break;
    case _m1:         name = "M1";          break;
    case _m2:         name = "M2";          break;
    case _m3:         name = "M3";          break;
    case _mc0:        name = "MC0";         break;
    case _mc1:        name = "MC1";         break;
    case _mc2:        name = "MC2";         break;
    case _mc3:        name = "MC3";         break;
    case _mul:        name = "MUL";         break;
    case _nop:        name = "NOP";         break;
    case _and:        name = "AND";         break;
    case _or:         name = "OR";          break;
    case _xor:        name = "XOR";         break;
    case _add:        name = "ADD";         break;
    case _sub:        name = "SUB";         break;
    case _ad2:        name = "AD2";         break;
    case _sr:         name = "SR";          break;
    case _rr:         name = "RR";          break;
    case _sl:         name = "SL";          break;
    case _rl:         name = "RL";          break;
    case _rl8:        name = "RL8";         break;
    case _clr:        name = "CLR";         break;
    case _mov:        name = "MOV";         break;
    case _mvi:        name = "MVI";         break;
    case _dma:        name = "DMA";         break;
    case _dmah:       name = "DMAH";        break;
    case _jmp:        name = "JMP";         break;
    case _btm:        name = "BTM";         break;
    case _lps:        name = "LPS";         break;
    case _end:        name = "END";         break;
    case _endi:       name = "ENDI";        break;
    case _equ:        name = "EQU";         break;
    case _org:        name = "ORG";         break;
    case _ends:       name = "ENDS";        break;

    case eof:         name = "EOF";         break;
    }

    if (name == nullptr) __builtin_unreachable();
    return os << name;
  }

  // Streams "<TYPE> <lexeme>", followed by "/<value>" when the token carries
  // a numeric literal.
  friend std::ostream& operator<<(std::ostream& os, const token_t& token)
  {
    os << token.type << ' ' << token.lexeme;
    if (std::holds_alternative<N>(token.literal)) {
      os << '/' << std::get<N>(token.literal);
    }
    return os;
  }
};
}
// Global shorthand for the token type instantiated with num_t as its numeric
// literal type.
using token = dsp::token_t<num_t>;