diff --git a/Makefile b/Makefile index fd006dd..5939e7d 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,7 @@ CXX = $(TARGET)clang++ SRC = main.cpp SRC += lexer.cpp +SRC += ast.cpp OBJ = $(patsubst %.cpp,%.o,$(SRC)) DEP = $(patsubst %.cpp,%.d,$(SRC)) diff --git a/ast.cpp b/ast.cpp new file mode 100644 index 0000000..3002657 --- /dev/null +++ b/ast.cpp @@ -0,0 +1,31 @@ +#include "ast.hpp" + +namespace dsp { + +void ast_printer_t::parenthesize(const std::string_view s, const expr_t * a) const +{ + os << '(' << s << ' '; + a->accept(this); + os << ')'; +} + +void ast_printer_t::parenthesize(const std::string_view s, const expr_t * a, const expr_t * b) const +{ + os << '(' << s << ' '; + a->accept(this); + os << ' '; + b->accept(this); + os << ')'; +} + +void ast_printer_t::visit(const literal_t * literal) const +{ + os << std::to_string(literal->value); +} + +void ast_printer_t::visit(const assign_t * expr) const +{ + parenthesize((expr->name).lexeme, &(expr->value)); +} + +} diff --git a/ast.hpp b/ast.hpp new file mode 100644 index 0000000..21e2487 --- /dev/null +++ b/ast.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + +#include "visitor.hpp" +#include "expr.hpp" +#include "num.hpp" + +namespace dsp { + +struct ast_printer_t : visitor_t +{ + ast_printer_t(std::ostream& os) + : os(os) {} + + std::ostream& os; + + void visit(const assign_t * expr) const; + void visit(const literal_t * literal) const; + + void parenthesize(const std::string_view s, const expr_t * a) const; + void parenthesize(const std::string_view s, const expr_t * a, const expr_t * b) const; +}; + +} diff --git a/build_radix_tree.py b/build_radix_tree.py index 3efc81e..089e4e8 100644 --- a/build_radix_tree.py +++ b/build_radix_tree.py @@ -25,7 +25,7 @@ def print_switch(d, level=0): p(inden0 + f"case '{key.upper()}': [[fallthrough]];") p(inden0 + f"case '{key.lower()}':") if terminal is not None: - p(inden1 + f"if (ix == s.length()) return {{ token::type_t::_{terminal} }};") + p(inden1 + f"if (ix == s.length()) return {{ token_t::type_t::_{terminal} }};") if children: p(inden1 + "else {") else: @@ -47,7 +47,7 @@ def print_keyword_func(root): p() p("struct keyword {") p() - p("inline static constexpr std::optional") + p("inline static constexpr std::optional") p("find(const std::string_view s)") p("{") p(inden1 + "if (s.length() == 0) { return {}; }") diff --git a/expr.hpp b/expr.hpp new file mode 100644 index 0000000..86e38f2 --- /dev/null +++ b/expr.hpp @@ -0,0 +1,44 @@ +#include + +#include "token.hpp" +#include "visitor.hpp" +#include "num.hpp" + +namespace dsp { + +struct expr_t +{ + virtual void accept(visitor_t const * visitor) const = 0; + virtual std::string accept(visitor_t const * visitor) const = 0; +}; + +template +struct expr_accept_t : expr_t { + virtual void accept(visitor_t const * visitor) const { + return visitor->visit(static_cast(this)); + } + + virtual std::string accept(visitor_t const * visitor) const + { + return visitor->visit(static_cast(this)); + } +}; + +struct assign_t : expr_accept_t +{ + assign_t(token_t& name, expr_t& value) + : name(name), value(value) {} + + const token_t name; + const expr_t& value; +}; + +struct literal_t : expr_accept_t +{ + literal_t(num_t value) + : value(value) {} + + const num_t value; +}; + +} diff --git a/keyword.hpp b/keyword.hpp index 90c16ee..0d057e5 100644 --- a/keyword.hpp +++ b/keyword.hpp @@ -5,7 +5,7 @@ namespace dsp { struct keyword { -inline static constexpr std::optional +inline static constexpr std::optional find(const std::string_view s) { if (s.length() == 0) { return {}; } @@ -23,15 +23,15 @@ find(const std::string_view s) switch (s[ix++]) { case 'H': [[fallthrough]]; case 'h': - if (ix == s.length()) return { token::type_t::_alh }; + if (ix == s.length()) return { token_t::type_t::_alh }; break; case 'L': [[fallthrough]]; case 'l': - if (ix == s.length()) return { token::type_t::_all }; + if (ix == s.length()) return { token_t::type_t::_all }; break; case 'U': [[fallthrough]]; case 'u': - if (ix == s.length()) return { token::type_t::_alu }; + if (ix == s.length()) return { token_t::type_t::_alu }; break; } } @@ -42,7 +42,7 @@ find(const std::string_view s) switch (s[ix++]) { case 'D': [[fallthrough]]; case 'd': - if (ix == s.length()) return { token::type_t::_and }; + if (ix == s.length()) return { token_t::type_t::_and }; break; } } @@ -53,10 +53,10 @@ find(const std::string_view s) switch (s[ix++]) { case 'D': [[fallthrough]]; case 'd': - if (ix == s.length()) return { token::type_t::_add }; + if (ix == s.length()) return { token_t::type_t::_add }; break; case '2': - if (ix == s.length()) return { token::type_t::_ad2 }; + if (ix == s.length()) return { token_t::type_t::_ad2 }; break; } } @@ -69,32 +69,32 @@ find(const std::string_view s) if (ix < s.length()) { switch (s[ix++]) { case '0': - if (ix == s.length()) return { token::type_t::_m0 }; + if (ix == s.length()) return { token_t::type_t::_m0 }; break; case '1': - if (ix == s.length()) return { token::type_t::_m1 }; + if (ix == s.length()) return { token_t::type_t::_m1 }; break; case '2': - if (ix == s.length()) return { token::type_t::_m2 }; + if (ix == s.length()) return { token_t::type_t::_m2 }; break; case '3': - if (ix == s.length()) return { token::type_t::_m3 }; + if (ix == s.length()) return { token_t::type_t::_m3 }; break; case 'C': [[fallthrough]]; case 'c': if (ix < s.length()) { switch (s[ix++]) { case '0': - if (ix == s.length()) return { token::type_t::_mc0 }; + if (ix == s.length()) return { token_t::type_t::_mc0 }; break; case '1': - if (ix == s.length()) return { token::type_t::_mc1 }; + if (ix == s.length()) return { token_t::type_t::_mc1 }; break; case '2': - if (ix == s.length()) return { token::type_t::_mc2 }; + if (ix == s.length()) return { token_t::type_t::_mc2 }; break; case '3': - if (ix == s.length()) return { token::type_t::_mc3 }; + if (ix == s.length()) return { token_t::type_t::_mc3 }; break; } } @@ -105,7 +105,7 @@ find(const std::string_view s) switch (s[ix++]) { case 'L': [[fallthrough]]; case 'l': - if (ix == s.length()) return { token::type_t::_mul }; + if (ix == s.length()) return { token_t::type_t::_mul }; break; } } @@ -116,7 +116,7 @@ find(const std::string_view s) switch (s[ix++]) { case 'V': [[fallthrough]]; case 'v': - if (ix == s.length()) return { token::type_t::_mov }; + if (ix == s.length()) return { token_t::type_t::_mov }; break; } } @@ -127,7 +127,7 @@ find(const std::string_view s) switch (s[ix++]) { case 'I': [[fallthrough]]; case 'i': - if (ix == s.length()) return { token::type_t::_mvi }; + if (ix == s.length()) return { token_t::type_t::_mvi }; break; } } @@ -145,7 +145,7 @@ find(const std::string_view s) switch (s[ix++]) { case 'P': [[fallthrough]]; case 'p': - if (ix == s.length()) return { token::type_t::_nop }; + if (ix == s.length()) return { token_t::type_t::_nop }; break; } } @@ -159,12 +159,12 @@ find(const std::string_view s) switch (s[ix++]) { case 'R': [[fallthrough]]; case 'r': - if (ix == s.length()) return { token::type_t::_or }; + if (ix == s.length()) return { token_t::type_t::_or }; else { switch (s[ix++]) { case 'G': [[fallthrough]]; case 'g': - if (ix == s.length()) return { token::type_t::_org }; + if (ix == s.length()) return { token_t::type_t::_org }; break; } } @@ -182,7 +182,7 @@ find(const std::string_view s) switch (s[ix++]) { case 'R': [[fallthrough]]; case 'r': - if (ix == s.length()) return { token::type_t::_xor }; + if (ix == s.length()) return { token_t::type_t::_xor }; break; } } @@ -200,18 +200,18 @@ find(const std::string_view s) switch (s[ix++]) { case 'B': [[fallthrough]]; case 'b': - if (ix == s.length()) return { token::type_t::_sub }; + if (ix == s.length()) return { token_t::type_t::_sub }; break; } } break; case 'R': [[fallthrough]]; case 'r': - if (ix == s.length()) return { token::type_t::_sr }; + if (ix == s.length()) return { token_t::type_t::_sr }; break; case 'L': [[fallthrough]]; case 'l': - if (ix == s.length()) return { token::type_t::_sl }; + if (ix == s.length()) return { token_t::type_t::_sl }; break; } } @@ -222,15 +222,15 @@ find(const std::string_view s) switch (s[ix++]) { case 'R': [[fallthrough]]; case 'r': - if (ix == s.length()) return { token::type_t::_rr }; + if (ix == s.length()) return { token_t::type_t::_rr }; break; case 'L': [[fallthrough]]; case 'l': - if (ix == s.length()) return { token::type_t::_rl }; + if (ix == s.length()) return { token_t::type_t::_rl }; else { switch (s[ix++]) { case '8': - if (ix == s.length()) return { token::type_t::_rl8 }; + if (ix == s.length()) return { token_t::type_t::_rl8 }; break; } } @@ -248,7 +248,7 @@ find(const std::string_view s) switch (s[ix++]) { case 'R': [[fallthrough]]; case 'r': - if (ix == s.length()) return { token::type_t::_clr }; + if (ix == s.length()) return { token_t::type_t::_clr }; break; } } @@ -266,12 +266,12 @@ find(const std::string_view s) switch (s[ix++]) { case 'A': [[fallthrough]]; case 'a': - if (ix == s.length()) return { token::type_t::_dma }; + if (ix == s.length()) return { token_t::type_t::_dma }; else { switch (s[ix++]) { case 'H': [[fallthrough]]; case 'h': - if (ix == s.length()) return { token::type_t::_dmah }; + if (ix == s.length()) return { token_t::type_t::_dmah }; break; } } @@ -292,7 +292,7 @@ find(const std::string_view s) switch (s[ix++]) { case 'P': [[fallthrough]]; case 'p': - if (ix == s.length()) return { token::type_t::_jmp }; + if (ix == s.length()) return { token_t::type_t::_jmp }; break; } } @@ -310,7 +310,7 @@ find(const std::string_view s) switch (s[ix++]) { case 'M': [[fallthrough]]; case 'm': - if (ix == s.length()) return { token::type_t::_btm }; + if (ix == s.length()) return { token_t::type_t::_btm }; break; } } @@ -328,7 +328,7 @@ find(const std::string_view s) switch (s[ix++]) { case 'S': [[fallthrough]]; case 's': - if (ix == s.length()) return { token::type_t::_lps }; + if (ix == s.length()) return { token_t::type_t::_lps }; break; } } @@ -346,16 +346,16 @@ find(const std::string_view s) switch (s[ix++]) { case 'D': [[fallthrough]]; case 'd': - if (ix == s.length()) return { token::type_t::_end }; + if (ix == s.length()) return { token_t::type_t::_end }; else { switch (s[ix++]) { case 'I': [[fallthrough]]; case 'i': - if (ix == s.length()) return { token::type_t::_endi }; + if (ix == s.length()) return { token_t::type_t::_endi }; break; case 'S': [[fallthrough]]; case 's': - if (ix == s.length()) return { token::type_t::_ends }; + if (ix == s.length()) return { token_t::type_t::_ends }; break; } } @@ -369,7 +369,7 @@ find(const std::string_view s) switch (s[ix++]) { case 'U': [[fallthrough]]; case 'u': - if (ix == s.length()) return { token::type_t::_equ }; + if (ix == s.length()) return { token_t::type_t::_equ }; break; } } diff --git a/lexer.cpp b/lexer.cpp index c4b3ca4..e3e87b1 100644 --- a/lexer.cpp +++ b/lexer.cpp @@ -124,24 +124,24 @@ const std::string_view lexer_t::lexeme() } template -token lexer_t::_number() +token_t lexer_t::_number() { while (T::pred(peek())) advance(); - return {pos, token::number, lexeme(), T::template parse(lexeme())}; + return {pos, token_t::number, lexeme(), T::template parse(lexeme())}; } -token lexer_t::_identifier() +token_t lexer_t::_identifier() { while (alpha_numeric_p(peek())) advance(); - std::optional keyword = keyword::find(lexeme()); + std::optional keyword = keyword::find(lexeme()); if (keyword) return {pos, *keyword, lexeme()}; - else return {pos, token::identifier, lexeme()}; + else return {pos, token_t::identifier, lexeme()}; } -std::optional lexer_t::scan_token() +std::optional lexer_t::scan_token() { - using enum token::type_t; + using enum token_t::type_t; if (at_end_p()) return {{pos, eof, ""}}; @@ -163,6 +163,7 @@ std::optional lexer_t::scan_token() case '&': return {{pos, ampersand, lexeme()}}; case '|': return {{pos, bar, lexeme()}}; case '^': return {{pos, carot, lexeme()}}; + case '=': return {{pos, equal, lexeme()}}; case '<': if (match('<')) return {{pos, left_shift, lexeme()}}; break; diff --git a/lexer.hpp b/lexer.hpp index fd5ec61..0665962 100644 --- a/lexer.hpp +++ b/lexer.hpp @@ -19,7 +19,7 @@ struct lexer_t { : source(source), start_ix(0), current_ix(0), pos{ .line = 1, .col = 0 } { } - std::optional scan_token(); + std::optional scan_token(); private: bool at_end_p(); @@ -33,9 +33,9 @@ private: const std::string_view lexeme(); template - token _number(); + token_t _number(); - token _identifier(); + token_t _identifier(); }; } diff --git a/main.cpp b/main.cpp index 58f4785..2472531 100644 --- a/main.cpp +++ b/main.cpp @@ -4,6 +4,7 @@ #include "lexer.hpp" #include "token.hpp" +#include "ast.hpp" namespace dsp { @@ -11,15 +12,25 @@ bool had_error = false; } +static void print() +{ + dsp::literal_t l(56); + std::string_view s("asdf"); + dsp::token_t t({0, 0}, dsp::token_t::identifier, s); + dsp::assign_t a(t, l); + dsp::ast_printer_t p(std::cout); + p.visit(&a); +} + static void run(std::string source) { using namespace dsp; std::string_view buf {source}; lexer_t lexer {buf}; - while (std::optional token_o = lexer.scan_token()) { + while (std::optional token_o = lexer.scan_token()) { std::cout << *token_o << std::endl; - if (token_o->type == token::type_t::eof) { + if (token_o->type == token_t::type_t::eof) { break; } } @@ -56,6 +67,7 @@ static int run_file(char const * const filename) int main(const int argc, char const * const argv[]) { + print(); switch (argc) { case 1: run_prompt(); return dsp::had_error; case 2: return run_file(argv[1]); diff --git a/token.hpp b/token.hpp index 82cc27f..3e874d1 100644 --- a/token.hpp +++ b/token.hpp @@ -16,8 +16,9 @@ struct token_pos_t { int col; }; -template struct token_t { + using num_type = num_t; + enum type_t { left_paren, right_paren, @@ -86,7 +87,7 @@ struct token_t { eof, }; - using literal_t = std::variant; + using literal_t = std::variant; const token_pos_t pos; const type_t type; @@ -95,7 +96,7 @@ struct token_t { token_t() = delete; - constexpr token_t(token_pos_t pos, type_t type, const std::string_view lexeme, N number) + constexpr token_t(token_pos_t pos, type_t type, const std::string_view lexeme, num_type number) : pos(pos), type(type), lexeme(lexeme), literal(number) { } @@ -103,7 +104,7 @@ struct token_t { : pos(pos), type(type), lexeme(lexeme), literal() { } - friend std::ostream& operator<<(std::ostream& os, const enum token_t::type_t type) + friend std::ostream& operator<<(std::ostream& os, const enum token_t::type_t type) { switch (type) { case left_paren : return os << "LEFT_PAREN"; @@ -179,7 +180,7 @@ struct token_t { { os << token.type << ' ' << token.lexeme; - if (auto* v = std::get_if(&token.literal)) { + if (auto* v = std::get_if(&token.literal)) { os << '/' << *v; } else { // std::monostate } @@ -189,6 +190,4 @@ struct token_t { }; -using token = dsp::token_t; - } diff --git a/visitable.hpp b/visitable.hpp new file mode 100644 index 0000000..2da8c02 --- /dev/null +++ b/visitable.hpp @@ -0,0 +1,8 @@ +#pragma once + +namespace dsp { + +struct assign_t; +struct literal_t; + +} diff --git a/visitor.hpp b/visitor.hpp new file mode 100644 index 0000000..55b2175 --- /dev/null +++ b/visitor.hpp @@ -0,0 +1,15 @@ +#pragma once + +#include "visitable.hpp" +#include "num.hpp" + +namespace dsp { + +template +struct visitor_t +{ + virtual T visit(const assign_t * expr) const = 0; + virtual T visit(const literal_t * expr) const = 0; +}; + +}