From b6d4ae5e8ea5618562ac856a75b6e7d654bb2920 Mon Sep 17 00:00:00 2001 From: Zack Buhman Date: Tue, 15 Aug 2023 20:18:35 -0700 Subject: [PATCH] parser: initial expression parser --- error.hpp | 12 ++++ expr.hpp | 6 +- parser.cpp | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++++- parser.hpp | 50 +++++++++++++++++ 4 files changed, 224 insertions(+), 4 deletions(-) create mode 100644 parser.hpp /* NOTE(review): in this copy of the patch the text inside angle brackets (include targets, template parameter lists, template arguments) is missing and line breaks are collapsed; recover the original commit before applying. */ diff --git a/error.hpp b/error.hpp index 976f699..3777e72 100644 --- a/error.hpp +++ b/error.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include namespace dsp { @@ -21,4 +22,15 @@ static inline void error(const int line, const int col, std::string message) report(line, col, "", message); } +static inline void error(const token_t& token, const std::string message) /* Reports message at the position of token: an eof token is labelled with the text at end, any other token with its lexeme in single quotes. */ +{ + using enum token_t::type_t; + + if (token.type == eof) { + report(token.pos.line, token.pos.col, " at end", message); + } else { + report(token.pos.line, token.pos.col, " at '" + std::string(token.lexeme) + "'", message); + } +} + } diff --git a/expr.hpp b/expr.hpp index bb179c6..d425858 100644 --- a/expr.hpp +++ b/expr.hpp @@ -52,11 +52,11 @@ struct literal_t : expr_accept_t struct unary_t : expr_accept_t { - unary_t(token_t oper, expr_t * expr) - : oper(oper), expr(expr) {} + unary_t(token_t oper, expr_t * right) + : oper(oper), right(right) {} const token_t oper; - const expr_t * expr; + const expr_t * right; }; } diff --git a/parser.cpp b/parser.cpp index 41f57dd..8656f71 100644 --- a/parser.cpp +++ b/parser.cpp @@ -1,4 +1,162 @@ -struct parser_t +/* Grammar implemented by the parser_t methods below; each rule from expression through orl delegates to the rule listed after it. +expression → term ; +term → factor ( ( "-" | "+" ) factor )* ; +factor → unary ( ( "/" | "*" | "%" ) unary )* ; +unary → ( "~" | "+" | "-" ) unary + | shift ; +shift → andl ( ( "<<" | ">>" ) andl )* ; +andl → orl ( "&" orl )* ; +orl → primary ( ( "|" | "^" ) primary )* ; +primary → NUMBER + | "(" expression ")" ; +NOTE(review): unary sits above shift, andl and orl, so a prefix operator applies to the entire shift expression that follows it - confirm this precedence is intended. */ + +#include + +#include "parser.hpp" +#include "num.hpp" +#include "error.hpp" + +namespace dsp { + +using enum token_t::type_t; + +bool 
parser_t::at_end_p() { /* True when the token at current_ix is eof. */ + return peek().type == eof; +} + +parse_error_t parser_t::error(const token_t& token, const std::string message) /* Reports through dsp::error and returns (does not throw) a parse_error_t carrying message; callers in this file throw the result. */ +{ + dsp::error(token, message); + return parse_error_t(message); +} + +token_t& parser_t::previous() /* Token just before current_ix. NOTE(review): reads index -1 when current_ix is 0; the calls visible in this patch all follow a successful check or match. */ +{ + return tokens[current_ix-1]; +} + +token_t& parser_t::peek() /* Token at current_ix, not yet consumed. NOTE(review): no bounds check; presumably the token list always ends with eof - confirm against the lexer. */ +{ + return tokens[current_ix]; +} + +token_t& parser_t::advance() /* Moves past the current token unless it is eof, then returns previous(). */ +{ + if (!at_end_p()) current_ix++; + return previous(); +} + +bool parser_t::check(enum token_t::type_t token_type) /* True when the current token has type token_type; always false at eof. */ +{ + if (at_end_p()) return false; + return peek().type == token_type; +} + +bool parser_t::match(enum token_t::type_t token_type) /* Consumes the current token and returns true when check(token_type) holds; otherwise leaves the position unchanged. */ +{ + if (check(token_type)) { + advance(); + return true; + } + return false; +} + +template +bool parser_t::match(enum token_t::type_t token_type, Targs... args) /* Tries each type left to right and stops at the first that matches. */ +{ + return match(token_type) || match(args...); +} + +token_t parser_t::consume(enum token_t::type_t token_type, const std::string error_message) /* Consumes and returns a copy of the current token when it has type token_type; otherwise throws the parse_error_t produced by error() with error_message. */ +{ + if (check(token_type)) return advance(); + + throw error(peek(), error_message); +} + +expr_t * parser_t::expression() /* Entry point of the grammar; currently just term(). */ +{ + return term(); +} + +expr_t * parser_t::term() /* Left-associative chain of minus and plus over factor(): each new binary_t takes the tree built so far as its left operand. NOTE(review): nodes here and in the methods below are allocated with new and nothing visible in this patch deletes them, including when a later sub-parse throws - confirm who owns the tree. */ +{ + expr_t * left = factor(); + while (match(minus, plus)) { + token_t oper = previous(); + expr_t * right = factor(); + left = new binary_t(left, oper, right); + } + return left; +} + +expr_t * parser_t::factor() /* Left-associative chain of slash, star and percent over unary(). */ +{ + expr_t * left = unary(); + while (match(slash, star, percent)) { + token_t oper = previous(); + expr_t * right = unary(); + left = new binary_t(left, oper, right); + } + return left; +} + +expr_t * parser_t::unary() /* Prefix tilde, plus or minus applied to a nested unary(); otherwise falls through to shift(). */ +{ + if (match(tilde, plus, minus)) { + token_t oper = previous(); + expr_t * right = unary(); + return new unary_t(oper, right); + } + return shift(); +} + +expr_t * parser_t::shift() /* Left-associative chain of left_shift and right_shift over andl(). */ +{ + expr_t * left = andl(); + while (match(left_shift, right_shift)) { + token_t oper = previous(); + expr_t * right = andl(); + left = new binary_t(left, oper, right); + } + return left; +} + +expr_t * parser_t::andl() /* Left-associative chain of ampersand over orl(). */ +{ + expr_t * left = orl(); + while 
(match(ampersand)) { + token_t oper = previous(); + expr_t * right = orl(); + left = new binary_t(left, oper, right); + } + return left; +} + +expr_t * parser_t::orl() /* Left-associative chain of bar and carot over primary(). */ +{ + expr_t * left = primary(); + while (match(bar, carot)) { + token_t oper = previous(); + expr_t * right = primary(); + left = new binary_t(left, oper, right); + } + return left; +} + +expr_t * parser_t::primary() /* A number literal, or a parenthesised expression wrapped in grouping_t; anything else throws the parse_error_t for expected expression. */ +{ + if (match(number)) return new literal_t(std::get(previous().literal)); + + if (match(left_paren)) { + expr_t * expr = expression(); + consume(right_paren, "expected ')' after expression."); + return new grouping_t(expr); + } + + throw error(peek(), "expected expression"); +} } diff --git a/parser.hpp b/parser.hpp new file mode 100644 index 0000000..efe501e --- /dev/null +++ b/parser.hpp @@ -0,0 +1,50 @@ +#include +#include +#include + +#include "token.hpp" +#include "expr.hpp" + +namespace dsp { /* NOTE(review): this new header has no include guard or pragma once, unlike error.hpp. */ + +struct parse_error_t : std::runtime_error /* Exception type thrown by the parser; adds nothing beyond the runtime_error message. */ +{ + parse_error_t(const std::string& msg) + : std::runtime_error(msg) + { } +}; + +struct parser_t /* Recursive-descent expression parser over a token list; one method per grammar rule. */ +{ + int current_ix = 0; /* index of the token that peek() returns */ + std::vector tokens; /* copy of the list passed to the constructor */ + + parser_t(const std::vector& tokens) + : tokens(tokens) + { } + + static parse_error_t error(const token_t& token, const std::string message); + + bool at_end_p(); + + token_t& previous(); + token_t& peek(); + token_t& advance(); + bool check(enum token_t::type_t token_type); + bool match(enum token_t::type_t token_type); + template + bool match(enum token_t::type_t token_type, Targs... args); + token_t consume(enum token_t::type_t token_type, const std::string error_message); + + expr_t * expression(); + expr_t * term(); + expr_t * factor(); + expr_t * unary(); + expr_t * shift(); + expr_t * andl(); + expr_t * orl(); + expr_t * primary(); + +}; + +}