From cc7345ec33d1481a977b543a3c3f34044b62cce5 Mon Sep 17 00:00:00 2001 From: Zack Buhman Date: Wed, 16 Aug 2023 19:52:02 +0000 Subject: [PATCH] lexer: loop if the current character does not produce a token --- lexer.cpp | 122 ++++++++++++++++++++++++++++------------------------- parser.cpp | 11 +++++ token.hpp | 2 + 3 files changed, 77 insertions(+), 58 deletions(-) diff --git a/lexer.cpp b/lexer.cpp index e3e87b1..8deb0d3 100644 --- a/lexer.cpp +++ b/lexer.cpp @@ -143,68 +143,74 @@ std::optional lexer_t::scan_token() { using enum token_t::type_t; - if (at_end_p()) - return {{pos, eof, ""}}; + while (true) { + if (at_end_p()) + return {{pos, eof, ""}}; - start_ix = current_ix; + start_ix = current_ix; - const char c = advance(); - switch (c) { - case '(': return {{pos, left_paren, lexeme()}}; - case ')': return {{pos, right_paren, lexeme()}}; - case ',': return {{pos, comma, lexeme()}}; - case '.': return {{pos, dot, lexeme()}}; - case '+': return {{pos, plus, lexeme()}}; - case '-': return {{pos, minus, lexeme()}}; - case '*': return {{pos, star, lexeme()}}; - case '/': return {{pos, slash, lexeme()}}; - case '%': return {{pos, percent, lexeme()}}; - case '~': return {{pos, tilde, lexeme()}}; - case '&': return {{pos, ampersand, lexeme()}}; - case '|': return {{pos, bar, lexeme()}}; - case '^': return {{pos, carot, lexeme()}}; - case '=': return {{pos, equal, lexeme()}}; - case '<': - if (match('<')) return {{pos, left_shift, lexeme()}}; - break; - case '>': - if (match('>')) return {{pos, right_shift, lexeme()}}; - break; - case ';': - while (!at_end_p() && peek() != '\n') advance(); - break; - case ' ': - case '\r': - case '\t': - break; - case '\n': - pos.line++; - pos.col = 0; - break; - case '$': - if (hex_t::pred(peek())) { - start_ix += 1; - return {_number()}; - } - [[fallthrough]]; - case '0': - if (match('x')) { - if (hex_t::pred(peek())) { - start_ix += 2; - return {_number()}; + const char c = advance(); + switch (c) { + case '(': return {{pos, left_paren, lexeme()}}; + case ')': return {{pos, right_paren, lexeme()}}; + case ',': return {{pos, comma, lexeme()}}; + case '.': return {{pos, dot, lexeme()}}; + case '+': return {{pos, plus, lexeme()}}; + case '-': return {{pos, minus, lexeme()}}; + case '*': return {{pos, star, lexeme()}}; + case '/': return {{pos, slash, lexeme()}}; + case '%': return {{pos, percent, lexeme()}}; + case '~': return {{pos, tilde, lexeme()}}; + case '&': return {{pos, ampersand, lexeme()}}; + case '|': return {{pos, bar, lexeme()}}; + case '^': return {{pos, carot, lexeme()}}; + case '=': return {{pos, equal, lexeme()}}; + case '<': + if (match('<')) return {{pos, left_shift, lexeme()}}; + break; + case '>': + if (match('>')) return {{pos, right_shift, lexeme()}}; + break; + case ';': + while (!at_end_p() && peek() != '\n') advance(); + break; + case ' ': + case '\r': + case '\t': + break; + case '\n': + { + token_pos_t tmp = pos; + pos.line++; + pos.col = 0; + return {{tmp, eol, lexeme()}}; } + break; + case '$': + if (hex_t::pred(peek())) { + start_ix += 1; + return {_number()}; + } + [[fallthrough]]; + case '0': + if (match('x')) { + if (hex_t::pred(peek())) { + start_ix += 2; + return {_number()}; + } + } + [[fallthrough]]; + default: + if (dec_t::pred(c)) { + return {_number()}; + } else if (alpha_p(c)) { + return {_identifier()}; + } else { + error(pos.line, pos.col - 1, "Unexpected character."); + return {}; + } + break; } - [[fallthrough]]; - default: - if (dec_t::pred(c)) { - return {_number()}; - } else if (alpha_p(c)) { - return {_identifier()}; - } else { - error(pos.line, pos.col - 1, "Unexpected character."); - return {}; - } - break; } __builtin_unreachable(); } diff --git a/parser.cpp b/parser.cpp index 8656f71..e117b7d 100644 --- a/parser.cpp +++ b/parser.cpp @@ -159,4 +159,15 @@ expr_t * parser_t::primary() throw error(peek(), "expected expression"); } +/* +void parser_t::synchronize() +{ + advance(); + while (!at_end_p()) { + if (previous().type == eol) return; + advance(); + } +} +*/ + } diff --git a/token.hpp b/token.hpp index 3e874d1..6a7e184 100644 --- a/token.hpp +++ b/token.hpp @@ -85,6 +85,7 @@ struct token_t { _ends, eof, + eol, }; using literal_t = std::variant; @@ -172,6 +173,7 @@ struct token_t { case _ends : return os << "ENDS"; case eof : return os << "EOF"; + case eol : return os << "EOL"; } __builtin_unreachable(); }