lexer: loop if the current character does not produce a token

This commit is contained in:
Zack Buhman 2023-08-16 19:52:02 +00:00
parent b6d4ae5e8e
commit cc7345ec33
3 changed files with 77 additions and 58 deletions

122
lexer.cpp
View File

@ -143,68 +143,74 @@ std::optional<token_t> lexer_t::scan_token()
{ {
using enum token_t::type_t; using enum token_t::type_t;
if (at_end_p()) while (true) {
return {{pos, eof, ""}}; if (at_end_p())
return {{pos, eof, ""}};
start_ix = current_ix; start_ix = current_ix;
const char c = advance(); const char c = advance();
switch (c) { switch (c) {
case '(': return {{pos, left_paren, lexeme()}}; case '(': return {{pos, left_paren, lexeme()}};
case ')': return {{pos, right_paren, lexeme()}}; case ')': return {{pos, right_paren, lexeme()}};
case ',': return {{pos, comma, lexeme()}}; case ',': return {{pos, comma, lexeme()}};
case '.': return {{pos, dot, lexeme()}}; case '.': return {{pos, dot, lexeme()}};
case '+': return {{pos, plus, lexeme()}}; case '+': return {{pos, plus, lexeme()}};
case '-': return {{pos, minus, lexeme()}}; case '-': return {{pos, minus, lexeme()}};
case '*': return {{pos, star, lexeme()}}; case '*': return {{pos, star, lexeme()}};
case '/': return {{pos, slash, lexeme()}}; case '/': return {{pos, slash, lexeme()}};
case '%': return {{pos, percent, lexeme()}}; case '%': return {{pos, percent, lexeme()}};
case '~': return {{pos, tilde, lexeme()}}; case '~': return {{pos, tilde, lexeme()}};
case '&': return {{pos, ampersand, lexeme()}}; case '&': return {{pos, ampersand, lexeme()}};
case '|': return {{pos, bar, lexeme()}}; case '|': return {{pos, bar, lexeme()}};
case '^': return {{pos, carot, lexeme()}}; case '^': return {{pos, carot, lexeme()}};
case '=': return {{pos, equal, lexeme()}}; case '=': return {{pos, equal, lexeme()}};
case '<': case '<':
if (match('<')) return {{pos, left_shift, lexeme()}}; if (match('<')) return {{pos, left_shift, lexeme()}};
break; break;
case '>': case '>':
if (match('>')) return {{pos, right_shift, lexeme()}}; if (match('>')) return {{pos, right_shift, lexeme()}};
break; break;
case ';': case ';':
while (!at_end_p() && peek() != '\n') advance(); while (!at_end_p() && peek() != '\n') advance();
break; break;
case ' ': case ' ':
case '\r': case '\r':
case '\t': case '\t':
break; break;
case '\n': case '\n':
pos.line++; {
pos.col = 0; token_pos_t tmp = pos;
break; pos.line++;
case '$': pos.col = 0;
if (hex_t::pred(peek())) { return {{tmp, eol, lexeme()}};
start_ix += 1;
return {_number<hex_t>()};
}
[[fallthrough]];
case '0':
if (match('x')) {
if (hex_t::pred(peek())) {
start_ix += 2;
return {_number<hex_t>()};
} }
break;
case '$':
if (hex_t::pred(peek())) {
start_ix += 1;
return {_number<hex_t>()};
}
[[fallthrough]];
case '0':
if (match('x')) {
if (hex_t::pred(peek())) {
start_ix += 2;
return {_number<hex_t>()};
}
}
[[fallthrough]];
default:
if (dec_t::pred(c)) {
return {_number<dec_t>()};
} else if (alpha_p(c)) {
return {_identifier()};
} else {
error(pos.line, pos.col - 1, "Unexpected character.");
return {};
}
break;
} }
[[fallthrough]];
default:
if (dec_t::pred(c)) {
return {_number<dec_t>()};
} else if (alpha_p(c)) {
return {_identifier()};
} else {
error(pos.line, pos.col - 1, "Unexpected character.");
return {};
}
break;
} }
__builtin_unreachable(); __builtin_unreachable();
} }

View File

@ -159,4 +159,15 @@ expr_t * parser_t::primary()
throw error(peek(), "expected expression"); throw error(peek(), "expected expression");
} }
/*
void parser_t::synchronize()
{
advance();
while (!at_end_p()) {
if (previous().type == eol) return;
advance();
}
}
*/
} }

View File

@ -85,6 +85,7 @@ struct token_t {
_ends, _ends,
eof, eof,
eol,
}; };
using literal_t = std::variant<std::monostate, num_type>; using literal_t = std::variant<std::monostate, num_type>;
@ -172,6 +173,7 @@ struct token_t {
case _ends : return os << "ENDS"; case _ends : return os << "ENDS";
case eof : return os << "EOF"; case eof : return os << "EOF";
case eol : return os << "EOL";
} }
__builtin_unreachable(); __builtin_unreachable();
} }