This also adds support for "#" characters prior to immediates. nop may also now appear in an op_t. The parser no longer generates nop_t--this is instead now represented as an op_t with a zero-length ops vector.
234 lines
4.9 KiB
C++
234 lines
4.9 KiB
C++
#include <string_view>
|
|
#include <functional>
|
|
#include <cstdint>
|
|
#include <optional>
|
|
|
|
#include "token.hpp"
|
|
#include "num.hpp"
|
|
#include "lexer.hpp"
|
|
#include "keyword.hpp"
|
|
#include "error.hpp"
|
|
|
|
namespace dsp {
|
|
|
|
template <typename N>
|
|
constexpr static N parse_digit(const char c)
|
|
{
|
|
switch (c) {
|
|
default: [[fallthrough]];
|
|
case '0': return 0;
|
|
case '1': return 1;
|
|
case '2': return 2;
|
|
case '3': return 3;
|
|
case '4': return 4;
|
|
case '5': return 5;
|
|
case '6': return 6;
|
|
case '7': return 7;
|
|
case '8': return 8;
|
|
case '9': return 9;
|
|
case 'a': return 10;
|
|
case 'b': return 11;
|
|
case 'c': return 12;
|
|
case 'd': return 13;
|
|
case 'e': return 14;
|
|
case 'f': return 15;
|
|
case 'A': return 10;
|
|
case 'B': return 11;
|
|
case 'C': return 12;
|
|
case 'D': return 13;
|
|
case 'E': return 14;
|
|
case 'F': return 15;
|
|
}
|
|
}
|
|
|
|
template <typename N, int base>
|
|
constexpr static N parse_number(const std::string_view s)
|
|
{
|
|
N n = 0;
|
|
for (std::string_view::size_type ix = 0; ix < s.length(); ix++) {
|
|
n *= base;
|
|
n += parse_digit<N>(s[ix]);
|
|
}
|
|
|
|
return n;
|
|
}
|
|
|
|
struct dec_t {
|
|
constexpr static bool pred(const char c)
|
|
{
|
|
return c >= '0' && c <= '9';
|
|
}
|
|
|
|
template <typename N>
|
|
constexpr static N parse(const std::string_view s)
|
|
{
|
|
return parse_number<N, 10>(s);
|
|
}
|
|
};
|
|
|
|
struct hex_t {
|
|
constexpr static bool pred(const char c)
|
|
{
|
|
return dec_t::pred(c)
|
|
|| (c >= 'a' && c <= 'f')
|
|
|| (c >= 'A' && c <= 'F');
|
|
}
|
|
|
|
template <typename N>
|
|
constexpr static N parse(const std::string_view s)
|
|
{
|
|
return parse_number<N, 16>(s);
|
|
}
|
|
};
|
|
|
|
inline constexpr bool alpha_p(const char c)
|
|
{
|
|
return (c >= 'a' && c <= 'z')
|
|
|| (c >= 'A' && c <= 'Z');
|
|
}
|
|
|
|
inline constexpr bool alpha_numeric_p(const char c)
|
|
{
|
|
return alpha_p(c) || dec_t::pred(c) || (c == '_');
|
|
}
|
|
|
|
bool lexer_t::at_end_p()
|
|
{
|
|
return current_ix >= source.length();
|
|
}
|
|
|
|
char lexer_t::peek()
|
|
{
|
|
if (at_end_p()) return '\0';
|
|
return source[current_ix];
|
|
}
|
|
|
|
bool lexer_t::match(const char expected)
|
|
{
|
|
if (at_end_p()) return false;
|
|
else if (source[current_ix] != expected) return false;
|
|
pos.col++;
|
|
current_ix++;
|
|
return true;
|
|
}
|
|
|
|
char lexer_t::advance()
|
|
{
|
|
pos.col++;
|
|
return source[current_ix++];
|
|
}
|
|
|
|
const std::string_view lexer_t::lexeme()
|
|
{
|
|
return source.substr(start_ix, current_ix - start_ix);
|
|
}
|
|
|
|
template <typename T>
|
|
token_t lexer_t::_number()
|
|
{
|
|
while (T::pred(peek())) advance();
|
|
|
|
return {pos, token_t::number, lexeme(), T::template parse<num_t>(lexeme())};
|
|
}
|
|
|
|
token_t lexer_t::_identifier()
|
|
{
|
|
while (alpha_numeric_p(peek())) advance();
|
|
std::optional<enum token_t::type_t> keyword = keyword::find(lexeme());
|
|
if (keyword) return {pos, *keyword, lexeme()};
|
|
else return {pos, token_t::identifier, lexeme()};
|
|
}
|
|
|
|
std::optional<token_t> lexer_t::lex_token()
|
|
{
|
|
using enum token_t::type_t;
|
|
|
|
while (true) {
|
|
if (at_end_p())
|
|
return {{pos, eof, ""}};
|
|
|
|
start_ix = current_ix;
|
|
|
|
const char c = advance();
|
|
switch (c) {
|
|
case '(': return {{pos, left_paren, lexeme()}};
|
|
case ')': return {{pos, right_paren, lexeme()}};
|
|
case ',': return {{pos, comma, lexeme()}};
|
|
case '.': return {{pos, dot, lexeme()}};
|
|
case '+': return {{pos, plus, lexeme()}};
|
|
case '-': return {{pos, minus, lexeme()}};
|
|
case '*': return {{pos, star, lexeme()}};
|
|
case '/': return {{pos, slash, lexeme()}};
|
|
case '%': return {{pos, percent, lexeme()}};
|
|
case '~': return {{pos, tilde, lexeme()}};
|
|
case '&': return {{pos, ampersand, lexeme()}};
|
|
case '|': return {{pos, bar, lexeme()}};
|
|
case '^': return {{pos, carot, lexeme()}};
|
|
case '=': return {{pos, equal, lexeme()}};
|
|
case ':': return {{pos, colon, lexeme()}};
|
|
case '#': return {{pos, hash, lexeme()}};
|
|
case '<':
|
|
if (match('<')) return {{pos, left_shift, lexeme()}};
|
|
break;
|
|
case '>':
|
|
if (match('>')) return {{pos, right_shift, lexeme()}};
|
|
break;
|
|
case ';':
|
|
while (!at_end_p() && peek() != '\n') advance();
|
|
break;
|
|
case ' ':
|
|
case '\r':
|
|
case '\t':
|
|
break;
|
|
case '\n':
|
|
{
|
|
token_pos_t tmp = pos;
|
|
pos.line++;
|
|
pos.col = 0;
|
|
return {{tmp, eol, lexeme()}};
|
|
}
|
|
break;
|
|
case '$':
|
|
if (hex_t::pred(peek())) {
|
|
start_ix += 1;
|
|
return {_number<hex_t>()};
|
|
}
|
|
[[fallthrough]];
|
|
case '0':
|
|
if (match('x')) {
|
|
if (hex_t::pred(peek())) {
|
|
start_ix += 2;
|
|
return {_number<hex_t>()};
|
|
}
|
|
}
|
|
[[fallthrough]];
|
|
default:
|
|
if (dec_t::pred(c)) {
|
|
return {_number<dec_t>()};
|
|
} else if (alpha_p(c)) {
|
|
return {_identifier()};
|
|
} else {
|
|
error(pos.line, pos.col - 1, "unexpected character");
|
|
return {};
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
std::vector<token_t> lexer_t::lex_tokens()
|
|
{
|
|
std::vector<token_t> tokens;
|
|
|
|
while (true) {
|
|
std::optional<token_t> token_o = lex_token();
|
|
if (!token_o) continue;
|
|
tokens.push_back(*token_o);
|
|
if (token_o->type == token_t::eof) break;
|
|
}
|
|
return tokens;
|
|
}
|
|
|
|
}
|