dsp-asm/lexer.cpp
2023-08-14 16:52:56 +00:00

219 lines
4.5 KiB
C++

#include <string_view>
#include <functional>
#include <cstdint>
#include <optional>
#include "token.hpp"
#include "num.hpp"
#include "lexer.hpp"
#include "keyword.hpp"
namespace dsp {
template <typename N>
constexpr static N parse_digit(const char c)
{
switch (c) {
default: [[fallthrough]];
case '0': return 0;
case '1': return 1;
case '2': return 2;
case '3': return 3;
case '4': return 4;
case '5': return 5;
case '6': return 6;
case '7': return 7;
case '8': return 8;
case '9': return 9;
case 'a': return 10;
case 'b': return 11;
case 'c': return 12;
case 'd': return 13;
case 'e': return 14;
case 'f': return 15;
case 'A': return 10;
case 'B': return 11;
case 'C': return 12;
case 'D': return 13;
case 'E': return 14;
case 'F': return 15;
}
}
template <typename N, int base>
constexpr static N parse_number(const std::string_view s)
{
N n = 0;
for (std::string_view::size_type ix = 0; ix < s.length(); ix++) {
n *= base;
n += parse_digit<N>(s[ix]);
}
return n;
}
struct dec_t {
constexpr static bool pred(const char c)
{
return c >= '0' && c <= '9';
}
template <typename N>
constexpr static token_t<N> parse(const std::string_view s)
{
return parse_number<N, 10>(s);
}
};
struct hex_t {
constexpr static bool pred(const char c)
{
return dec_t::pred(c)
|| (c >= 'a' && c <= 'f')
|| (c >= 'A' && c <= 'F');
}
template <typename N>
constexpr static token_t<N> parse(const std::string_view s)
{
return parse_number<N, 16>(s);
}
};
constexpr bool alpha_p(const char c)
{
return (c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z');
}
constexpr bool alpha_numeric_p(const char c)
{
return alpha_p(c) || dec_t::pred(c) || (c == '_');
}
struct lexer_t {
const std::string_view source;
std::string_view::size_type start_ix;
std::string_view::size_type current_ix;
token_pos_t pos;
lexer_t() = delete;
constexpr lexer_t(const std::string_view source)
: source(source), start_ix(0), pos{ .line = 1, .col = 0}
{ }
bool at_end_p()
{
return current_ix >= source.length();
}
char peek()
{
if (at_end_p()) return '\0';
return source[current_ix];
}
bool match(const char expected)
{
if (at_end_p()) return false;
else if (source[current_ix] != expected) return false;
pos.col++;
current_ix++;
return true;
}
char advance()
{
pos.col++;
return source[current_ix++];
}
const std::string_view lexeme()
{
return source.substr(start_ix, current_ix);
}
template <typename T>
token _number()
{
while (T::pred(peek())) advance();
return {pos, token::number, lexeme(), T::parse(lexeme())};
}
token _identifier()
{
while (alpha_numeric_p(peek())) advance();
std::optional<enum token::type_t> keyword = keyword::find(lexeme());
if (keyword) return {pos, *keyword, lexeme()};
else return {pos, token::identifier, lexeme()};
}
token scan_token()
{
using enum token::type_t;
start_ix = current_ix;
const char c = advance();
switch (c) {
case '(': return {pos, left_paren, lexeme()};
case ')': return {pos, right_paren, lexeme()};
case ',': return {pos, comma, lexeme()};
case '.': return {pos, dot, lexeme()};
case '+': return {pos, plus, lexeme()};
case '-': return {pos, minus, lexeme()};
case '*': return {pos, star, lexeme()};
case '/': return {pos, slash, lexeme()};
case '%': return {pos, percent, lexeme()};
case '~': return {pos, tilde, lexeme()};
case '&': return {pos, ampersand, lexeme()};
case '|': return {pos, bar, lexeme()};
case '^': return {pos, carot, lexeme()};
case '<':
if (match('<')) return {pos, left_shift, lexeme()};
break;
case '>':
if (match('>')) return {pos, right_shift, lexeme()};
break;
case ';':
while (!at_end_p() && peek() != '\n') advance();
break;
case ' ':
case '\r':
case '\t':
break;
case '\n':
pos.line++;
pos.col = 0;
break;
case '$':
if (hex_t::pred(peek())) {
start_ix += 1;
return _number<hex_t>();
}
[[fallthrough]];
case '0':
if (match('x')) {
if (hex_t::pred(peek())) {
start_ix += 2;
return _number<hex_t>();
}
}
[[fallthrough]];
default:
if (dec_t::pred(c)) {
return _number<dec_t>();
} else if (alpha_p(c)) {
return _identifier();
} else {
//error(pos.line, "Unexpected character.");
}
break;
}
}
};
}