lexer: add header

This also calls the lexer from main.
This commit is contained in:
Zack Buhman 2023-08-14 11:38:52 -07:00
parent 29428c7a92
commit 082ebc20e5
6 changed files with 189 additions and 129 deletions

View File

@ -2,9 +2,10 @@ CXXFLAGS = -Og -g -Wall -Wextra -Werror -Wfatal-errors -Wpedantic -std=c++20
LDFLAGS = LDFLAGS =
TARGET = TARGET =
CXX = $(TARGET)g++ CXX = $(TARGET)clang++
SRC = main.cpp SRC = main.cpp
SRC += lexer.cpp
OBJ = $(patsubst %.cpp,%.o,$(SRC)) OBJ = $(patsubst %.cpp,%.o,$(SRC))
DEP = $(patsubst %.cpp,%.d,$(SRC)) DEP = $(patsubst %.cpp,%.d,$(SRC))

24
error.hpp Normal file
View File

@ -0,0 +1,24 @@
#pragma once

#include <iostream>
#include <string>

namespace dsp {

// Latched by report(); defined once (in main.cpp) and read after a run
// to decide the process exit status.
extern bool had_error;

// Write a "[line L col C] error<where>: <message>" diagnostic to stderr
// and latch the global error flag.  Strings are taken by const reference
// so emitting a diagnostic never copies them.
static inline void report(const int line, const int col,
                          const std::string & where, const std::string & message)
{
  std::cerr << "[line " << line
            << " col " << col
            << "] error" << where << ": " << message
            << std::endl;

  had_error = true;
}

// Convenience wrapper: report an error with no location qualifier.
static inline void error(const int line, const int col, const std::string & message)
{
  report(line, col, "", message);
}

}

221
lexer.cpp
View File

@ -7,6 +7,7 @@
#include "num.hpp" #include "num.hpp"
#include "lexer.hpp" #include "lexer.hpp"
#include "keyword.hpp" #include "keyword.hpp"
#include "error.hpp"
namespace dsp { namespace dsp {
@ -59,7 +60,7 @@ struct dec_t {
} }
template <typename N> template <typename N>
constexpr static token_t<N> parse(const std::string_view s) constexpr static N parse(const std::string_view s)
{ {
return parse_number<N, 10>(s); return parse_number<N, 10>(s);
} }
@ -74,145 +75,137 @@ struct hex_t {
} }
template <typename N> template <typename N>
constexpr static token_t<N> parse(const std::string_view s) constexpr static N parse(const std::string_view s)
{ {
return parse_number<N, 16>(s); return parse_number<N, 16>(s);
} }
}; };
// True when `c` is an ASCII letter (either case).
inline constexpr bool alpha_p(const char c)
{
  const bool lower = c >= 'a' && c <= 'z';
  const bool upper = c >= 'A' && c <= 'Z';
  return lower || upper;
}
constexpr bool alpha_numeric_p(const char c) inline constexpr bool alpha_numeric_p(const char c)
{ {
return alpha_p(c) || dec_t::pred(c) || (c == '_'); return alpha_p(c) || dec_t::pred(c) || (c == '_');
} }
struct lexer_t { bool lexer_t::at_end_p()
const std::string_view source; {
std::string_view::size_type start_ix; return current_ix >= source.length();
std::string_view::size_type current_ix; }
token_pos_t pos;
// One-character lookahead without consuming; NUL signals end of input.
char lexer_t::peek()
{
  return at_end_p() ? '\0' : source[current_ix];
}
constexpr lexer_t(const std::string_view source) bool lexer_t::match(const char expected)
: source(source), start_ix(0), pos{ .line = 1, .col = 0} {
{ } if (at_end_p()) return false;
else if (source[current_ix] != expected) return false;
pos.col++;
current_ix++;
return true;
}
bool at_end_p() char lexer_t::advance()
{ {
return current_ix >= source.length(); pos.col++;
} return source[current_ix++];
}
char peek() const std::string_view lexer_t::lexeme()
{ {
if (at_end_p()) return '\0'; return source.substr(start_ix, current_ix - start_ix);
return source[current_ix]; }
}
bool match(const char expected) template <typename T>
{ token lexer_t::_number()
if (at_end_p()) return false; {
else if (source[current_ix] != expected) return false; while (T::pred(peek())) advance();
pos.col++;
current_ix++;
return true;
}
char advance() return {pos, token::number, lexeme(), T::template parse<num_t>(lexeme())};
{ }
pos.col++;
return source[current_ix++];
}
const std::string_view lexeme() token lexer_t::_identifier()
{ {
return source.substr(start_ix, current_ix); while (alpha_numeric_p(peek())) advance();
} std::optional<enum token::type_t> keyword = keyword::find(lexeme());
if (keyword) return {pos, *keyword, lexeme()};
else return {pos, token::identifier, lexeme()};
}
template <typename T> std::optional<token> lexer_t::scan_token()
token _number() {
{ using enum token::type_t;
while (T::pred(peek())) advance();
return {pos, token::number, lexeme(), T::parse(lexeme())}; if (at_end_p())
} return {{pos, eof, ""}};
token _identifier() start_ix = current_ix;
{
while (alpha_numeric_p(peek())) advance();
std::optional<enum token::type_t> keyword = keyword::find(lexeme());
if (keyword) return {pos, *keyword, lexeme()};
else return {pos, token::identifier, lexeme()};
}
token scan_token() const char c = advance();
{ switch (c) {
using enum token::type_t; case '(': return {{pos, left_paren, lexeme()}};
case ')': return {{pos, right_paren, lexeme()}};
start_ix = current_ix; case ',': return {{pos, comma, lexeme()}};
case '.': return {{pos, dot, lexeme()}};
const char c = advance(); case '+': return {{pos, plus, lexeme()}};
switch (c) { case '-': return {{pos, minus, lexeme()}};
case '(': return {pos, left_paren, lexeme()}; case '*': return {{pos, star, lexeme()}};
case ')': return {pos, right_paren, lexeme()}; case '/': return {{pos, slash, lexeme()}};
case ',': return {pos, comma, lexeme()}; case '%': return {{pos, percent, lexeme()}};
case '.': return {pos, dot, lexeme()}; case '~': return {{pos, tilde, lexeme()}};
case '+': return {pos, plus, lexeme()}; case '&': return {{pos, ampersand, lexeme()}};
case '-': return {pos, minus, lexeme()}; case '|': return {{pos, bar, lexeme()}};
case '*': return {pos, star, lexeme()}; case '^': return {{pos, carot, lexeme()}};
case '/': return {pos, slash, lexeme()}; case '<':
case '%': return {pos, percent, lexeme()}; if (match('<')) return {{pos, left_shift, lexeme()}};
case '~': return {pos, tilde, lexeme()}; break;
case '&': return {pos, ampersand, lexeme()}; case '>':
case '|': return {pos, bar, lexeme()}; if (match('>')) return {{pos, right_shift, lexeme()}};
case '^': return {pos, carot, lexeme()}; break;
case '<': case ';':
if (match('<')) return {pos, left_shift, lexeme()}; while (!at_end_p() && peek() != '\n') advance();
break; break;
case '>': case ' ':
if (match('>')) return {pos, right_shift, lexeme()}; case '\r':
break; case '\t':
case ';': break;
while (!at_end_p() && peek() != '\n') advance(); case '\n':
break; pos.line++;
case ' ': pos.col = 0;
case '\r': break;
case '\t': case '$':
break; if (hex_t::pred(peek())) {
case '\n': start_ix += 1;
pos.line++; return {_number<hex_t>()};
pos.col = 0;
break;
case '$':
if (hex_t::pred(peek())) {
start_ix += 1;
return _number<hex_t>();
}
[[fallthrough]];
case '0':
if (match('x')) {
if (hex_t::pred(peek())) {
start_ix += 2;
return _number<hex_t>();
}
}
[[fallthrough]];
default:
if (dec_t::pred(c)) {
return _number<dec_t>();
} else if (alpha_p(c)) {
return _identifier();
} else {
//error(pos.line, "Unexpected character.");
}
break;
} }
[[fallthrough]];
case '0':
if (match('x')) {
if (hex_t::pred(peek())) {
start_ix += 2;
return {_number<hex_t>()};
}
}
[[fallthrough]];
default:
if (dec_t::pred(c)) {
return {_number<dec_t>()};
} else if (alpha_p(c)) {
return {_identifier()};
} else {
error(pos.line, pos.col - 1, "Unexpected character.");
return {};
}
break;
} }
}; __builtin_unreachable();
}
} }

View File

@ -1 +1,41 @@
#pragma once
#include <string_view>
#include <optional>
#include "token.hpp"
namespace dsp {
// Hand-written scanner: produces one token per scan_token() call from a
// borrowed string_view.  The caller owns the underlying character
// storage; it must outlive the lexer and every lexeme it hands out.
struct lexer_t {
// Input being scanned (non-owning view).
const std::string_view source;
// Index of the first character of the token currently being scanned.
std::string_view::size_type start_ix;
// Index of the next character to consume.
std::string_view::size_type current_ix;
// Line/column of the scan position, used for diagnostics.
token_pos_t pos;
lexer_t() = delete;
constexpr lexer_t(const std::string_view source)
: source(source), start_ix(0), current_ix(0), pos{ .line = 1, .col = 0 }
{ }
// Scan and return the next token; empty optional after a scan error.
std::optional<token> scan_token();
private:
// True once every character of `source` has been consumed.
bool at_end_p();
// One-character lookahead without consuming ('\0' at end of input).
char peek();
// Consume the next character only if it equals `expected`.
bool match(const char expected);
// Consume and return the next character, advancing the column counter.
char advance();
// View of the current token's text: source[start_ix, current_ix).
const std::string_view lexeme();
// Scan the rest of a numeric literal using numeral class T (dec_t/hex_t).
template <typename T>
token _number();
// Scan the rest of an identifier or keyword.
token _identifier();
};
}

View File

@ -2,25 +2,27 @@
#include <fstream> #include <fstream>
#include <string> #include <string>
#include "lexer.hpp"
#include "token.hpp" #include "token.hpp"
namespace dsp {

// Storage for the flag declared `extern bool had_error;` in error.hpp.
bool had_error = false;

}
static void run(std::string source) static void run(std::string source)
{ {
using namespace dsp;
std::string_view buf {source}; std::string_view buf {source};
(void)buf; lexer_t lexer {buf};
while (std::optional<token> token_o = lexer.scan_token()) {
std::cout << *token_o << std::endl;
if (token_o->type == token::type_t::eof) {
break;
}
}
} }
static void run_prompt() static void run_prompt()
@ -49,13 +51,13 @@ static int run_file(char const * const filename)
return -1; return -1;
} }
run(buf); run(buf);
return had_error; return dsp::had_error;
} }
int main(const int argc, char const * const argv[]) int main(const int argc, char const * const argv[])
{ {
switch (argc) { switch (argc) {
case 1: run_prompt(); return had_error; case 1: run_prompt(); return dsp::had_error;
case 2: return run_file(argv[1]); case 2: return run_file(argv[1]);
default: default:
std::cerr << "Usage: " << argv[0] << " [filename]" << std::endl; std::cerr << "Usage: " << argv[0] << " [filename]" << std::endl;

View File

@ -189,6 +189,6 @@ struct token_t {
}; };
}
using token = dsp::token_t<num_t>; using token = dsp::token_t<num_t>;
}