lexer: add header

This also calls the lexer from main.
This commit is contained in:
Zack Buhman 2023-08-14 11:38:52 -07:00
parent 29428c7a92
commit 082ebc20e5
6 changed files with 189 additions and 129 deletions

View File

@@ -2,9 +2,10 @@ CXXFLAGS = -Og -g -Wall -Wextra -Werror -Wfatal-errors -Wpedantic -std=c++20
LDFLAGS =
TARGET =
CXX = $(TARGET)g++
CXX = $(TARGET)clang++
SRC = main.cpp
SRC += lexer.cpp
OBJ = $(patsubst %.cpp,%.o,$(SRC))
DEP = $(patsubst %.cpp,%.d,$(SRC))

24
error.hpp Normal file
View File

@@ -0,0 +1,24 @@
#pragma once

#include <iostream>
#include <string_view>

namespace dsp {

// Latched to true by report(); defined in main.cpp and used there to
// choose the process exit status.
extern bool had_error;

/// Print a diagnostic of the form "[line L col C] error<where>: <message>"
/// to stderr and latch the global error flag.
///
/// Takes string_view (not std::string by value) so string literals and
/// lexeme views are passed without allocating copies.
static inline void report(const int line, const int col,
                          const std::string_view where,
                          const std::string_view message)
{
  std::cerr << "[line " << line
            << " col " << col
            << "] error" << where << ": " << message
            << std::endl;
  had_error = true;
}

/// Convenience wrapper: an error with no location qualifier.
static inline void error(const int line, const int col, const std::string_view message)
{
  report(line, col, "", message);
}

}

121
lexer.cpp
View File

@@ -7,6 +7,7 @@
#include "num.hpp"
#include "lexer.hpp"
#include "keyword.hpp"
#include "error.hpp"
namespace dsp {
@@ -59,7 +60,7 @@ struct dec_t {
}
template <typename N>
constexpr static token_t<N> parse(const std::string_view s)
constexpr static N parse(const std::string_view s)
{
return parse_number<N, 10>(s);
}
@@ -74,108 +75,99 @@ struct hex_t {
}
template <typename N>
constexpr static token_t<N> parse(const std::string_view s)
constexpr static N parse(const std::string_view s)
{
return parse_number<N, 16>(s);
}
};
constexpr bool alpha_p(const char c)
// True for ASCII letters of either case; used to start identifiers.
inline constexpr bool alpha_p(const char c)
{
  const bool lower = c >= 'a' && c <= 'z';
  const bool upper = c >= 'A' && c <= 'Z';
  return lower || upper;
}
constexpr bool alpha_numeric_p(const char c)
// Identifier-continuation characters: letters, decimal digits, underscore.
inline constexpr bool alpha_numeric_p(const char c)
{
  if (c == '_') return true;
  return alpha_p(c) || dec_t::pred(c);
}
struct lexer_t {
const std::string_view source;
std::string_view::size_type start_ix;
std::string_view::size_type current_ix;
token_pos_t pos;
lexer_t() = delete;
constexpr lexer_t(const std::string_view source)
: source(source), start_ix(0), pos{ .line = 1, .col = 0}
{ }
bool at_end_p()
{
bool lexer_t::at_end_p()
{
return current_ix >= source.length();
}
}
char peek()
{
// Current character without consuming it; NUL once the input is exhausted.
char lexer_t::peek()
{
  return at_end_p() ? '\0' : source[current_ix];
}
}
bool match(const char expected)
{
// Conditionally consume one character: advance (and count the column)
// only when the next character equals `expected`.
bool lexer_t::match(const char expected)
{
  const bool hit = !at_end_p() && source[current_ix] == expected;
  if (hit) {
    ++pos.col;
    ++current_ix;
  }
  return hit;
}
}
char advance()
{
// Consume and return the next character, advancing the column counter.
char lexer_t::advance()
{
  const char c = source[current_ix];
  ++current_ix;
  ++pos.col;
  return c;
}
}
const std::string_view lexeme()
{
return source.substr(start_ix, current_ix);
}
// Text of the token currently being scanned: the half-open window
// [start_ix, current_ix) of the source buffer.
const std::string_view lexer_t::lexeme()
{
  const auto length = current_ix - start_ix;
  return source.substr(start_ix, length);
}
template <typename T>
token _number()
{
template <typename T>
token lexer_t::_number()
{
while (T::pred(peek())) advance();
return {pos, token::number, lexeme(), T::parse(lexeme())};
}
return {pos, token::number, lexeme(), T::template parse<num_t>(lexeme())};
}
token _identifier()
{
// Scan the rest of an identifier.  Names that match a reserved word get
// that keyword's token type; everything else is a plain identifier.
token lexer_t::_identifier()
{
  while (alpha_numeric_p(peek()))
    advance();

  if (const auto kw = keyword::find(lexeme()))
    return {pos, *kw, lexeme()};
  return {pos, token::identifier, lexeme()};
}
}
token scan_token()
{
std::optional<token> lexer_t::scan_token()
{
using enum token::type_t;
if (at_end_p())
return {{pos, eof, ""}};
start_ix = current_ix;
const char c = advance();
switch (c) {
case '(': return {pos, left_paren, lexeme()};
case ')': return {pos, right_paren, lexeme()};
case ',': return {pos, comma, lexeme()};
case '.': return {pos, dot, lexeme()};
case '+': return {pos, plus, lexeme()};
case '-': return {pos, minus, lexeme()};
case '*': return {pos, star, lexeme()};
case '/': return {pos, slash, lexeme()};
case '%': return {pos, percent, lexeme()};
case '~': return {pos, tilde, lexeme()};
case '&': return {pos, ampersand, lexeme()};
case '|': return {pos, bar, lexeme()};
case '^': return {pos, carot, lexeme()};
case '(': return {{pos, left_paren, lexeme()}};
case ')': return {{pos, right_paren, lexeme()}};
case ',': return {{pos, comma, lexeme()}};
case '.': return {{pos, dot, lexeme()}};
case '+': return {{pos, plus, lexeme()}};
case '-': return {{pos, minus, lexeme()}};
case '*': return {{pos, star, lexeme()}};
case '/': return {{pos, slash, lexeme()}};
case '%': return {{pos, percent, lexeme()}};
case '~': return {{pos, tilde, lexeme()}};
case '&': return {{pos, ampersand, lexeme()}};
case '|': return {{pos, bar, lexeme()}};
case '^': return {{pos, carot, lexeme()}};
case '<':
if (match('<')) return {pos, left_shift, lexeme()};
if (match('<')) return {{pos, left_shift, lexeme()}};
break;
case '>':
if (match('>')) return {pos, right_shift, lexeme()};
if (match('>')) return {{pos, right_shift, lexeme()}};
break;
case ';':
while (!at_end_p() && peek() != '\n') advance();
@@ -191,28 +183,29 @@ struct lexer_t {
case '$':
if (hex_t::pred(peek())) {
start_ix += 1;
return _number<hex_t>();
return {_number<hex_t>()};
}
[[fallthrough]];
case '0':
if (match('x')) {
if (hex_t::pred(peek())) {
start_ix += 2;
return _number<hex_t>();
return {_number<hex_t>()};
}
}
[[fallthrough]];
default:
if (dec_t::pred(c)) {
return _number<dec_t>();
return {_number<dec_t>()};
} else if (alpha_p(c)) {
return _identifier();
return {_identifier()};
} else {
//error(pos.line, "Unexpected character.");
error(pos.line, pos.col - 1, "Unexpected character.");
return {};
}
break;
}
}
};
__builtin_unreachable();
}
}

View File

@@ -1 +1,41 @@
#pragma once
#include <string_view>
#include <optional>
#include "token.hpp"
namespace dsp {
// Hand-written scanner over an in-memory source buffer.  Each
// scan_token() call yields the next token; state is the scan window
// [start_ix, current_ix) plus a line/column position for diagnostics.
struct lexer_t {
// NOTE(review): the lexer does not own the text — `source` is a view,
// so the underlying buffer must outlive the lexer_t.
const std::string_view source;
// Index of the first character of the token being scanned.
std::string_view::size_type start_ix;
// Index of the next character to consume.
std::string_view::size_type current_ix;
// Position attached to emitted tokens; starts at line 1, column 0.
token_pos_t pos;
lexer_t() = delete;
constexpr lexer_t(const std::string_view source)
: source(source), start_ix(0), current_ix(0), pos{ .line = 1, .col = 0 }
{ }
// Next token, or an empty optional when scanning fails.
std::optional<token> scan_token();
private:
bool at_end_p();
char peek();
bool match(const char expected);
char advance();
// View of the current token's text within `source`.
const std::string_view lexeme();
// Scans a numeric literal whose digit predicate/radix comes from T.
template <typename T>
token _number();
token _identifier();
};
}

View File

@@ -2,25 +2,27 @@
#include <fstream>
#include <string>
#include "lexer.hpp"
#include "token.hpp"
static bool had_error = false;
namespace dsp {
static void report(int line, std::string where, std::string message)
{
std::cerr << "[line " << line << "] Error" << where << ": " << message;
had_error = true;
}
bool had_error = false;
void error(int line, std::string message)
{
report(line, "", message);
}
// Lex `source` and print every token to stdout, stopping at EOF or when
// scan_token() signals an error by returning an empty optional.
static void run(std::string source)
{
  using namespace dsp;

  std::string_view buf {source};
  // Removed the leftover `(void)buf;` unused-variable cast: buf is now
  // genuinely used to construct the lexer.
  lexer_t lexer {buf};

  while (std::optional<token> token_o = lexer.scan_token()) {
    std::cout << *token_o << std::endl;
    if (token_o->type == token::type_t::eof) {
      break;
    }
  }
}
static void run_prompt()
@@ -49,13 +51,13 @@ static int run_file(char const * const filename)
return -1;
}
run(buf);
return had_error;
return dsp::had_error;
}
int main(const int argc, char const * const argv[])
{
switch (argc) {
case 1: run_prompt(); return had_error;
case 1: run_prompt(); return dsp::had_error;
case 2: return run_file(argv[1]);
default:
std::cerr << "Usage: " << argv[0] << " [filename]" << std::endl;

View File

@@ -189,6 +189,6 @@ struct token_t {
};
}
using token = dsp::token_t<num_t>;
}