262 lines
6.7 KiB
C++
262 lines
6.7 KiB
C++
#include <cassert>
#include <functional>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <unordered_map>
#include <variant>

#include "assembler.hh"
#include "addressing_mode.hh"
#include "fromstring.hh"
#include "isa.hh"
|
|
|
|
/// Splits `buf` into tokens and hands each one to `cb`.
///
/// A token is a maximal run of characters other than ' ' and '\n'. For every
/// token, `cb(token, row, col)` is invoked where `row` is the 0-based line the
/// token sits on and `col` is the 0-based column of the character immediately
/// after the token (the separator, or one past the end of the line when the
/// buffer ends there). The token is a view into `buf`.
///
/// After all tokens have been delivered, `cb` is invoked once more with an
/// empty token and `row + 1`, which lets the consumer detect end of input as
/// "a token on a later row".
///
/// @param buf  Text to split.
/// @param cb   Consumer; returning false aborts tokenization.
/// @return false as soon as `cb` returns false, true otherwise.
static bool tokenize(std::string_view buf, std::function<bool(std::string_view, ssize_t, ssize_t)> cb)
{
    std::optional<std::size_t> token_begin;

    ssize_t row = 0;
    ssize_t col = 0;

    for (std::size_t i = 0; i < buf.size(); ++i) {
        const char c = buf[i];
        const bool is_separator = (c == ' ' || c == '\n');

        if (is_separator) {
            if (token_begin) {
                if (!cb(buf.substr(*token_begin, i - *token_begin), row, col)) {
                    // cb failed
                    return false;
                }
                token_begin.reset();
            }
        } else if (!token_begin) {
            token_begin = i;
        }

        // Position bookkeeping happens after the callback so that a token
        // terminated by '\n' is still reported on the row it was written on.
        if (c == '\n') {
            col = 0;
            ++row;
        } else {
            ++col;
        }
    }

    // A token that runs up to the very end of the buffer has no separator to
    // trigger its delivery inside the loop; flush it here so input without a
    // trailing newline does not lose its last token.
    if (token_begin) {
        if (!cb(buf.substr(*token_begin), row, col)) {
            // cb failed
            return false;
        }
    }

    // End-of-input sentinel: empty token on a row past everything seen.
    return cb("", row + 1, col);
}
|
|
|
|
|
|
namespace parser {

/// States of the token-driven state machine in parser::parse().
/// Each state names what the next token is expected to be.
enum class state {
    /// Start of a statement: the token is a label (ends with ':'),
    /// the start of a comment (begins with ';'), or an opcode.
    label_or_op,
    /// A label was consumed; the token must be an opcode.
    op,
    /// An opcode was consumed; the token must be an addressing mode.
    mode,
    /// An addressing mode was consumed; the token is the operand, unless the
    /// mode takes none, in which case the token is handled as `comment`.
    value,
    /// A full instruction was read; only a ';' comment may follow on the same
    /// row. The instruction is stored once a token from a later row arrives.
    comment,
    /// Inside a row that holds only a comment; nothing is stored for it.
    just_comment,
};

}
|
|
|
|
// Symbol table type: maps a symbol's spelling to its numeric id.
// The keys are non-owning views, so the text they refer to must stay alive
// for as long as the map is used.
using symbol_strings_t = std::unordered_map<std::string_view, size_t>;
|
|
|
|
namespace parser {
|
|
|
|
bool parse(std::string_view buf, assembler::program_t& program)
|
|
{
|
|
size_t current_symbol = 0;
|
|
symbol_strings_t ss;
|
|
|
|
parser::state state = parser::state::label_or_op;
|
|
|
|
auto get_symbol = [&](std::string_view s) {
|
|
auto find = ss.find(s);
|
|
if (find == ss.end()) {
|
|
auto insert = ss.insert({s, current_symbol++});
|
|
assert(insert.second);
|
|
return (insert.first)->second;
|
|
} else {
|
|
return find->second;
|
|
}
|
|
};
|
|
|
|
assembler::instruction_t current_instruction;
|
|
ssize_t current_row;
|
|
|
|
bool inside_comment = false;
|
|
|
|
bool parsed = tokenize(buf, [&](std::string_view s, ssize_t row, ssize_t col) -> bool {
|
|
while (true) {
|
|
switch (state) {
|
|
case parser::state::label_or_op:
|
|
{
|
|
current_row = row;
|
|
if (s.back() == ':') {
|
|
std::string_view key = s.substr(0, s.length() - 1);
|
|
auto symbol = get_symbol(key);
|
|
std::cout << "label `" << symbol << "`\n";
|
|
current_instruction.symbol = symbol;
|
|
state = parser::state::op;
|
|
return true;
|
|
} else if (s.front() == ';') {
|
|
state = parser::state::just_comment;
|
|
continue;
|
|
} else if (s.empty()) {
|
|
// I hate myself for this dirty hack
|
|
return true;
|
|
} else {
|
|
current_instruction.symbol = std::nullopt;
|
|
[[fallthrough]];
|
|
}
|
|
}
|
|
case parser::state::op:
|
|
{
|
|
assert(row == current_row);
|
|
auto op_it = fromstring::op().find(s);
|
|
if (op_it == fromstring::op().end()) {
|
|
std::cout << "invalid op `" << s << "`\n";
|
|
return false;
|
|
} else {
|
|
current_instruction.op = op_it->second;
|
|
std::cout << "ok op `" << static_cast<int>(op_it->second) << "`\n";
|
|
}
|
|
state = parser::state::mode;
|
|
return true;
|
|
}
|
|
case parser::state::mode:
|
|
{
|
|
assert(row == current_row);
|
|
auto mode_it = fromstring::mode().find(s);
|
|
if (mode_it == fromstring::mode().end()) {
|
|
std::cout << "invalid mode `" << s << "`\n";
|
|
return false;
|
|
} else {
|
|
current_instruction.mode = mode_it->second;
|
|
std::cout << "ok mode `" << static_cast<int>(mode_it->second) << "`\n";
|
|
}
|
|
state = parser::state::value;
|
|
return true;
|
|
}
|
|
case parser::state::value:
|
|
{
|
|
auto am_it = addressing_mode().find(current_instruction.mode);
|
|
assert(am_it != addressing_mode().end());
|
|
if (am_it->second.len == 0) {
|
|
std::cout << "no value expected\n";
|
|
assembler::implied_t i {};
|
|
current_instruction.value = i;
|
|
state = parser::state::comment;
|
|
continue;
|
|
}
|
|
|
|
assert(row == current_row);
|
|
|
|
auto parse_integer = [](std::string_view s, int base) -> std::optional<ssize_t> {
|
|
std::string value_str {s.data() + 1, s.size() - 1};
|
|
size_t pos;
|
|
ssize_t value;
|
|
value = std::stoll(value_str, &pos, base);
|
|
if (pos != value_str.length())
|
|
return std::nullopt;
|
|
else
|
|
return value;
|
|
};
|
|
|
|
std::optional<ssize_t> literal;
|
|
auto as_literal = [](ssize_t n) -> assembler::literal_t { return {n}; };
|
|
switch (*s.cbegin()) {
|
|
case 'h':
|
|
{
|
|
literal = parse_integer(s, 16);
|
|
if (literal == std::nullopt) {
|
|
std::cout << "invalid hex literal\n";
|
|
}
|
|
current_instruction.value = as_literal(*literal);
|
|
std::cout << "value hex literal `" << *literal << "`\n";
|
|
break;
|
|
}
|
|
case 'd':
|
|
{
|
|
literal = parse_integer(s, 10);
|
|
if (literal == std::nullopt) {
|
|
std::cout << "invalid dec literal\n";
|
|
return false;
|
|
}
|
|
current_instruction.value = as_literal(*literal);
|
|
std::cout << "value dec literal `" << *literal << "`\n";
|
|
break;
|
|
}
|
|
case ':':
|
|
{
|
|
std::string_view key = s.substr(1, s.length() - 1);
|
|
assembler::reference_t reference = { get_symbol(key) };
|
|
current_instruction.value = reference;
|
|
std::cout << "value reference `" << reference.symbol << "`\n";
|
|
break;
|
|
}
|
|
default:
|
|
std::cout << "invalid base\n";
|
|
return false;
|
|
}
|
|
|
|
state = parser::state::comment;
|
|
return true;
|
|
}
|
|
case parser::state::comment:
|
|
{
|
|
if (row != current_row) {
|
|
std::cerr << "push " << (int)current_instruction.op << '\n';
|
|
inside_comment = false;
|
|
program.push_back(current_instruction);
|
|
state = parser::state::label_or_op;
|
|
continue;
|
|
}
|
|
|
|
if (*s.cbegin() == ';') {
|
|
inside_comment = true;
|
|
return true;
|
|
} else if (inside_comment) {
|
|
return true;
|
|
} else {
|
|
std::cout << "expected comment\n";
|
|
return false;
|
|
}
|
|
}
|
|
case parser::state::just_comment:
|
|
{
|
|
if (row != current_row) {
|
|
inside_comment = false;
|
|
state = parser::state::label_or_op;
|
|
continue;
|
|
}
|
|
|
|
if (*s.cbegin() == ';') {
|
|
inside_comment = true;
|
|
return true;
|
|
} else if (inside_comment) {
|
|
return true;
|
|
} else {
|
|
std::cout << "expected comment\n";
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return true;
|
|
});
|
|
|
|
return parsed;
|
|
}
|
|
|
|
}
|