assembler/lexer: add support for #include directive

Zack Buhman 2025-11-11 15:06:11 -06:00
parent 90b486e744
commit 9e281cba58
4 changed files with 60 additions and 17 deletions
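
Before this commit a '#' introduced a comment that the lexer skipped to end of line; it now begins an #include directive whose double-quoted path is resolved relative to the directory of the including file, and tokens lexed out of the included file carry that file's name for diagnostics. A hypothetical input illustrating the new syntax (file names invented; the instruction line is taken from the test source below):

    main.fs, sitting next to defs.fs on disk:

        #include "defs.fs"
        out[0].xz = VE_MAD input[0].-y-_-0-_ temp[0].x_0_ temp[0].y_0_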

View File

@@ -1,8 +1,10 @@
 import sys
 
 
-def print_error(filename, buf, e):
+def print_error(e):
     assert len(e.args) == 2, e
     message, token = e.args
+    with open(token.filename, 'rb') as f:
+        buf = f.read()
     lines = buf.splitlines()
     line = lines[token.line - 1]
@@ -11,7 +13,7 @@ def print_error(filename, buf, e):
     col_pointer = '^' * len(token.lexeme)
     RED = "\033[0;31m"
     DEFAULT = "\033[0;0m"
-    print(f'File: "{filename}", line {token.line}, column {token.col}\n', file=sys.stderr)
+    print(f'File: "{token.filename}", line {token.line}, column {token.col}\n', file=sys.stderr)
     sys.stderr.write(' ')
     wrote_default = False
     for i, c in enumerate(line.decode('utf-8')):
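
print_error now takes only the exception: the Token stored in e.args carries its filename, so print_error reopens the source itself instead of having callers thread filename and buf through every call site. A minimal sketch of the new call shape, mirroring the frontends below:

    try:
        tokens = list(lexer.lex_tokens())
    except LexerError as e:
        print_error(e)   # e.args == (message, token); token.filename names the file to reopen
        raise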

View File

@@ -10,8 +10,8 @@ from assembler.fs.validator import validate_instruction
 from assembler.fs.emitter import emit_instruction
 from assembler.error import print_error
 
-def frontend_inner(buf):
-    lexer = Lexer(buf, find_keyword, emit_newlines=False, minus_is_token=True)
+def frontend_inner(filename, buf):
+    lexer = Lexer(filename, buf, find_keyword, emit_newlines=False, minus_is_token=True)
     tokens = list(lexer.lex_tokens())
     parser = Parser(tokens)
     for ins_ast in parser.instructions():
@@ -22,15 +22,15 @@ def frontend_inner(buf):
 def frontend(filename, buf):
     try:
-        yield from frontend_inner(buf)
+        yield from frontend_inner(filename, buf)
     except LexerError as e:
-        print_error(filename, buf, e)
+        print_error(e)
         raise
     except ParserError as e:
-        print_error(filename, buf, e)
+        print_error(e)
         raise
     except ValidatorError as e:
-        print_error(filename, buf, e)
+        print_error(e)
         raise
 
 
 if __name__ == "__main__":
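
The frontend now accepts the filename purely to hand it to the Lexer. A hypothetical driver for this entry point (a sketch only; the actual __main__ block is not part of the hunk):

    import sys

    filename = sys.argv[1]
    with open(filename, 'rb') as f:
        buf = f.read()
    for ins in frontend(filename, buf):
        print(ins)  # consume whatever the emitter yields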

View File

@@ -2,6 +2,7 @@ from dataclasses import dataclass
 from enum import Enum, auto
 from itertools import chain
 from typing import Union, Any
+from os import path
 
 
 DEBUG = True
@@ -25,6 +26,7 @@ class TT(Enum):
 
 @dataclass
 class Token:
+    filename: str
     start_ix: int
     line: int
     col: int
@@ -43,7 +45,10 @@ class LexerError(Exception):
     pass
 
 class Lexer:
-    def __init__(self, buf: memoryview, find_keyword,
+    def __init__(self,
+                 filename: str,
+                 buf: memoryview,
+                 find_keyword,
                  emit_newlines=False,
                  minus_is_token=False):
         self.start_ix = 0
@@ -54,6 +59,8 @@ class Lexer:
         self.find_keyword = find_keyword
         self.emit_newlines = emit_newlines
         self.minus_is_token = minus_is_token
+        self.filename = filename
+        self.nested_lexer = None
 
     def at_end_p(self):
         return self.current_ix >= len(self.buf)
@@ -74,7 +81,7 @@ class Lexer:
         return self.buf[self.current_ix]
 
     def pos(self):
-        return self.start_ix, self.line, self.col - (self.current_ix - self.start_ix)
+        return self.filename, self.start_ix, self.line, self.col - (self.current_ix - self.start_ix)
 
     def identifier(self):
         while not self.at_end_p() and self.peek() in identifier_characters:
@@ -85,8 +92,26 @@ class Lexer:
         else:
             return Token(*self.pos(), TT.identifier, self.lexeme(), None)
 
+    def include(self, filename):
+        dirname = path.dirname(self.filename)
+        new_filename = path.join(dirname, filename.decode('utf-8'))
+        with open(new_filename, 'rb') as f:
+            buf = f.read()
+        self.nested_lexer = Lexer(new_filename,
+                                  buf,
+                                  find_keyword=self.find_keyword,
+                                  emit_newlines=self.emit_newlines,
+                                  minus_is_token=self.minus_is_token)
+
     def lex_token(self):
         while True:
+            if self.nested_lexer is not None:
+                token = self.nested_lexer.lex_token()
+                if token.type is TT.eof:
+                    self.nested_lexer = None
+                else:
+                    return token
+
             self.start_ix = self.current_ix
             if self.at_end_p():
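
include() resolves the quoted path against the directory of the file currently being lexed, not the process working directory, and since the nested lexer is a complete Lexer, an included file may itself contain #include: delegation recurses one level per nesting through the lex_token loop above. The resolution rule in isolation (paths invented):

    >>> from os import path
    >>> path.join(path.dirname('shaders/main.fs'), 'defs.fs')
    'shaders/defs.fs'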
@@ -122,8 +147,24 @@ class Lexer:
             elif self.minus_is_token and c == ord('-'):
                 return Token(*self.pos(), TT.minus, self.lexeme())
             elif c == ord('#'):
-                while not self.at_end_p() and self.peek() != ord('\n'):
+                for c in b"include":
+                    o = self.advance()
+                    if o != c:
+                        token = Token(*self.pos(), None, self.lexeme())
+                        raise LexerError(f"unexpected character at line:{self.line} col:{self.col}, expected `#include`", token)
+                while self.peek() == ord(' '):
                     self.advance()
+                self.start_ix = self.current_ix
+                quote = self.advance()
+                if quote != ord('"'):
+                    token = Token(*self.pos(), None, self.lexeme())
+                    raise LexerError(f"unexpected character at line:{self.line} col:{self.col}, expected `\"`", token)
+                self.start_ix = self.current_ix
+                while self.peek() != ord('"'):
+                    self.advance()
+                filename = self.lexeme()
+                assert self.advance() == ord('"')
+                self.include(filename)
             elif c == ord(' ') or c == ord('\r') or c == ord('\t'):
                 pass
             elif c == ord('\n'):
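
A minimal end-to-end sketch of the directive, assuming Lexer and find_keyword are importable from this module (temporary file names invented):

    import os
    import tempfile

    d = tempfile.mkdtemp()
    with open(os.path.join(d, 'defs.inc'), 'wb') as f:
        f.write(b'foo\n')                       # the included file
    with open(os.path.join(d, 'main.src'), 'wb') as f:
        f.write(b'#include "defs.inc"\nbar\n')  # pulls defs.inc in

    with open(os.path.join(d, 'main.src'), 'rb') as f:
        buf = f.read()
    lexer = Lexer(os.path.join(d, 'main.src'), buf, find_keyword)
    for token in lexer.lex_tokens():
        # tokens lexed out of defs.inc report defs.inc as their filename,
        # so print_error can point at the right file
        print(token.filename, token.lexeme)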

View File

@@ -22,8 +22,8 @@ out[0].xz = VE_MAD input[0].-y-_-0-_ temp[0].x_0_ temp[0].y_0_
 out[0].yw = VE_MAD input[0]._x_0 temp[0]._x_0 temp[0]._z_1
 """
 
-def frontend_inner(buf):
-    lexer = Lexer(buf, find_keyword)
+def frontend_inner(filename, buf):
+    lexer = Lexer(filename, buf, find_keyword)
     tokens = list(lexer.lex_tokens())
     parser = Parser(tokens)
     for ins in parser.instructions():
@@ -37,15 +37,15 @@ def frontend_inner(buf):
 def frontend(filename, buf):
     try:
-        yield from frontend_inner(buf)
+        yield from frontend_inner(filename, buf)
     except ParserError as e:
-        print_error(input_filename, buf, e)
+        print_error(e)
         raise
     except LexerError as e:
-        print_error(input_filename, buf, e)
+        print_error(e)
         raise
     except ValidatorError as e:
-        print_error(filename, buf, e)
+        print_error(e)
         raise
 
 
 if __name__ == "__main__":
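
The old ParserError and LexerError handlers in this file referenced input_filename, a name that does not exist in frontend()'s scope, so reporting either error would itself have raised NameError before anything was printed. For illustration, the failure mode the removed lines carried:

    def frontend(filename, buf):
        try:
            yield from frontend_inner(buf)
        except ParserError as e:
            print_error(input_filename, buf, e)  # NameError: 'input_filename' is not defined
            raise

With print_error(e), the Token itself supplies the filename and every handler collapses to the same shape.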