assembler/lexer: add support for #include directive

Zack Buhman 2025-11-11 15:06:11 -06:00
parent 90b486e744
commit 9e281cba58
4 changed files with 60 additions and 17 deletions

File 1 of 4 (error printer):

@@ -1,8 +1,10 @@
 import sys
 
-def print_error(filename, buf, e):
+def print_error(e):
     assert len(e.args) == 2, e
     message, token = e.args
+    with open(token.filename, 'rb') as f:
+        buf = f.read()
 
     lines = buf.splitlines()
     line = lines[token.line - 1]
@@ -11,7 +13,7 @@ def print_error(filename, buf, e):
     col_pointer = '^' * len(token.lexeme)
     RED = "\033[0;31m"
     DEFAULT = "\033[0;0m"
-    print(f'File: "{filename}", line {token.line}, column {token.col}\n', file=sys.stderr)
+    print(f'File: "{token.filename}", line {token.line}, column {token.col}\n', file=sys.stderr)
     sys.stderr.write(' ')
     wrote_default = False
     for i, c in enumerate(line.decode('utf-8')):
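Since an include can make a failing token originate in a different file from the buffer the caller holds, print_error now takes only the exception and re-reads the source from token.filename. A minimal sketch of the new calling convention; the helper and the find_keyword stub are hypothetical, and the assembler.lexer module path is assumed from the commit title:

    from assembler.error import print_error        # same import the frontends use
    from assembler.lexer import Lexer, LexerError  # module path assumed

    def lex_or_report(filename):
        # Hypothetical helper: lex one file and report errors without
        # threading filename/buf through to print_error.
        with open(filename, 'rb') as f:
            buf = f.read()
        try:
            # find_keyword stub: every lexeme is a plain identifier
            return list(Lexer(filename, buf, find_keyword=lambda lexeme: None).lex_tokens())
        except LexerError as e:
            print_error(e)  # the token inside e carries its own filename
            raise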

File 2 of 4 (assembler frontend):

@@ -10,8 +10,8 @@ from assembler.fs.validator import validate_instruction
 from assembler.fs.emitter import emit_instruction
 from assembler.error import print_error
 
-def frontend_inner(buf):
-    lexer = Lexer(buf, find_keyword, emit_newlines=False, minus_is_token=True)
+def frontend_inner(filename, buf):
+    lexer = Lexer(filename, buf, find_keyword, emit_newlines=False, minus_is_token=True)
     tokens = list(lexer.lex_tokens())
     parser = Parser(tokens)
     for ins_ast in parser.instructions():
@@ -22,15 +22,15 @@ def frontend_inner(buf):
 def frontend(filename, buf):
     try:
-        yield from frontend_inner(buf)
+        yield from frontend_inner(filename, buf)
     except LexerError as e:
-        print_error(filename, buf, e)
+        print_error(e)
         raise
     except ParserError as e:
-        print_error(filename, buf, e)
+        print_error(e)
         raise
     except ValidatorError as e:
-        print_error(filename, buf, e)
+        print_error(e)
         raise
 
 
 if __name__ == "__main__":

File 3 of 4 (lexer):

@@ -2,6 +2,7 @@ from dataclasses import dataclass
 from enum import Enum, auto
 from itertools import chain
 from typing import Union, Any
+from os import path
 
 DEBUG = True
@@ -25,6 +26,7 @@ class TT(Enum):
 @dataclass
 class Token:
+    filename: str
     start_ix: int
     line: int
     col: int
@@ -43,7 +45,10 @@ class LexerError(Exception):
     pass
 
 class Lexer:
-    def __init__(self, buf: memoryview, find_keyword,
+    def __init__(self,
+                 filename: str,
+                 buf: memoryview,
+                 find_keyword,
                  emit_newlines=False,
                  minus_is_token=False):
         self.start_ix = 0
@@ -54,6 +59,8 @@
         self.find_keyword = find_keyword
         self.emit_newlines = emit_newlines
         self.minus_is_token = minus_is_token
+        self.filename = filename
+        self.nested_lexer = None
 
     def at_end_p(self):
         return self.current_ix >= len(self.buf)
@@ -74,7 +81,7 @@
         return self.buf[self.current_ix]
 
     def pos(self):
-        return self.start_ix, self.line, self.col - (self.current_ix - self.start_ix)
+        return self.filename, self.start_ix, self.line, self.col - (self.current_ix - self.start_ix)
 
     def identifier(self):
         while not self.at_end_p() and self.peek() in identifier_characters:
@@ -85,8 +92,26 @@
         else:
             return Token(*self.pos(), TT.identifier, self.lexeme(), None)
 
+    def include(self, filename):
+        dirname = path.dirname(self.filename)
+        new_filename = path.join(dirname, filename.decode('utf-8'))
+        with open(new_filename, 'rb') as f:
+            buf = f.read()
+        self.nested_lexer = Lexer(new_filename,
+                                  buf,
+                                  find_keyword=self.find_keyword,
+                                  emit_newlines=self.emit_newlines,
+                                  minus_is_token=self.minus_is_token)
+
     def lex_token(self):
         while True:
+            if self.nested_lexer is not None:
+                token = self.nested_lexer.lex_token()
+                if token.type is TT.eof:
+                    self.nested_lexer = None
+                else:
+                    return token
+
             self.start_ix = self.current_ix
             if self.at_end_p():
@@ -122,8 +147,24 @@
            elif self.minus_is_token and c == ord('-'):
                return Token(*self.pos(), TT.minus, self.lexeme())
            elif c == ord('#'):
-               while not self.at_end_p() and self.peek() != ord('\n'):
+               for c in b"include":
+                   o = self.advance()
+                   if o != c:
+                       token = Token(*self.pos(), None, self.lexeme())
+                       raise LexerError(f"unexpected character at line:{self.line} col:{self.col}, expected `#include`", token)
+               while self.peek() == ord(' '):
                    self.advance()
+               self.start_ix = self.current_ix
+               quote = self.advance()
+               if quote != ord('"'):
+                   token = Token(*self.pos(), None, self.lexeme())
+                   raise LexerError(f"unexpected character at line:{self.line} col:{self.col}, expected `\"`", token)
+               self.start_ix = self.current_ix
+               while self.peek() != ord('"'):
+                   self.advance()
+               filename = self.lexeme()
+               assert self.advance() == ord('"')
+               self.include(filename)
            elif c == ord(' ') or c == ord('\r') or c == ord('\t'):
                pass
            elif c == ord('\n'):
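For illustration, a sketch of the directive end to end: included paths are resolved relative to the including file via path.dirname, and the nested lexer's eof token is swallowed so lexing resumes in the including file. The file names and the find_keyword stub are hypothetical; the constructor and lex_tokens calls match the signatures above:

    # Hypothetical layout:
    #   prog/main.asm contains the line:  #include "lib.asm"
    #   prog/lib.asm  is opened relative to main.asm's directory
    from assembler.lexer import Lexer  # module path assumed

    with open('prog/main.asm', 'rb') as f:
        buf = f.read()

    # find_keyword stub: treat every lexeme as a plain identifier
    lexer = Lexer('prog/main.asm', buf, find_keyword=lambda lexeme: None)

    for token in lexer.lex_tokens():
        # Tokens lexed inside lib.asm report token.filename == 'prog/lib.asm',
        # so print_error points at the right file and line.
        print(token.filename, token.line, token.lexeme)

Because the nested lexer is itself a full Lexer with its own nested_lexer slot, an included file can in turn #include further files. Note also that the old behavior for '#' (skip to end of line, effectively a comment) is gone: a '#' must now begin a well-formed #include directive, otherwise the lexer raises a LexerError.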

File 4 of 4 (second frontend):

@@ -22,8 +22,8 @@ out[0].xz = VE_MAD input[0].-y-_-0-_ temp[0].x_0_ temp[0].y_0_
 out[0].yw = VE_MAD input[0]._x_0 temp[0]._x_0 temp[0]._z_1
 """
 
-def frontend_inner(buf):
-    lexer = Lexer(buf, find_keyword)
+def frontend_inner(filename, buf):
+    lexer = Lexer(filename, buf, find_keyword)
     tokens = list(lexer.lex_tokens())
     parser = Parser(tokens)
     for ins in parser.instructions():
@@ -37,15 +37,15 @@ def frontend_inner(buf):
 def frontend(filename, buf):
     try:
-        yield from frontend_inner(buf)
+        yield from frontend_inner(filename, buf)
     except ParserError as e:
-        print_error(input_filename, buf, e)
+        print_error(e)
         raise
     except LexerError as e:
-        print_error(input_filename, buf, e)
+        print_error(e)
         raise
     except ValidatorError as e:
-        print_error(filename, buf, e)
+        print_error(e)
         raise
 
 
 if __name__ == "__main__":