sh-dis/lexer.py
Zack Buhman 8a300ba4c6 initial SH4 emulator implementation in C
This currently only implements the SH2 instructions.
2024-04-22 20:53:36 +08:00

172 lines
4.2 KiB
Python

from dataclasses import dataclass
"""
token:
keyword
identifier
constant
punctuator
"""
def is_nondigit(c):
    """Return True when *c* is an underscore or a single ASCII letter.

    The check is whole-value set membership, so a multi-character string
    never matches.  The empty string is also accepted (see the note below).
    """
    lowercase = "abcdefghijklmnopqrstuvwxyz"
    accepted = set("_" + lowercase + lowercase.upper())
    # NOTE(review): the original set literal contains an empty-string member.
    # It can never equal a one-character input; it may be a non-ASCII
    # character that was lost in transcription -- confirm against upstream.
    accepted.add("")
    return c in accepted
def is_digit(c):
    """Return True when *c* is exactly one of the ASCII digits 0-9.

    Whole-value set membership is used, so multi-character strings and
    non-ASCII digits are rejected.
    """
    decimal_digits = frozenset("0123456789")
    return c in decimal_digits
def is_hexadecimal_digit(c):
    """Return True when *c* is a single hexadecimal digit (0-9, a-f, A-F).

    Whole-value set membership is used, so multi-character strings are
    rejected.
    """
    decimal_part = "0123456789"
    letter_part = "abcdef"
    hex_digits = frozenset(decimal_part + letter_part + letter_part.upper())
    return c in hex_digits
def is_punctuator(c):
    """Return True when *c* is exactly one of the known punctuator strings.

    *c* may be longer than one character: "<<" and ">>" are members, which
    lets a caller test whether a longer slice is still a punctuator.
    """
    # NOTE(review): the empty-string entries below are reproduced from the
    # original literal.  They look like non-ASCII operator characters lost in
    # transcription; their positions are kept so they can be restored --
    # confirm against upstream.  As written, "" is itself accepted.
    known = frozenset((
        "[", "]", "(", ")", "{", "}", ".",
        "+", "-", "~", "!",
        "<<", ">>",
        "<", ">",
        "", "",
        "", "=",
        "", "", "",
        "×", "/",
        "|", ";", ",",
        "",
    ))
    return c in known
@dataclass
class Identifier:
    """Token record for an identifier."""

    # Value of the lexer's line counter when the token was produced.
    line: int
    # Source text of the token.
    token: str
@dataclass
class IntegerConstant:
    """Token record for an integer literal."""

    # Value of the lexer's line counter when the token was produced.
    line: int
    # Source text of the literal.
    token: str
    # Numeric value of the literal.
    value: int
@dataclass
class Punctuator:
    """Token record for a punctuator."""

    # Value of the lexer's line counter when the token was produced.
    line: int
    # Source text of the punctuator.
    token: str
class Lexer:
    """Hand-written tokenizer over an indexable character buffer.

    ``buf[start:end]`` is the text of the token currently being scanned.
    ``line`` starts at 0 and is incremented once for every newline skipped
    by :meth:`advance_whitespace`.
    """

    def __init__(self, buf):
        """Start scanning *buf* from its first character."""
        self.buf = buf
        self.start = 0
        self.end = 0
        self.line = 0

    def _at_end(self):
        """Return True when the cursor is at or past the end of the buffer."""
        return self.end >= len(self.buf)

    def peek(self):
        """Return the character at the cursor without consuming it.

        Raises:
            IndexError: if the cursor is at the end of the buffer.
        """
        return self.buf[self.end]

    def match(self, c):
        """Consume the next character if it equals *c*.

        Returns True when a character was consumed.  Returns False, consuming
        nothing, when the next character differs or the buffer is exhausted.
        """
        if not self._at_end() and self.buf[self.end] == c:
            self.end += 1
            return True
        return False

    def slice(self, offset=0):
        """Return ``buf[start:end + offset]``, the current token text.

        A positive *offset* looks ahead past the cursor without consuming.
        """
        return self.buf[self.start:self.end + offset]

    def advance(self):
        """Consume and return the character at the cursor.

        Raises:
            IndexError: if the cursor is at the end of the buffer.
        """
        c = self.buf[self.end]
        self.end += 1
        return c

    def advance_whitespace(self):
        """Consume at most one whitespace character.

        Newline, carriage return, tab and space are whitespace.  Only a
        newline increments ``line``, so a CRLF pair counts as one line.

        Returns True when a character was consumed, False otherwise
        (including at the end of the buffer).
        """
        if self.match('\n'):
            self.line += 1
            return True
        # The original tested '\n' a second time here, which was unreachable
        # and left '\r' unhandled.
        return self.match('\r') or self.match('\t') or self.match(' ')

    def identifier(self):
        """Consume the rest of an identifier and return it as an Identifier.

        ``next_token`` has already consumed the first character.  Scanning
        stops at the first character that is neither a digit nor a nondigit,
        or at the end of the buffer.
        """
        while not self._at_end():
            c = self.peek()
            if not (is_digit(c) or is_nondigit(c)):
                break
            self.advance()
        return Identifier(self.line, self.slice())

    def hexadecimal_constant(self):
        """Consume hexadecimal digits and return them as an IntegerConstant.

        Called after the ``0x``/``0X`` prefix has been consumed; the prefix
        is part of the returned token text but not of the value.  With no
        digits after the prefix the value is 0.
        """
        n = 0
        while not self._at_end() and is_hexadecimal_digit(self.peek()):
            n = n * 16 + int(self.advance(), 16)
        return IntegerConstant(self.line, self.slice(), n)

    def decimal_constant(self):
        """Consume decimal digits and return them as an IntegerConstant."""
        n = 0
        while not self._at_end() and is_digit(self.peek()):
            n = n * 10 + int(self.advance())
        return IntegerConstant(self.line, self.slice(), n)

    def punctuator(self):
        """Consume the longest punctuator at the cursor and return it.

        ``next_token`` has already consumed the first character.  The token
        is extended one character at a time for as long as the extended text
        is still a punctuator (so "<" followed by "<" becomes "<<").
        """
        while not self._at_end() and is_punctuator(self.slice(1)):
            self.advance()
        # Internal invariant: the caller only dispatches here on a punctuator.
        assert is_punctuator(self.slice())
        return Punctuator(self.line, self.slice())

    def integer_constant(self):
        """Scan an integer literal whose first digit is already consumed.

        A leading ``0`` followed by ``x`` or ``X`` selects hexadecimal;
        anything else is scanned as decimal.
        """
        if self.buf[self.start] == '0' and (self.match('x') or self.match('X')):
            return self.hexadecimal_constant()
        # Un-read the first digit so decimal_constant accumulates it as well.
        self.end -= 1
        return self.decimal_constant()

    def next_token(self):
        """Skip whitespace and return the next token.

        Returns:
            An Identifier, IntegerConstant or Punctuator.

        Raises:
            IndexError: if only whitespace (or nothing) remains in the buffer.
            ValueError: if the next character cannot start any token; the
                offending character is the exception argument.
        """
        while self.advance_whitespace():
            pass
        self.start = self.end
        c = self.advance()
        if is_nondigit(c):
            return self.identifier()
        elif is_digit(c):
            return self.integer_constant()
        elif is_punctuator(c):
            return self.punctuator()
        else:
            raise ValueError(c)