import os
import os.path

from lexer import Lexer, IntegerConstant, Punctuator, Identifier

# Characters permitted in the descriptor on the first line of each file.
_DESCRIPTOR_CHARS = frozenset('01nmid')


def is_instruction_descriptor(s):
    """Return True if every character of *s* is one of 0, 1, n, m, i, d.

    An empty string also returns True; callers check the length separately.
    """
    return all(c in _DESCRIPTOR_CHARS for c in s)


def parse_file(path):
    """Yield the lexer tokens for the body of the file at *path*.

    The first line must be a 16-character instruction descriptor (checked
    with an assertion). If the second line starts with
    'Available only when' it is dropped as well. Everything that remains
    is handed to ``Lexer`` and its tokens are yielded one by one.

    Files that consist of only the descriptor line, or of the descriptor
    plus an 'Available only when' line with nothing after it, produce an
    empty body instead of raising IndexError.
    """
    with open(path) as f:
        buf = f.read()

    # partition() never fails on short input: missing parts come back as ''.
    header, _, rest = buf.partition('\n')
    assert len(header) == 16 and is_instruction_descriptor(header), header

    second_line, _, remainder = rest.partition('\n')
    if second_line.startswith('Available only when'):
        body = remainder
    else:
        body = rest

    lexer = Lexer(body)
    while True:
        try:
            token = lexer.next_token()
        except IndexError:
            # IndexError from next_token() is treated as end of input here.
            break
        yield token


# Print the text of every Identifier token found in the files under 'sh4',
# skipping files whose name starts with 'F'.
files = os.listdir('sh4')
for filename in files:
    if filename.startswith('F'):
        continue
    path = os.path.join('sh4', filename)
    for token in parse_file(path):
        # Exact type match is kept on purpose; subclasses (if any) are excluded.
        if type(token) is Identifier:
            print(token.token)