"""Parse the labelled text records in ``data/pokemon/dex_text.asm``.

The token stream is handed to ``label_data.event``; its result is then
consumed in fixed-size records of one label followed by six strings.
"""

import builtins
from dataclasses import dataclass
from pathlib import Path
from pprint import pprint

from parse.generic import tokenize
from parse.generic import number
from parse.generic import string
from parse.generic import label_data


def _label(type, args):
    """Validate a label entry and return its name.

    ``type`` must be ``label_data.Label`` and ``args`` must be a ``str``;
    the string is returned unchanged.
    """
    assert type is label_data.Label, (type, args)
    assert isinstance(args, str)
    return args


def _string(type, args):
    """Validate a data entry and return its parsed string.

    ``type`` must be ``label_data.Data``.  ``args`` is an iterable of
    string pieces which are re-joined with ``','`` before being passed to
    ``string.parse``.
    """
    assert type is label_data.Data, (type, args)
    # label_data.event hands the argument back in several pieces (the
    # original author flagged this as a bug in label_data.event that was
    # never fixed there); gluing the pieces back together with ',' is the
    # local workaround.
    arg = ','.join(args)
    return string.parse(arg)


def type_sequence(schema, type_args):
    """Apply each converter in ``schema`` to the matching entry.

    ``schema`` and ``type_args`` must have the same length; this is
    asserted immediately, at call time.  Each entry of ``type_args`` is
    unpacked into the positional arguments of its converter.

    Returns a lazy generator of the converted values, in order; the
    converters themselves only run as the generator is consumed.
    """
    assert len(schema) == len(type_args)
    return (
        convert(*entry)
        for convert, entry in zip(schema, type_args)
    )


# Command name attached, by position, to each of the six strings of a record.
_CONTENT_KEYS = ('text', 'next', 'next', 'page', 'next', 'next')

# One converter per entry of a record: the label first, then one per string.
# The record width used by parse_text is derived from this tuple so the two
# can never disagree.
_RECORD_SCHEMA = (_label,) + (_string,) * len(_CONTENT_KEYS)


@dataclass
class DexText:
    """One parsed record: a label plus its tagged strings."""

    # Name of the label that opens the record.
    label: str
    # (command name, parsed string) pairs, in source order.
    content: list[tuple[str, str]]


def parse_text(type_args, ix):
    """Parse the single record that starts at index ``ix``.

    Takes ``len(_RECORD_SCHEMA)`` (seven) consecutive entries of
    ``type_args`` starting at ``ix``: one label followed by six strings.
    The strings are tagged positionally with the names in
    ``_CONTENT_KEYS``.

    Returns ``(next_ix, dex_text)`` where ``next_ix`` is the index of the
    entry following the record.

    Raises ``AssertionError`` if fewer entries than a full record remain,
    or if an entry is not of the kind the schema expects at its position.
    """
    end = ix + len(_RECORD_SCHEMA)
    label, *texts = type_sequence(_RECORD_SCHEMA, type_args[ix:end])
    dex_text = DexText(label, list(zip(_CONTENT_KEYS, texts)))
    return end, dex_text


def parse_texts(tokens):
    """Yield a ``DexText`` for every record found in ``tokens``.

    ``tokens`` is passed to ``label_data.event`` with ``text``, ``next``
    and ``page`` registered as data tokens; the resulting sequence is then
    walked one record at a time.
    """
    data_tokens = {'text', 'next', 'page'}
    type_args = label_data.event(tokens, data_tokens=data_tokens)
    ix = 0
    while ix < len(type_args):
        ix, dex_text = parse_text(type_args, ix)
        yield dex_text


def parse(prefix):
    """Read ``<prefix>/data/pokemon/dex_text.asm`` and return its records.

    ``prefix`` must support the ``/`` operator with a string (for example
    a ``pathlib.Path``).  Returns a list of ``DexText``.
    """
    path = prefix / "data/pokemon/dex_text.asm"
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default.
    with open(path, encoding="utf-8") as f:
        lines = f.read().split('\n')
    return list(parse_texts(tokenize.lines(lines)))


# NOTE(review): this runs on import as well as when executed as a script,
# and expects a "pokered" directory relative to the current working
# directory.  Left unguarded to preserve the module's existing behaviour.
pprint(parse(Path("pokered")))