pokemon/tools/parse/text.py
Zack Buhman a56def6074 add font and text data
The original font is a bit of a mess, and includes many duplicate
characters.

I decided to reorganize the characters into a new set of glyphs, in
derivced/font.png.

This also includes very basic parsing for text data.
2023-07-30 03:09:20 +00:00

from parse.line import next_line, skip_whitespace


def parse_label(lines):
    # A text label is a line ending in "::"; strip the suffix to get the name.
    lines, line = next_line(lines)
    assert line[-2:] == '::', line
    name = line.removesuffix('::')
    return lines, name
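
# For example (a hedged sketch, assuming next_line returns the remaining lines
# plus the current stripped line), a pokered-style label such as
# "_OakSpeechText1::" parses to the name "_OakSpeechText1":
#
#   lines, name = parse_label(['_OakSpeechText1::', 'text "..."'])
#   # name == '_OakSpeechText1'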

string_tokens = {"text", "cont", "para", "line"}


def parse_string(line):
    # Split a double-quoted string from whatever follows it; a quote preceded
    # by a backslash does not terminate the string.
    line = line.strip()
    assert line[0] == '"'
    line = line[1:]
    for i, c in enumerate(line):
        if c == '"' and (i == 0 or line[i - 1] != '\\'):
            return line[:i], line[i + 1:]
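
# For example, parse_string('"Hello!"') returns ('Hello!', ''); parse_body
# below asserts that nothing follows the closing quote.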

def parse_args(line):
    # Split a comma-separated macro argument list.
    return [i.strip() for i in line.split(',')]

def parse_body(lines):
    body = []
    while lines:
        lines, line = next_line(lines)
        # Terminator commands end the text block.
        if line in {"text_end", "done", "prompt"}:
            body.append((line,))
            break
        type, *rest = line.split(maxsplit=1)
        if type in string_tokens:
            # text/cont/para/line each take a single quoted string.
            value, = rest
            string_value, rest = parse_string(value)
            assert rest == "", rest
            body.append((type, string_value))
        elif type == 'text_ram':
            # text_ram takes a WRAM label, conventionally prefixed with "w".
            value, = rest
            assert value[0] == "w", value
            body.append((type, value))
        elif type == 'text_start':
            body.append((type,))
        elif type in {'text_decimal', 'text_bcd'}:
            value, = rest
            body.append((type, parse_args(value)))
        else:
            assert False, line
    return lines, body
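
# A hypothetical body (pokered-style text macros; next_line is assumed to strip
# indentation, which the terminator check above requires):
#
#   text "Hello!"
#   text_decimal wStepCounter, 1, 3
#   done
#
# would produce:
#
#   [('text', 'Hello!'), ('text_decimal', ['wStepCounter', '1', '3']), ('done',)]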

def tokenize_text(lines):
    lines, name = parse_label(lines)
    lines, body = parse_body(lines)
    return lines, (name, body)

def tokenize(lines):
    while lines:
        lines, tokens = tokenize_text(lines)
        lines = skip_whitespace(lines)
        yield tokens
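
# tokenize() lazily yields one (label, body) pair per text block, so a caller
# can iterate over a whole file, e.g. (sketch; `source` is a hypothetical
# string holding the file contents):
#
#   for name, body in tokenize(source.split('\n')):
#       print(name, body)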

def parse(path):
    with open(path) as f:
        tokens = list(tokenize(f.read().split('\n')))
    d = dict(tokens)
    # Duplicate labels would silently collapse into one dict entry; reject them.
    assert len(tokens) == len(d)
    return d

def parse_all(prefix):
    # Parse every file under <prefix>/text.
    base_path = prefix / 'text'
    paths = [p for p in base_path.iterdir() if p.is_file()]
    return [parse(path) for path in paths]

if __name__ == '__main__':
    # Quick manual check: parse everything under <prefix>/text and dump it.
    import sys
    from pathlib import Path
    from pprint import pprint

    pprint(parse_all(Path(sys.argv[1])))
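
Run as a module (a sketch, assuming the working directory is pokemon/tools/ so
that the parse package is importable, and that <prefix> contains the extracted
text/ directory):

    python -m parse.text <prefix>

This pretty-prints one dictionary per text file, mapping each label to its
token list.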