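"""Tokenizer for text script files.

Each entry is a `SomeLabel::` line followed by text commands (text, cont,
para, line, next, ...) and ends with text_end, done, or prompt.
"""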
from itertools import chain

from parse.generic.line import next_line, skip_whitespace


def parse_label(lines):
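    """Read a `Label::` line; return (remaining lines, label name)."""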
    lines, line = next_line(lines)
    assert line[-2:] == '::', line
    name = line.removesuffix('::')
    return lines, name


# Commands whose single argument is a quoted string.
string_tokens = {"text", "cont", "para", "line", "next"}


def parse_string(line):
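    """Parse a leading double-quoted string.

    Returns (string contents, remainder of the line after the closing quote).
    """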
    line = line.strip()
    assert line[0] == '"'
    line = line[1:]
    for i, c in enumerate(line):
        # Stop at the first closing quote that is not preceded by a backslash.
        if c == '"' and (i == 0 or line[i - 1] != '\\'):
            return line[:i], line[i + 1:]
    assert False, line  # no closing quote found


def parse_args(line):
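    """Split a comma-separated argument list into stripped strings."""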
    return [i.strip() for i in line.split(',')]


def parse_body(lines):
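    """Collect text commands until a terminator (text_end/done/prompt).

    Returns (remaining lines, list of command tuples).
    """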
    body = []
    while lines:
        lines, line = next_line(lines)
        # A bare terminator ends the text.
        if line in {"text_end", "done", "prompt"}:
            body.append((line,))
            break
        kind, *rest = line.split(maxsplit=1)
        if kind in string_tokens:
            value, = rest
            string_value, rest = parse_string(value)
            assert rest == "", rest
            body.append((kind, string_value))
        elif kind == 'text_ram':
            value, = rest
            # The argument must be a w-prefixed RAM label.
            assert value[0] == "w", value
            body.append((kind, value))
        elif kind == 'text_start':
            body.append((kind,))
        elif kind in {'text_decimal', 'text_bcd'}:
            value, = rest
            body.append((kind, parse_args(value)))
        else:
            # hack hack; some texts don't have a control word at the end
            # _MoveNameText
            if line.endswith('::'):
                # Push the label back so the next entry can be parsed.
                return [line] + lines, body
            assert False, line
    return lines, body


def tokenize_text(lines):
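    """Tokenize one labelled entry into (remaining lines, (name, body)).

    Returns None for the one label that is special-cased below.
    """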
    lines, name = parse_label(lines)
    # fixme: hack
    if name == '_CableClubNPCLinkClosedBecauseOfInactivityText':
        return None
    lines, body = parse_body(lines)
    return lines, (name, body)


def tokenize(lines):
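    """Yield a (name, body) pair for each text entry in `lines`."""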
    while lines:
        lines__tokens = tokenize_text(lines)
        if lines__tokens is None:
            # fixme: hack9000
            return
        lines, tokens = lines__tokens
        lines = skip_whitespace(lines)
        yield tokens


def parse(path):
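    """Parse one text file into a {label name: body} dict."""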
    with open(path) as f:
        tokens = list(tokenize(f.read().split('\n')))
    d = dict(tokens)
    # Label names must be unique within a file.
    assert len(tokens) == len(d)
    return d


def parse_all(prefix):
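    """Parse the text files under `prefix` (a pathlib.Path); returns one dict per file."""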
    base_path0 = prefix / 'text'
    paths0 = [p for p in base_path0.iterdir() if p.is_file()]
    base_path1 = prefix / 'data/text'
    paths1 = [p for p in base_path1.iterdir()
              if p.is_file() and p.stem.startswith('text_')]
    return [parse(path) for path in chain(paths0, paths1)]
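# Usage sketch (hypothetical path; `prefix` must contain `text/` and `data/text/`):
#     from pathlib import Path
#     texts = parse_all(Path('path/to/source'))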