pokemon/tools/parse/text.py
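"""Tokenize and parse the text script files of a Gen 1 (pokered-style)
Pokemon disassembly: label lines such as `_SomeText::` followed by text
commands like `text`, `cont`, `para`, `done`, etc."""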


from itertools import chain
from parse.generic.line import next_line, skip_whitespace

def parse_label(lines):
    """Consume a `SomeText::` label line; return (remaining_lines, label_name)."""
    lines, line = next_line(lines)
    assert line[-2:] == '::', line
    name = line.removesuffix('::')
    return lines, name


# Text commands whose single argument is a quoted string.
string_tokens = {"text", "cont", "para", "line", "next"}


def parse_string(line):
    """Split a double-quoted string into (contents, text_after_closing_quote)."""
    line = line.strip()
    assert line[0] == '"'
    line = line[1:]
    for i, c in enumerate(line):
        # Stop at the first closing quote that is not escaped with a backslash.
        if c == '"' and (i == 0 or line[i - 1] != '\\'):
            return line[:i], line[i + 1:]


def parse_args(line):
    return [i.strip() for i in line.split(',')]


def parse_body(lines):
    body = []
    while lines:
        lines, line = next_line(lines)
        if line in {"text_end", "done", "prompt"}:
            body.append((line,))
            break
        type, *rest = line.split(maxsplit=1)
        if type in string_tokens:
            value, = rest
            string_value, rest = parse_string(value)
            assert rest == "", rest
            body.append((type, string_value))
        elif type == 'text_ram':
            value, = rest
            assert value[0] == "w", value
            body.append((type, value))
        elif type == 'text_start':
            body.append((type,))
        elif type in {'text_decimal', 'text_bcd'}:
            value, = rest
            body.append((type, parse_args(value)))
        else:
            # hack: some texts (e.g. _MoveNameText) end without a control
            # word, so the next label marks the end of this body.
            if line.endswith('::'):
                return [line] + lines, body
            assert False, line
    return lines, body


def tokenize_text(lines):
    lines, name = parse_label(lines)
    # fixme: hack
    if name == '_CableClubNPCLinkClosedBecauseOfInactivityText':
        return None
    lines, body = parse_body(lines)
    return lines, (name, body)
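

# For example (hypothetical label; shape inferred from the rules above,
# assuming next_line yields stripped, non-empty lines), the source
#
#     _SampleText::
#         text "Hello!"
#         done
#
# tokenizes to ('_SampleText', [('text', 'Hello!'), ('done',)]).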


def tokenize(lines):
    while lines:
        lines__tokens = tokenize_text(lines)
        if lines__tokens is None:
            # fixme: hack9000
            return
        lines, tokens = lines__tokens
        lines = skip_whitespace(lines)
        yield tokens


def parse(path):
    with open(path) as f:
        tokens = list(tokenize(f.read().split('\n')))
    d = dict(tokens)
    assert len(tokens) == len(d)  # no duplicate labels within one file
    return d


def parse_all(prefix):
    base_path0 = prefix / 'text'
    paths0 = [p for p in base_path0.iterdir() if p.is_file()]
    base_path1 = prefix / 'data/text'
    paths1 = [p for p in base_path1.iterdir()
              if p.is_file() and p.stem.startswith('text_')]
    return [parse(path) for path in chain(paths0, paths1)]
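

# Usage sketch (the prefix is an assumption; point it at the root of the
# disassembly checkout, which must contain text/ and data/text/):
#
#     from pathlib import Path
#     all_texts = parse_all(Path('pokered'))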