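"""Tokenizer for text script files.

Each entry is a `SomeLabel::` line followed by text commands (text, cont,
para, line, next, ...) and ends with text_end, done, or prompt.
"""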
from itertools import chain

from parse.generic.line import next_line, skip_whitespace


def parse_label(lines):
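    """Read a `Label::` line; return (remaining lines, label name)."""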
    lines, line = next_line(lines)
    assert line[-2:] == '::', line
    name = line.removesuffix('::')
    return lines, name


# Commands whose single argument is a quoted string.
string_tokens = {"text", "cont", "para", "line", "next"}


def parse_string(line):
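    """Parse a leading double-quoted string.

    Returns (string contents, remainder of the line after the closing quote).
    """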
    line = line.strip()
    assert line[0] == '"'
    line = line[1:]
    for i, c in enumerate(line):
        # Stop at the first closing quote that is not preceded by a backslash.
        if c == '"' and (i == 0 or line[i - 1] != '\\'):
            return line[:i], line[i + 1:]
    assert False, line  # no closing quote found


def parse_args(line):
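    """Split a comma-separated argument list into stripped strings."""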
    return [i.strip() for i in line.split(',')]


def parse_body(lines):
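    """Collect text commands until a terminator (text_end/done/prompt).

    Returns (remaining lines, list of command tuples).
    """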
    body = []
    while lines:
        lines, line = next_line(lines)
        # A bare terminator ends the text.
        if line in {"text_end", "done", "prompt"}:
            body.append((line,))
            break
        kind, *rest = line.split(maxsplit=1)
        if kind in string_tokens:
            value, = rest
            string_value, rest = parse_string(value)
            assert rest == "", rest
            body.append((kind, string_value))
        elif kind == 'text_ram':
            value, = rest
            # The argument must be a w-prefixed RAM label.
            assert value[0] == "w", value
            body.append((kind, value))
        elif kind == 'text_start':
            body.append((kind,))
        elif kind in {'text_decimal', 'text_bcd'}:
            value, = rest
            body.append((kind, parse_args(value)))
        else:
            # hack hack; some texts don't have a control word at the end
            # _MoveNameText
            if line.endswith('::'):
                # Push the label back so the next entry can be parsed.
                return [line] + lines, body
            assert False, line
    return lines, body


def tokenize_text(lines):
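    """Tokenize one labelled entry into (remaining lines, (name, body)).

    Returns None for the one label that is special-cased below.
    """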
    lines, name = parse_label(lines)
    # fixme: hack
    if name == '_CableClubNPCLinkClosedBecauseOfInactivityText':
        return None
    lines, body = parse_body(lines)
    return lines, (name, body)


def tokenize(lines):
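    """Yield a (name, body) pair for each text entry in `lines`."""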
    while lines:
        lines__tokens = tokenize_text(lines)
        if lines__tokens is None:
            # fixme: hack9000
            return
        lines, tokens = lines__tokens
        lines = skip_whitespace(lines)
        yield tokens


def parse(path):
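    """Parse one text file into a {label name: body} dict."""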
    with open(path) as f:
        tokens = list(tokenize(f.read().split('\n')))
    d = dict(tokens)
    # Label names must be unique within a file.
    assert len(tokens) == len(d)
    return d


def parse_all(prefix):
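    """Parse the text files under `prefix` (a pathlib.Path); returns one dict per file."""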
    base_path0 = prefix / 'text'
    paths0 = [p for p in base_path0.iterdir() if p.is_file()]
    base_path1 = prefix / 'data/text'
    paths1 = [p for p in base_path1.iterdir()
              if p.is_file() and p.stem.startswith('text_')]
    return [parse(path) for path in chain(paths0, paths1)]
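# Usage sketch (hypothetical path; `prefix` must contain `text/` and `data/text/`):
#     from pathlib import Path
#     texts = parse_all(Path('path/to/source'))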