pokemon/tools/parse/text.py
Zack Buhman a56def6074 add font and text data
The original font is a bit of a mess, and includes many duplicate
characters.

I decided to reorganize the characters into a new set of glyphs, in
derivced/font.png.

This also includes very basic parsing for text data.
2023-07-30 03:09:20 +00:00

from parse.line import next_line, skip_whitespace


def parse_label(lines):
    # A text label is a line ending in "::"; strip the suffix to get the name.
    lines, line = next_line(lines)
    assert line[-2:] == '::', line
    name = line.removesuffix('::')
    return lines, name
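
# For example (a hedged sketch, assuming next_line returns the remaining lines
# plus the current stripped line), a pokered-style label such as
# "_OakSpeechText1::" parses to the name "_OakSpeechText1":
#
#   lines, name = parse_label(['_OakSpeechText1::', 'text "..."'])
#   # name == '_OakSpeechText1'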

string_tokens = {"text", "cont", "para", "line"}


def parse_string(line):
    # Split a double-quoted string from whatever follows it; a quote preceded
    # by a backslash does not terminate the string.
    line = line.strip()
    assert line[0] == '"'
    line = line[1:]
    for i, c in enumerate(line):
        if c == '"' and (i == 0 or line[i - 1] != '\\'):
            return line[:i], line[i + 1:]
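
# For example, parse_string('"Hello!"') returns ('Hello!', ''); parse_body
# below asserts that nothing follows the closing quote.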

def parse_args(line):
    # Split a comma-separated macro argument list.
    return [i.strip() for i in line.split(',')]

def parse_body(lines):
    body = []
    while lines:
        lines, line = next_line(lines)
        # Terminator commands end the text block.
        if line in {"text_end", "done", "prompt"}:
            body.append((line,))
            break
        type, *rest = line.split(maxsplit=1)
        if type in string_tokens:
            # text/cont/para/line each take a single quoted string.
            value, = rest
            string_value, rest = parse_string(value)
            assert rest == "", rest
            body.append((type, string_value))
        elif type == 'text_ram':
            # text_ram takes a WRAM label, conventionally prefixed with "w".
            value, = rest
            assert value[0] == "w", value
            body.append((type, value))
        elif type == 'text_start':
            body.append((type,))
        elif type in {'text_decimal', 'text_bcd'}:
            value, = rest
            body.append((type, parse_args(value)))
        else:
            assert False, line
    return lines, body
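
# A hypothetical body (pokered-style text macros; next_line is assumed to strip
# indentation, which the terminator check above requires):
#
#   text "Hello!"
#   text_decimal wStepCounter, 1, 3
#   done
#
# would produce:
#
#   [('text', 'Hello!'), ('text_decimal', ['wStepCounter', '1', '3']), ('done',)]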

def tokenize_text(lines):
    lines, name = parse_label(lines)
    lines, body = parse_body(lines)
    return lines, (name, body)

def tokenize(lines):
    while lines:
        lines, tokens = tokenize_text(lines)
        lines = skip_whitespace(lines)
        yield tokens
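
# tokenize() lazily yields one (label, body) pair per text block, so a caller
# can iterate over a whole file, e.g. (sketch; `source` is a hypothetical
# string holding the file contents):
#
#   for name, body in tokenize(source.split('\n')):
#       print(name, body)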

def parse(path):
    with open(path) as f:
        tokens = list(tokenize(f.read().split('\n')))
    d = dict(tokens)
    # Duplicate labels would silently collapse into one dict entry; reject them.
    assert len(tokens) == len(d)
    return d

def parse_all(prefix):
    # Parse every file under <prefix>/text.
    base_path = prefix / 'text'
    paths = [p for p in base_path.iterdir() if p.is_file()]
    return [parse(path) for path in paths]

if __name__ == '__main__':
    # Quick manual check: parse everything under <prefix>/text and dump it.
    import sys
    from pathlib import Path
    from pprint import pprint

    pprint(parse_all(Path(sys.argv[1])))
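
Run as a module (a sketch, assuming the working directory is pokemon/tools/ so
that the parse package is importable, and that <prefix> contains the extracted
text/ directory):

    python -m parse.text <prefix>

This pretty-prints one dictionary per text file, mapping each label to its
token list.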