"""Tokenizer for labelled text scripts.

Each script starts with a ``Name::`` label line followed by control-word
lines (``text "..."``, ``line "..."``, ``text_ram wFoo``, ``done``, ...).
The module turns every script into a ``(name, body)`` pair, where ``body``
is a list of tuples whose first item is the control word.
"""

from itertools import chain

from parse.line import next_line, skip_whitespace


def parse_label(lines):
    """Consume one label line and return ``(remaining_lines, name)``.

    The line obtained from ``next_line`` must end in ``::``; that suffix is
    removed to produce the name.

    Raises:
        AssertionError: if the line does not end in ``::``.
    """
    lines, line = next_line(lines)
    assert line[-2:] == '::', line
    name = line.removesuffix('::')
    return lines, name


# Control words whose single argument is a quoted string literal.
string_tokens = {"text", "cont", "para", "line", "next"}


def parse_string(line):
    """Split a leading double-quoted literal off ``line``.

    Surrounding whitespace is stripped first.  A backslash escapes the
    character that follows it, so ``\\"`` does not end the literal.  The
    content is returned raw: escape sequences are *not* decoded.

    Returns:
        ``(content, rest)`` -- the text between the quotes and whatever
        follows the closing quote.

    Raises:
        AssertionError: if the stripped line does not start with ``"``.
        ValueError: if there is no unescaped closing quote.
    """
    line = line.strip()
    # Slice rather than index so an empty line fails the assertion instead
    # of raising IndexError.
    assert line[:1] == '"', line
    line = line[1:]

    # The previous check compared the quote character itself against a
    # backslash, which is always true, so an escaped quote ended the literal
    # early.  Track the escape state explicitly instead.
    escaped = False
    for i, c in enumerate(line):
        if escaped:
            escaped = False
        elif c == '\\':
            escaped = True
        elif c == '"':
            return line[:i], line[i + 1:]

    # Previously this fell off the end and returned None, which surfaced as
    # an opaque unpacking TypeError in the caller.
    raise ValueError(f"unterminated string literal: {line!r}")


def parse_args(line):
    """Split ``line`` on commas and strip whitespace from every piece."""
    return [i.strip() for i in line.split(',')]


def parse_body(lines):
    """Parse control-word lines until a terminator is reached.

    Parsing stops after ``text_end``, ``done`` or ``prompt`` (the terminator
    itself is recorded as a 1-tuple), or when ``lines`` becomes falsy.

    Returns:
        ``(remaining_lines, body)`` where ``body`` is a list of tuples:
        ``(word, string)`` for the words in ``string_tokens``,
        ``(word, value)`` for ``text_ram``, ``(word,)`` for ``text_start``
        and ``(word, [args...])`` for ``text_decimal`` / ``text_bcd``.

    Raises:
        AssertionError: on an unknown control word, on trailing text after
            a string literal, or on a ``text_ram`` operand that does not
            start with ``w``.
        ValueError: if a control word has no operand where one is required
            (tuple unpacking fails), or a string literal is unterminated.
    """
    body = []
    while lines:
        lines, line = next_line(lines)
        if line in {"text_end", "done", "prompt"}:
            body.append((line,))
            break

        # ``kind`` rather than ``type`` so the builtin is not shadowed.
        kind, *rest = line.split(maxsplit=1)
        if kind in string_tokens:
            value, = rest
            string_value, rest = parse_string(value)
            assert rest == "", rest
            body.append((kind, string_value))
        elif kind == 'text_ram':
            value, = rest
            assert value[0] == "w", value
            body.append((kind, value))
        elif kind == 'text_start':
            body.append((kind,))
        elif kind in {'text_decimal', 'text_bcd'}:
            value, = rest
            body.append((kind, parse_args(value)))
        else:
            # hack hack; some texts don't have a control word at the end
            # _MoveNameText
            if line.endswith('::'):
                # Push the label back so the next script starts from it.
                return [line] + lines, body
            # Explicit raise: ``assert False`` is stripped under ``-O`` and
            # would let unknown control words be skipped silently.
            raise AssertionError(line)
    return lines, body


def tokenize_text(lines):
    """Parse one labelled script.

    Returns:
        ``(remaining_lines, (name, body))``, or ``None`` when the label is
        ``_CableClubNPCLinkClosedBecauseOfInactivityText`` (special-cased
        below; callers treat ``None`` as "stop tokenizing").
    """
    lines, name = parse_label(lines)
    # fixme: hack
    if name == '_CableClubNPCLinkClosedBecauseOfInactivityText':
        return None
    lines, body = parse_body(lines)
    return lines, (name, body)


def tokenize(lines):
    """Yield ``(name, body)`` for each script found in ``lines``.

    Iteration ends when ``lines`` is exhausted or as soon as
    ``tokenize_text`` returns ``None``; in the latter case everything after
    that label is left unparsed.
    """
    while lines:
        result = tokenize_text(lines)
        if result is None:
            # fixme: hack9000
            return
        lines, tokens = result
        lines = skip_whitespace(lines)
        yield tokens


def parse(path):
    """Parse the file at ``path`` into a ``{name: body}`` dict.

    Raises:
        AssertionError: if two scripts in the file share a name (the dict
            would otherwise drop one of them silently).
    """
    with open(path) as f:
        tokens = list(tokenize(f.read().split('\n')))
    d = dict(tokens)
    assert len(tokens) == len(d)
    return d


def parse_all(prefix):
    """Parse every text file found under ``prefix``.

    Collects all regular files directly inside ``prefix / 'text'`` plus the
    regular files directly inside ``prefix / 'data/text'`` whose stem starts
    with ``text_``.  ``prefix`` must support ``/`` and the results must
    support ``iterdir()`` (e.g. ``pathlib.Path``).

    Returns:
        A list with one ``{name: body}`` dict per file.
    """
    base_path0 = prefix / 'text'
    paths0 = [p for p in base_path0.iterdir() if p.is_file()]
    base_path1 = prefix / 'data/text'
    paths1 = [
        p for p in base_path1.iterdir()
        if p.is_file() and p.stem.startswith('text_')
    ]
    return [parse(path) for path in chain(paths0, paths1)]