From 4e20ade92283fa63eb247420ca8fe3317dc658bd Mon Sep 17 00:00:00 2001
From: Zack Buhman
Date: Mon, 17 Feb 2025 23:09:47 -0600
Subject: [PATCH] add x parser

---
 x/lex.py       | 131 ++++++++++++++++
 x/parse.py     | 394 +++++++++++++++++++++++++++++++++++++++++++++++++
 x/templates.py | 124 +++++++++++++++++
 x/x.h          | 109 +++++++++++++++
 4 files changed, 758 insertions(+)
 create mode 100644 x/lex.py
 create mode 100644 x/parse.py
 create mode 100644 x/templates.py
 create mode 100644 x/x.h

diff --git a/x/lex.py b/x/lex.py
new file mode 100644
index 0000000..90fcfed
--- /dev/null
+++ b/x/lex.py
@@ -0,0 +1,131 @@
+import string
+
+def parse_magic(mem, offset):
+    """Assert the text-format magic at `offset`; return the offset just past it."""
+    magic = b"xof 0302txt 0064"
+    window = bytes(mem[offset:offset+len(magic)])
+    assert window == magic, window
+    return offset + len(magic)
+
+string_digits = set(ord(i) for i in string.digits)
+
+def parse_number(mem, offset):
+    """Lex an optionally negative decimal number starting at `offset`.
+
+    Returns (new_offset, value): an int, or a float when a '.' follows
+    the whole part.
+    """
+    mem = memoryview(mem)
+    whole = []
+    fraction = []
+    sign = 1
+    if mem[offset] == ord('-'):
+        sign = -1
+        offset += 1
+    # whole
+    while True:
+        c = mem[offset]
+        if c in string_digits:
+            whole.append(c)
+            offset += 1
+        elif c == ord('.'):
+            assert whole != [], chr(c)
+            offset += 1
+            break
+        else:
+            assert whole != [], chr(c)
+            number = sign * int(bytes(whole))
+            return offset, number
+    # fraction
+    while True:
+        c = mem[offset]
+        if c in string_digits:
+            fraction.append(c)
+            offset += 1
+        else:
+            assert fraction != [], chr(c)
+            w = int(bytes(whole))
+            f = int(bytes(fraction)) / (10 ** len(fraction))
+            number = sign * (w + f)
+            return offset, number
+
+assert parse_number(b"1234;", 0)[1] == 1234
+assert abs(parse_number(b"1234.5678;", 0)[1] - 1234.5678) < 0.0001
+assert parse_number(b"-1234;", 0)[1] == -1234
+assert abs(parse_number(b"-1234.5678;", 0)[1] - -1234.5678) < 0.0001
+
+# CR and TAB are accepted as well, so CRLF-terminated or tab-indented input
+# is skipped here instead of tripping the final assert in next_token().
+whitespace = set([ord(' '), ord('\t'), ord('\r'), ord('\n')])
+
+# Marker classes for the punctuation tokens; LBRACKET/RBRACKET are '{' and '}'.
+TOKEN_SEMICOLON = type("TOKEN_SEMICOLON", (), {})
+TOKEN_COMMA = type("TOKEN_COMMA", (), {})
+TOKEN_LBRACKET = type("TOKEN_LBRACKET", (), {})
+TOKEN_RBRACKET = type("TOKEN_RBRACKET", (), {})
+
+identifier_start = set(map(ord, string.ascii_letters + "_"))
+identifier = identifier_start | string_digits
+
+def parse_identifier(mem, offset):
+    """Lex a run of identifier bytes at `offset`; return (new_offset, bytes)."""
+    l = []
+    while True:
+        c = mem[offset]
+        if c in identifier:
+            l.append(c)
+            offset += 1
+        else:
+            assert l != []
+            return offset, bytes(l)
+
+def parse_string(mem, offset):
+    """Lex a double-quoted, single-line string; return (new_offset, str)."""
+    assert mem[offset] == ord('"')
+    offset += 1
+    start = offset
+    while mem[offset] != ord('"'):
+        assert mem[offset] != ord("\n")
+        offset += 1
+    assert mem[offset] == ord('"')
+    s = bytes(mem[start:offset]).decode("utf-8")
+    offset += 1
+    return offset, s
+
+def next_token(mem, offset):
+    """Skip whitespace and lex one token; the token is None at end of input."""
+    while True:
+        if offset >= len(mem):
+            return offset, None
+        c = mem[offset]
+        if c in whitespace:
+            offset += 1
+        else:
+            break
+
+    if c in string_digits or c == ord('-'):
+        return parse_number(mem, offset)
+    elif c == ord(';'):
+        return offset + 1, TOKEN_SEMICOLON
+    elif c == ord(','):
+        return offset + 1, TOKEN_COMMA
+    elif c == ord('{'):
+        return offset + 1, TOKEN_LBRACKET
+    elif c == ord('}'):
+        return offset + 1, TOKEN_RBRACKET
+    elif c == ord('"'):
+        return parse_string(mem, offset)
+    elif c in identifier_start:
+        return parse_identifier(mem, offset)
+    else:
+        assert False, chr(c)
+
+def lex_all(mem, offset):
+    """Check the magic, then yield every token until the input is exhausted."""
+    offset = parse_magic(mem, offset)
+    while True:
+        offset, token = next_token(mem, offset)
+        if token is None:
+            return
+        else:
+            yield token
diff --git a/x/parse.py b/x/parse.py
new file mode 100644
index 0000000..6eeb5de
--- /dev/null
+++ b/x/parse.py
@@ -0,0 +1,394 @@
+from pprint import pprint
+import sys
+
+import lex
+import templates
+
+with open(sys.argv[1], "rb") as f:
+    buf = f.read()
+mem = memoryview(buf)
+
+class TokenReader:
+    """Cursor over the complete token list lexed from one input buffer."""
+    def __init__(self, mem):
+        self.tokens = list(lex.lex_all(mem, 0))
+        self.ix = 0
+
+    def consume(self, o):
+        """Assert that the current token equals `o` and advance past it."""
+        assert self.tokens[self.ix] == o, (self.tokens[self.ix], o)
+        self.ix += 1
+
+    def consume_type(self, t):
+        """Assert the current token has exact type `t`; advance and return it."""
+        assert type(self.tokens[self.ix]) == t, (t,
+                                                 self.tokens[self.ix],
+                                                 type(self.tokens[self.ix]))
+        self.ix += 1
+        return self.tokens[self.ix-1]
+
+    def match(self, o):
+        """Advance and return the current token if it equals `o`, else return False."""
+        if self.tokens[self.ix] == o:
+            self.ix += 1
+            return self.tokens[self.ix-1]
+        else:
+            return False
+
+    def match_type(self, t):
+        """Like match(), but compares the token's exact type against `t`."""
+        if type(self.tokens[self.ix]) == t:
+            self.ix += 1
+            return self.tokens[self.ix-1]
+        else:
+            return False
+
+    def peek(self):
+        """Return the current token without advancing."""
+        return self.tokens[self.ix]
+
+    def eof(self):
+        """Return True once every token has been consumed."""
+        return self.ix >= len(self.tokens)
+
+def parse_int_raw(r):
+    """Consume one int token, leaving any following separator in place."""
+    i = r.consume_type(int)
+    return i
+
+def parse_float_raw(r):
+    """Consume one float token, leaving any following separator in place."""
+    i = r.consume_type(float)
+    return i
+
+def parse_int(r):
+    """Consume one int token and the semicolon that terminates it."""
+    i = r.consume_type(int)
+    r.consume(lex.TOKEN_SEMICOLON)
+    return i
+
+def parse_float(r):
+    """Consume one float token and the semicolon that terminates it."""
+    f = r.consume_type(float)
+    r.consume(lex.TOKEN_SEMICOLON)
+    return f
+
+def parse_vector(r):
+    x = parse_float(r)
+    y = parse_float(r)
+    z = parse_float(r)
+    return templates.Vector(x, y, z)
+
+def parse_string(r):
+    s = r.consume_type(str)
+    r.consume(lex.TOKEN_SEMICOLON)
+    return s
+
+def parse_color_rgba(r):
+    red = r.consume_type(float)
+    r.consume(lex.TOKEN_COMMA)
+    green = r.consume_type(float)
+    r.consume(lex.TOKEN_COMMA)
+    blue = r.consume_type(float)
+    r.consume(lex.TOKEN_COMMA)
+    alpha = r.consume_type(float)
+    r.consume(lex.TOKEN_SEMICOLON)
+    r.consume(lex.TOKEN_SEMICOLON)
+    return templates.ColorRGBA(
+        red, green, blue, alpha
+    )
+
+def parse_color_rgb(r):
+    red = r.consume_type(float)
+    r.consume(lex.TOKEN_COMMA)
+    green = r.consume_type(float)
+    r.consume(lex.TOKEN_COMMA)
+    blue = r.consume_type(float)
+    r.consume(lex.TOKEN_SEMICOLON)
+    r.consume(lex.TOKEN_SEMICOLON)
+    return templates.ColorRGB(
+        red, green, blue
+    )
+
+def parse_list(r, n, parse, *, delim=lex.TOKEN_COMMA):
+    """Parse `n` elements with `parse`, then the terminating semicolon.
+
+    `delim` is consumed between consecutive elements; pass None when the
+    element parser already consumes its own separator.  At least one
+    element is always parsed, even when `n` is 0.
+    """
+    l = []
+    for _ in range(n - 1):
+        l.append(parse(r))
+        if delim is not None:
+            r.consume(delim)
+    l.append(parse(r))
+    r.consume(lex.TOKEN_SEMICOLON)
+    return l
+
+def parse_matrix4x4(r):
+    v = []
+    for i in range(15):
+        v.append(r.consume_type(float))
+        r.consume(lex.TOKEN_COMMA)
+    v.append(r.consume_type(float))
+    r.consume(lex.TOKEN_SEMICOLON)
+    r.consume(lex.TOKEN_SEMICOLON)
+    return templates.Matrix4x4(
+        v
+    )
+
+def parse_header(r):
+    r.consume(b"Header")
+    r.consume(lex.TOKEN_LBRACKET)
+    major = parse_int(r)
+    minor = parse_int(r)
+    flags = parse_int(r)
+    r.consume(lex.TOKEN_RBRACKET)
+    return templates.Header(
+        major, minor, flags
+    )
+
+def parse_material(r):
+    r.consume(b"Material")
+    name = r.consume_type(bytes)
+    r.consume(lex.TOKEN_LBRACKET)
+    faceColor = parse_color_rgba(r)
+    power = parse_float(r)
+    specularColor = parse_color_rgb(r)
+    emissiveColor = parse_color_rgb(r)
+
+    objects = []
+    while not r.match(lex.TOKEN_RBRACKET):
+        objects.append(parse_one_ref(r))
+
+    return name, templates.Material(
+        faceColor,
+        power,
+        specularColor,
+        emissiveColor,
+        objects
+    )
+
+def parse_texture_filename(r):
+    r.consume(b"TextureFilename")
+    r.consume(lex.TOKEN_LBRACKET)
+    filename = parse_string(r)
+    r.consume(lex.TOKEN_RBRACKET)
+    return templates.TextureFilename(
+        filename
+    )
+
+def parse_frame(r):
+    r.consume(b"Frame")
+    name = r.consume_type(bytes)
+    r.consume(lex.TOKEN_LBRACKET)
+    objects = []
+    while not r.match(lex.TOKEN_RBRACKET):
+        objects.append(parse_one_ref(r))
+    return name, templates.Frame(
+        objects
+    )
+
+def parse_frame_transform_matrix(r):
+    r.consume(b"FrameTransformMatrix")
+    r.consume(lex.TOKEN_LBRACKET)
+    frameMatrix = parse_matrix4x4(r)
+    r.consume(lex.TOKEN_RBRACKET)
+    return templates.FrameTransformMatrix(
+        frameMatrix
+    )
+
+def parse_mesh_face(r):
+    nFaceVertexIndices = parse_int(r)
+    faceVertexIndices = parse_list(r, nFaceVertexIndices, parse_int_raw)
+
+    return templates.MeshFace(
+        nFaceVertexIndices,
+        faceVertexIndices,
+    )
+
+def parse_mesh(r):
+    r.consume(b"Mesh")
+    name = r.consume_type(bytes)
+    r.consume(lex.TOKEN_LBRACKET)
+    nVertices = parse_int(r)
+    vertices = parse_list(r, nVertices, parse_vector)
+    nFaces = parse_int(r)
+    faces = parse_list(r, nFaces, parse_mesh_face)
+
+    objects = []
+    while not r.match(lex.TOKEN_RBRACKET):
+        objects.append(parse_one_ref(r))
+
+    return name, templates.Mesh(
+        nVertices,
+        vertices,
+        nFaces,
+        faces,
+        objects
+    )
+
+def parse_mesh_material_list(r):
+    r.consume(b"MeshMaterialList")
+    r.consume(lex.TOKEN_LBRACKET)
+
+    nMaterials = parse_int(r)
+    nFaceIndices = parse_int(r)
+    faceIndices = parse_list(r, nFaceIndices, parse_int, delim=None)
+
+    objects = []
+    while not r.match(lex.TOKEN_RBRACKET):
+        # identifier tokens are bytes, so the expected name must be bytes too
+        objects.append(parse_one_ref(r, b"Material"))
+
+    return templates.MeshMaterialList(
+        nMaterials,
+        nFaceIndices,
+        faceIndices,
+        objects
+    )
+
+def parse_mesh_normals(r):
+    r.consume(b"MeshNormals")
+    r.consume(lex.TOKEN_LBRACKET)
+    nNormals = parse_int(r)
+    normals = parse_list(r, nNormals, parse_vector)
+    nFaceNormals = parse_int(r)
+    faceNormals = parse_list(r, nFaceNormals, parse_mesh_face)
+    r.consume(lex.TOKEN_RBRACKET)
+    return templates.MeshNormals(
+        nNormals,
+        normals,
+        nFaceNormals,
+        faceNormals
+    )
+
+def parse_coords2d(r):
+    u = parse_float(r)
+    v = parse_float(r)
+    return templates.Coords2D(u, v)
+
+def parse_mesh_texture_coords(r):
+    r.consume(b"MeshTextureCoords")
+    r.consume(lex.TOKEN_LBRACKET)
+    nTextureCoords = parse_int(r)
+    textureCoords = parse_list(r, nTextureCoords, parse_coords2d)
+    r.consume(lex.TOKEN_RBRACKET)
+    return templates.MeshTextureCoords(
+        nTextureCoords,
+        textureCoords
+    )
+
+def parse_animation_set(r):
+    r.consume(b"AnimationSet")
+    name = r.consume_type(bytes)
+    r.consume(lex.TOKEN_LBRACKET)
+    objects = []
+    while not r.match(lex.TOKEN_RBRACKET):
+        objects.append(parse_one_ref(r, b"Animation"))
+
+    return name, templates.AnimationSet(
+        objects
+    )
+
+def parse_animation(r):
+    r.consume(b"Animation")
+    name = r.consume_type(bytes)
+    r.consume(lex.TOKEN_LBRACKET)
+    objects = []
+    while not r.match(lex.TOKEN_RBRACKET):
+        objects.append(parse_one_ref(r, {b"AnimationKey", b"AnimationOptions"}))
+
+    return name, templates.Animation(
+        objects
+    )
+
+def parse_float_keys(r):
+    nValues = parse_int(r)
+    values = parse_list(r, nValues, parse_float_raw)
+    r.consume(lex.TOKEN_SEMICOLON) # FIXME: is this correct for nKeys>1?
+    return templates.FloatKeys(
+        nValues,
+        values,
+    )
+
+def parse_timed_float_keys(r):
+    time = parse_int(r)
+    tfkeys = parse_float_keys(r)
+    return templates.TimedFloatKeys(
+        time,
+        tfkeys,
+    )
+
+def parse_animation_key(r):
+    r.consume(b"AnimationKey")
+    r.consume(lex.TOKEN_LBRACKET)
+    keyType = parse_int(r)
+    nKeys = parse_int(r)
+    keys = parse_list(r, nKeys, parse_timed_float_keys)
+    r.consume(lex.TOKEN_RBRACKET)
+    return templates.AnimationKey(
+        keyType,
+        nKeys,
+        keys,
+    )
+
+def parse_one(r, peek_token=None):
+    """Parse one template instance, dispatching on the identifier at the cursor.
+
+    `peek_token`, when given, restricts the accepted identifier: either one
+    bytes value or a set of them.
+    """
+    token = r.peek()
+    if peek_token is not None:
+        if isinstance(peek_token, set):
+            assert token in peek_token, (token, peek_token)
+        else:
+            assert token == peek_token, (token, peek_token)
+    if token == b"Header":
+        return parse_header(r)
+    elif token == b"Material":
+        return parse_material(r)
+    elif token == b"TextureFilename":
+        return parse_texture_filename(r)
+    elif token == b"Frame":
+        return parse_frame(r)
+    elif token == b"FrameTransformMatrix":
+        return parse_frame_transform_matrix(r)
+    elif token == b"Mesh":
+        return parse_mesh(r)
+    elif token == b"MeshMaterialList":
+        return parse_mesh_material_list(r)
+    elif token == b"MeshNormals":
+        return parse_mesh_normals(r)
+    elif token == b"MeshTextureCoords":
+        return parse_mesh_texture_coords(r)
+    elif token == b"AnimationSet":
+        return parse_animation_set(r)
+    elif token == b"Animation":
+        return parse_animation(r)
+    elif token == b"AnimationKey":
+        return parse_animation_key(r)
+    else:
+        assert False, token
+
+def parse_one_ref(r, peek_token=None):
+    """Parse either a `{ name }` reference or an inline template instance."""
+    if r.match(lex.TOKEN_LBRACKET):
+        name = r.consume_type(bytes)
+        r.consume(lex.TOKEN_RBRACKET)
+        return templates.Reference(
+            name
+        )
+    else:
+        return parse_one(r, peek_token)
+
+def parse_all(r):
+    """Yield top-level objects until the reader runs out of tokens."""
+    while not r.eof():
+        yield parse_one_ref(r)
+
+r = TokenReader(mem)
+for i in parse_all(r):
+    pprint(i)
diff --git a/x/templates.py b/x/templates.py
new file mode 100644
index 0000000..1bbc97e
--- /dev/null
+++ b/x/templates.py
@@ -0,0 +1,124 @@
+from typing import Any, Union
+from dataclasses import dataclass
+
+@dataclass
+class Header:
+    major: int
+    minor: int
+    flags: int
+
+@dataclass
+class ColorRGBA:
+    r: float
+    g: float
+    b: float
+    a: float
+
+@dataclass
+class ColorRGB:
+    r: float
+    g: float
+    b: float
+
+@dataclass
+class Matrix4x4:
+    v: list[float]
+
+@dataclass
+class Material:
+    faceColor: ColorRGBA
+    power: float
+    specularColor: ColorRGB
+    emissiveColor: ColorRGB
+
+    objects: list[Any]
+
+@dataclass
+class Frame:
+    objects: list[Any]
+
+@dataclass
+class FrameTransformMatrix:
+    frameMatrix: Matrix4x4
+
+@dataclass
+class Vector:
+    x: float
+    y: float
+    z: float
+
+@dataclass
+class MeshFace:
+    nFaceVertexIndices: int
+    faceVertexIndices: list[int]
+
+@dataclass
+class Mesh:
+    nVertices: int
+    vertices: list[Vector]
+    nFaces: int
+    faces: list[MeshFace]
+
+    objects: list[Any]
+
+@dataclass
+class MeshMaterialList:
+    nMaterials: int
+    nFaceIndices: int
+    faceIndices: list[int]
+
+    objects: list[Material]
+
+@dataclass
+class MeshNormals:
+    nNormals: int
+    normals: list[Vector]
+    nFaceNormals: int
+    faceNormals: list[MeshFace]
+
+@dataclass
+class Coords2D:
+    u: float
+    v: float
+
+@dataclass
+class MeshTextureCoords:
+    nTextureCoords: int
+    textureCoords: list[Coords2D]
+
+@dataclass
+class TextureFilename:
+    filename: str
+
+@dataclass
+class Reference:
+    name: bytes
+
+@dataclass
+class FloatKeys:
+    nValues: int
+    values: list[float]
+
+@dataclass
+class TimedFloatKeys:
+    time: int
+    tfkeys: FloatKeys
+
+@dataclass
+class AnimationKey:
+    keyType: int
+    nKeys: int
+    keys: list[TimedFloatKeys]
+
+@dataclass
+class AnimationOptions:
+    openClosed: int
+    positionQuality: int
+
+@dataclass
+class Animation:
+    objects: list[Union[Reference, AnimationKey, AnimationOptions]]
+
+@dataclass
+class AnimationSet:
+    objects: list[Animation]
diff --git a/x/x.h b/x/x.h
new file mode 100644
index 0000000..74628a8
--- /dev/null
+++ b/x/x.h
@@ -0,0 +1,109 @@
+struct header {
+  int tag;
+  int major;
+  int minor;
+  int flags;
+};
+
+struct color_rgba {
+  float r;
+  float g;
+  float b;
+  float a;
+};
+
+struct color_rgb {
+  float r;
+  float g;
+  float b;
+};
+
+struct material {
+  int tag;
+  x_color_rgba facecolor;
+  float power;
+  x_color_rgb specularcolor;
+  x_color_rgb emissivecolor;
+  void * objects[];
+};
+
+struct frame {
+  int tag;
+  void * objects[];
+};
+
+struct frame_transform_matrix {
+  int tag;
+  mat4x4 framematrix;
+};
+
+struct mesh_face {
+  int nfacevertexindices;
+  int facevertexindices[];
+};
+
+struct mesh {
+  int nvertices;
+  vec3 * vertices;
+  int nfaces;
+  mesh_face * faces;
+  void * objects[];
+};
+
+struct mesh_material_list {
+  int n_materials;
+  int n_face_indices;
+  int * face_indices;
+  void * objects[];
+};
+
+struct mesh_normals {
+  int tag;
+  int n_normals;
+  vec3 * normals;
+  int n_face_normals;
+  mesh_face * face_normals;
+};
+
+struct mesh_texture_coords {
+  int tag;
+  int n_texture_coords;
+  vec2 texture_coords[];
+};
+
+struct texture_filename {
+  const char * filename;
+};
+
+struct float_keys {
+  int nvalues;
+  float values;
+};
+
+struct timed_float_keys {
+  int time;
+  float_keys tfkeys;
+};
+
+struct animation_key {
+  int tag;
+  int key_type;
+  int n_keys;
+  timed_float_keys keys[];
+};
+
+struct animation_options {
+  int tag;
+  int open_closed;
+  int position_quality;
+};
+
+struct animation {
+  int tag;
+  void * objects[];
+};
+
+struct animation_set {
+  int tag;
+  void * objects[];
+}