add x parser

This commit is contained in:
Zack Buhman 2025-02-17 23:09:47 -06:00
parent e389a0c80c
commit 4e20ade922
4 changed files with 721 additions and 0 deletions

118
x/lex.py Normal file
View File

@ -0,0 +1,118 @@
import string
def parse_magic(mem, offset):
    """Check for the ``xof 0302txt 0064`` signature at ``offset``.

    Returns the offset of the first byte after the signature.  Any other
    byte sequence trips an assertion whose message is the bytes found.
    """
    magic = b"xof 0302txt 0064"
    end = offset + len(magic)
    window = bytes(mem[offset:end])
    assert window == magic, window
    return end
# Byte values of the ASCII decimal digits, for O(1) membership tests.
string_digits = {ord(digit) for digit in string.digits}
def parse_number(mem, offset):
    """Lex a decimal number that starts at ``offset``.

    Accepted forms are ``[-]digits`` and ``[-]digits.digits``.

    Returns ``(offset, number)``: ``offset`` is the index of the first byte
    after the number, ``number`` is an ``int`` when there is no fractional
    part and a ``float`` otherwise.

    A missing digit run (before or after the ``.``) trips an assertion.
    Reaching the end of ``mem`` ends the number instead of raising
    ``IndexError``.
    """
    mem = memoryview(mem)
    end = len(mem)
    zero, nine = ord('0'), ord('9')

    def describe(pos):
        # Assertion message: the offending character, or a marker at EOF.
        return chr(mem[pos]) if pos < end else "end of input"

    def skip_digits(pos):
        while pos < end and zero <= mem[pos] <= nine:
            pos += 1
        return pos

    start = offset
    if offset < end and mem[offset] == ord('-'):
        offset += 1

    whole_end = skip_digits(offset)
    assert whole_end != offset, describe(whole_end)
    if whole_end >= end or mem[whole_end] != ord('.'):
        return whole_end, int(bytes(mem[start:whole_end]))

    fraction_start = whole_end + 1
    fraction_end = skip_digits(fraction_start)
    assert fraction_end != fraction_start, describe(fraction_end)
    # Converting the whole literal at once is correctly rounded; adding a
    # separately computed whole and fractional part rounds twice.
    return fraction_end, float(bytes(mem[start:fraction_end]))
# Import-time smoke tests for parse_number: integer literals must come back
# exactly, decimal literals to within 1e-4, for both signs.
assert parse_number(b"1234;", 0)[1] == 1234
assert abs(parse_number(b"1234.5678;", 0)[1] - 1234.5678) < 0.0001
assert parse_number(b"-1234;", 0)[1] == -1234
assert abs(parse_number(b"-1234.5678;", 0)[1] - -1234.5678) < 0.0001
# Byte values skipped between tokens.  Carriage return and tab are included so
# that files with CRLF line endings or tab indentation lex instead of tripping
# the "unexpected character" assertion.
whitespace = set([ord(' '), ord('\t'), ord('\r'), ord('\n')])

# Punctuation tokens.  Each one is a distinct, empty class object used as a
# sentinel and compared with ``==`` by the parser.
TOKEN_SEMICOLON = type("TOKEN_SEMICOLON", (), {})
TOKEN_COMMA = type("TOKEN_COMMA", (), {})
TOKEN_LBRACKET = type("TOKEN_LBRACKET", (), {})
TOKEN_RBRACKET = type("TOKEN_RBRACKET", (), {})
# Bytes that may begin an identifier (ASCII letters and underscore), and the
# bytes that may continue one (the same set plus the decimal digits).
identifier_start = {ord(ch) for ch in string.ascii_letters + "_"}
identifier = identifier_start.union(string_digits)
def parse_identifier(mem, offset):
    """Lex the run of identifier bytes that starts at ``offset``.

    Returns ``(offset, name)`` with ``offset`` just past the run and ``name``
    the run as ``bytes``.  An empty run trips an assertion.
    """
    start = offset
    while mem[offset] in identifier:
        offset += 1
    assert offset != start
    return offset, bytes(mem[start:offset])
def parse_string(mem, offset):
    """Lex a double-quoted string literal whose opening quote is at ``offset``.

    Returns ``(offset, text)``: ``offset`` is just past the closing quote and
    ``text`` is the quoted bytes decoded as UTF-8.  There is no escape
    handling, and a newline before the closing quote trips an assertion.
    """
    quote = ord('"')
    newline = ord("\n")
    assert mem[offset] == quote
    start = offset + 1
    end = start
    while True:
        c = mem[end]
        if c == quote:
            break
        assert c != newline
        end += 1
    text = bytes(mem[start:end]).decode("utf-8")
    return end + 1, text
def next_token(mem, offset):
    """Skip whitespace from ``offset`` and lex the next token.

    Returns ``(offset, token)`` with ``offset`` just past the token.  The
    token is an ``int``/``float`` for numbers, ``str`` for quoted strings,
    ``bytes`` for identifiers, or one of the ``TOKEN_*`` sentinels.  At end
    of input the token is ``None``.  Any other character trips an assertion.
    """
    size = len(mem)
    while offset < size and mem[offset] in whitespace:
        offset += 1
    if offset >= size:
        return offset, None

    c = mem[offset]
    if c == ord('-') or c in string_digits:
        return parse_number(mem, offset)

    punctuation = {
        ord(';'): TOKEN_SEMICOLON,
        ord(','): TOKEN_COMMA,
        ord('{'): TOKEN_LBRACKET,
        ord('}'): TOKEN_RBRACKET,
    }
    if c in punctuation:
        return offset + 1, punctuation[c]
    if c == ord('"'):
        return parse_string(mem, offset)
    if c in identifier_start:
        return parse_identifier(mem, offset)
    assert False, chr(c)
def lex_all(mem, offset):
    """Yield every token in ``mem`` after validating the file signature.

    The signature is checked at ``offset``; tokens are then produced lazily
    until ``next_token`` reports end of input.
    """
    offset = parse_magic(mem, offset)
    offset, token = next_token(mem, offset)
    while token is not None:
        yield token
        offset, token = next_token(mem, offset)

370
x/parse.py Normal file
View File

@ -0,0 +1,370 @@
from pprint import pprint
import sys
import lex
import templates
# Read the whole file named by the first command-line argument and expose it
# as a memoryview for the lexer.  This runs at import time, so the module
# needs sys.argv[1] to be present.
with open(sys.argv[1], "rb") as f:
    buf = f.read()
mem = memoryview(buf)
class TokenReader:
    """Cursor over the full token list produced by ``lex.lex_all``.

    ``tokens`` holds every token of the input and ``ix`` is the index of the
    next unread one.  The ``consume*`` methods assert on a mismatch; the
    ``match*`` methods return ``False`` and leave the cursor alone.
    """

    def __init__(self, mem):
        self.tokens = list(lex.lex_all(mem, 0))
        self.ix = 0

    def consume(self, o):
        """Advance past the current token, asserting that it equals ``o``."""
        current = self.tokens[self.ix]
        assert current == o, (current, o)
        self.ix += 1

    def consume_type(self, t):
        """Advance past and return the current token, asserting its exact type is ``t``."""
        current = self.tokens[self.ix]
        assert type(current) == t, (t, current, type(current))
        self.ix += 1
        return current

    def match(self, o):
        """Return and skip the current token if it equals ``o``, else return ``False``."""
        current = self.tokens[self.ix]
        if current == o:
            self.ix += 1
            return current
        return False

    def match_type(self, t):
        """Return and skip the current token if its exact type is ``t``, else return ``False``."""
        current = self.tokens[self.ix]
        if type(current) == t:
            self.ix += 1
            return current
        return False

    def peek(self):
        """Return the current token without advancing."""
        return self.tokens[self.ix]

    def eof(self):
        """Return True once every token has been read."""
        return self.ix >= len(self.tokens)
def parse_int_raw(r):
    """Consume one ``int`` token, with no trailing separator, and return it."""
    return r.consume_type(int)
def parse_float_raw(r):
    """Consume one ``float`` token, with no trailing separator, and return it."""
    return r.consume_type(float)
def parse_int(r):
    """Consume an ``int`` token followed by a semicolon; return the int."""
    value = r.consume_type(int)
    r.consume(lex.TOKEN_SEMICOLON)
    return value
def parse_float(r):
    """Consume a ``float`` token followed by a semicolon; return the float."""
    value = r.consume_type(float)
    r.consume(lex.TOKEN_SEMICOLON)
    return value
def parse_vector(r):
    """Parse three semicolon-terminated floats into a ``templates.Vector``."""
    components = [parse_float(r) for _ in range(3)]
    return templates.Vector(*components)
def parse_string(r):
    """Consume a ``str`` token followed by a semicolon; return the string."""
    text = r.consume_type(str)
    r.consume(lex.TOKEN_SEMICOLON)
    return text
def parse_color_rgba(r):
    """Parse ``r, g, b, a;;`` (four comma-separated floats and two semicolons).

    Returns a ``templates.ColorRGBA``.
    """
    channels = [r.consume_type(float)]
    for _ in range(3):
        r.consume(lex.TOKEN_COMMA)
        channels.append(r.consume_type(float))
    for _ in range(2):
        r.consume(lex.TOKEN_SEMICOLON)
    return templates.ColorRGBA(*channels)
def parse_color_rgb(r):
    """Parse ``r, g, b;;`` (three comma-separated floats and two semicolons).

    Returns a ``templates.ColorRGB``.
    """
    channels = [r.consume_type(float)]
    for _ in range(2):
        r.consume(lex.TOKEN_COMMA)
        channels.append(r.consume_type(float))
    for _ in range(2):
        r.consume(lex.TOKEN_SEMICOLON)
    return templates.ColorRGB(*channels)
def parse_list(r, n, parse, *, delim=lex.TOKEN_COMMA):
    """Parse ``n`` elements with ``parse`` and return them as a list.

    Elements are separated by ``delim`` (nothing when ``delim`` is ``None``)
    and the list as a whole is terminated by a semicolon.

    With ``n == 0`` no element is parsed and an empty list is returned; the
    previous code always parsed at least one element.
    NOTE(review): this assumes an empty array is still followed by its
    terminating semicolon -- confirm against real files.
    """
    items = []
    for index in range(n):
        # The separator goes between elements, never before the first one.
        if index and delim is not None:
            r.consume(delim)
        items.append(parse(r))
    r.consume(lex.TOKEN_SEMICOLON)
    return items
def parse_matrix4x4(r):
    """Parse sixteen comma-separated floats followed by two semicolons.

    Returns a ``templates.Matrix4x4`` wrapping the floats in file order.
    """
    values = []
    for index in range(16):
        if index:
            r.consume(lex.TOKEN_COMMA)
        values.append(r.consume_type(float))
    for _ in range(2):
        r.consume(lex.TOKEN_SEMICOLON)
    return templates.Matrix4x4(values)
def parse_header(r):
    """Parse ``Header { major; minor; flags; }`` into a ``templates.Header``."""
    r.consume(b"Header")
    r.consume(lex.TOKEN_LBRACKET)
    fields = [parse_int(r) for _ in range(3)]
    r.consume(lex.TOKEN_RBRACKET)
    return templates.Header(*fields)
def parse_material(r):
    """Parse a named ``Material`` object.

    The fixed fields (face colour, power, specular colour, emissive colour)
    are followed by any number of nested objects or references up to the
    closing bracket.  Returns ``(name, templates.Material)``.
    """
    r.consume(b"Material")
    name = r.consume_type(bytes)
    r.consume(lex.TOKEN_LBRACKET)
    face_color = parse_color_rgba(r)
    power = parse_float(r)
    specular_color = parse_color_rgb(r)
    emissive_color = parse_color_rgb(r)
    children = []
    while True:
        if r.match(lex.TOKEN_RBRACKET):
            break
        children.append(parse_one_ref(r))
    material = templates.Material(
        face_color, power, specular_color, emissive_color, children
    )
    return name, material
def parse_texture_filename(r):
    """Parse ``TextureFilename { "name"; }`` into a ``templates.TextureFilename``."""
    r.consume(b"TextureFilename")
    r.consume(lex.TOKEN_LBRACKET)
    texture = templates.TextureFilename(parse_string(r))
    r.consume(lex.TOKEN_RBRACKET)
    return texture
def parse_frame(r):
    """Parse a named ``Frame`` and its nested objects or references.

    Returns ``(name, templates.Frame)``.
    """
    r.consume(b"Frame")
    name = r.consume_type(bytes)
    r.consume(lex.TOKEN_LBRACKET)
    children = []
    while True:
        if r.match(lex.TOKEN_RBRACKET):
            break
        children.append(parse_one_ref(r))
    return name, templates.Frame(children)
def parse_frame_transform_matrix(r):
    """Parse ``FrameTransformMatrix { <4x4 matrix> }``.

    Returns a ``templates.FrameTransformMatrix``.
    """
    r.consume(b"FrameTransformMatrix")
    r.consume(lex.TOKEN_LBRACKET)
    transform = templates.FrameTransformMatrix(parse_matrix4x4(r))
    r.consume(lex.TOKEN_RBRACKET)
    return transform
def parse_mesh_face(r):
    """Parse one face: an index count, then that many comma-separated ints.

    Returns a ``templates.MeshFace``.
    """
    index_count = parse_int(r)
    indices = parse_list(r, index_count, parse_int_raw)
    return templates.MeshFace(index_count, indices)
def parse_mesh(r):
    """Parse a named ``Mesh``.

    Reads the vertex list, then the face list, then any nested objects or
    references up to the closing bracket.  Returns ``(name, templates.Mesh)``.
    """
    r.consume(b"Mesh")
    name = r.consume_type(bytes)
    r.consume(lex.TOKEN_LBRACKET)
    vertex_count = parse_int(r)
    vertices = parse_list(r, vertex_count, parse_vector)
    face_count = parse_int(r)
    faces = parse_list(r, face_count, parse_mesh_face)
    children = []
    while True:
        if r.match(lex.TOKEN_RBRACKET):
            break
        children.append(parse_one_ref(r))
    mesh = templates.Mesh(vertex_count, vertices, face_count, faces, children)
    return name, mesh
def parse_mesh_material_list(r):
    """Parse a ``MeshMaterialList`` object.

    Reads the material count, the face-index count and the face indices
    (each index carries its own semicolon, with no separator in between),
    then ``Material`` objects or references up to the closing bracket.
    Returns a ``templates.MeshMaterialList``.
    """
    r.consume(b"MeshMaterialList")
    r.consume(lex.TOKEN_LBRACKET)
    nMaterials = parse_int(r)
    nFaceIndices = parse_int(r)
    faceIndices = parse_list(r, nFaceIndices, parse_int, delim=None)
    objects = []
    while not r.match(lex.TOKEN_RBRACKET):
        # Identifier tokens are bytes, so the expected name must be bytes
        # too; a str here could never compare equal to the peeked token.
        objects.append(parse_one_ref(r, b"Material"))
    return templates.MeshMaterialList(
        nMaterials,
        nFaceIndices,
        faceIndices,
        objects
    )
def parse_mesh_normals(r):
    """Parse ``MeshNormals { normals...; face normals...; }``.

    Returns a ``templates.MeshNormals``.
    """
    r.consume(b"MeshNormals")
    r.consume(lex.TOKEN_LBRACKET)
    normal_count = parse_int(r)
    normals = parse_list(r, normal_count, parse_vector)
    face_count = parse_int(r)
    face_normals = parse_list(r, face_count, parse_mesh_face)
    r.consume(lex.TOKEN_RBRACKET)
    return templates.MeshNormals(normal_count, normals, face_count, face_normals)
def parse_coords2d(r):
    """Parse two semicolon-terminated floats into a ``templates.Coords2D``."""
    pair = [parse_float(r) for _ in range(2)]
    return templates.Coords2D(*pair)
def parse_mesh_texture_coords(r):
    """Parse ``MeshTextureCoords { count; coords...; }``.

    Returns a ``templates.MeshTextureCoords``.
    """
    r.consume(b"MeshTextureCoords")
    r.consume(lex.TOKEN_LBRACKET)
    coord_count = parse_int(r)
    coords = parse_list(r, coord_count, parse_coords2d)
    r.consume(lex.TOKEN_RBRACKET)
    return templates.MeshTextureCoords(coord_count, coords)
def parse_animation_set(r):
    """Parse a named ``AnimationSet`` of ``Animation`` objects or references.

    Returns ``(name, templates.AnimationSet)``.
    """
    r.consume(b"AnimationSet")
    name = r.consume_type(bytes)
    r.consume(lex.TOKEN_LBRACKET)
    animations = []
    while True:
        if r.match(lex.TOKEN_RBRACKET):
            break
        animations.append(parse_one_ref(r, b"Animation"))
    return name, templates.AnimationSet(animations)
def parse_animation(r):
    """Parse a named ``Animation``.

    Children are references, or objects whose leading token is
    ``AnimationKey`` or ``AnimationOptions``.  Returns
    ``(name, templates.Animation)``.
    """
    r.consume(b"Animation")
    name = r.consume_type(bytes)
    r.consume(lex.TOKEN_LBRACKET)
    allowed = {b"AnimationKey", b"AnimationOptions"}
    children = []
    while True:
        if r.match(lex.TOKEN_RBRACKET):
            break
        children.append(parse_one_ref(r, allowed))
    return name, templates.Animation(children)
def parse_float_keys(r):
    """Parse a value count, that many comma-separated floats, and ``;;``.

    ``parse_list`` consumes the first semicolon (end of the value array);
    the second one is consumed here.

    Returns a ``templates.FloatKeys``.  The previous code built a
    ``TimedFloatKeys`` here, putting the count in its ``time`` field.
    """
    nValues = parse_int(r)
    values = parse_list(r, nValues, parse_float_raw)
    r.consume(lex.TOKEN_SEMICOLON) # FIXME: is this correct for nKeys>1?
    return templates.FloatKeys(
        nValues,
        values,
    )
def parse_timed_float_keys(r):
    """Parse an integer time followed by its float keys.

    Returns a ``templates.TimedFloatKeys``.
    """
    key_time = parse_int(r)
    float_keys = parse_float_keys(r)
    return templates.TimedFloatKeys(key_time, float_keys)
def parse_animation_key(r):
    """Parse ``AnimationKey { keyType; nKeys; keys...; }``.

    Returns a ``templates.AnimationKey``.
    """
    r.consume(b"AnimationKey")
    r.consume(lex.TOKEN_LBRACKET)
    key_type = parse_int(r)
    key_count = parse_int(r)
    timed_keys = parse_list(r, key_count, parse_timed_float_keys)
    r.consume(lex.TOKEN_RBRACKET)
    return templates.AnimationKey(key_type, key_count, timed_keys)
def parse_animation_options(r):
    """Parse ``AnimationOptions { openClosed; positionQuality; }``.

    Both fields are read as semicolon-terminated ints, matching the two int
    fields of ``templates.AnimationOptions``.
    """
    r.consume(b"AnimationOptions")
    r.consume(lex.TOKEN_LBRACKET)
    openClosed = parse_int(r)
    positionQuality = parse_int(r)
    r.consume(lex.TOKEN_RBRACKET)
    return templates.AnimationOptions(openClosed, positionQuality)


def parse_one(r, peek_token=None):
    """Parse the object whose template name is the current token.

    ``peek_token`` optionally restricts which name is acceptable: either a
    single token, or a ``set`` of tokens.  A name outside the restriction, or
    one with no parser, trips an assertion.

    Returns whatever the selected parser returns: a ``templates`` object, or
    a ``(name, object)`` tuple for named objects.
    """
    token = r.peek()
    if peek_token is not None:
        if type(peek_token) is set:
            assert token in peek_token, (token, peek_token)
        else:
            assert token == peek_token, (token, peek_token)

    parsers = {
        b"Header": parse_header,
        b"Material": parse_material,
        b"TextureFilename": parse_texture_filename,
        b"Frame": parse_frame,
        b"FrameTransformMatrix": parse_frame_transform_matrix,
        b"Mesh": parse_mesh,
        b"MeshMaterialList": parse_mesh_material_list,
        b"MeshNormals": parse_mesh_normals,
        b"MeshTextureCoords": parse_mesh_texture_coords,
        b"AnimationSet": parse_animation_set,
        b"Animation": parse_animation,
        b"AnimationKey": parse_animation_key,
        # parse_animation accepts this name, so it needs a parser too.
        b"AnimationOptions": parse_animation_options,
    }
    parser = parsers.get(token)
    if parser is not None:
        return parser(r)
    assert False, token
def parse_one_ref(r, peek_token=None):
    """Parse either a ``{ name }`` reference or a full object.

    A leading left bracket introduces a reference, returned as a
    ``templates.Reference``.  Anything else is handed to ``parse_one``
    together with ``peek_token``.
    """
    if r.match(lex.TOKEN_LBRACKET):
        name = r.consume_type(bytes)
        r.consume(lex.TOKEN_RBRACKET)
        return templates.Reference(
            name
        )
    # parse_one peeks at the token itself, so no peek is needed here.
    return parse_one(r, peek_token)
def parse_all(r):
    """Yield each top-level object or reference until the reader is exhausted."""
    while True:
        if r.eof():
            return
        yield parse_one_ref(r)
# Script entry: tokenize the loaded buffer, parse every top-level object and
# pretty-print each result as it is produced.
r = TokenReader(mem)
for i in parse_all(r):
    pprint(i)

124
x/templates.py Normal file
View File

@ -0,0 +1,124 @@
from typing import Any, Union
from dataclasses import dataclass
@dataclass
class Header:
    """Record holding the three integer fields of a ``Header`` object."""
    major: int
    minor: int
    flags: int
@dataclass
class ColorRGBA:
    """Four-channel colour with float ``r``, ``g``, ``b`` and ``a`` fields."""
    r: float
    g: float
    b: float
    a: float
@dataclass
class ColorRGB:
    """Three-channel colour with float ``r``, ``g`` and ``b`` fields."""
    r: float
    g: float
    b: float
@dataclass
class Matrix4x4:
    """Matrix stored as a flat list of floats."""
    # Presumably 16 entries in file order -- the length is not enforced here.
    v: list[float]
@dataclass
class Material:
    """Material record: colours, specular power and nested child objects."""
    faceColor: ColorRGBA
    power: float
    specularColor: ColorRGB
    emissiveColor: ColorRGB
    # Nested objects of any kind; no element type is enforced.
    objects: list[Any]
@dataclass
class Frame:
    """Frame record whose only field is its list of child objects."""
    objects: list[Any]
@dataclass
class FrameTransformMatrix:
    """Record wrapping a single ``Matrix4x4``."""
    frameMatrix: Matrix4x4
@dataclass
class Vector:
    """Three-component float vector."""
    x: float
    y: float
    z: float
@dataclass
class MeshFace:
    """One face: an index count and the list of integer indices."""
    # Presumably equals len(faceVertexIndices); the two are stored separately.
    nFaceVertexIndices: int
    faceVertexIndices: list[int]
@dataclass
class Mesh:
    """Mesh record: vertices, faces and nested child objects."""
    nVertices: int
    vertices: list[Vector]
    nFaces: int
    faces: list[MeshFace]
    # Nested objects of any kind; no element type is enforced.
    objects: list[Any]
@dataclass
class MeshMaterialList:
    """Material list record: counts, per-face indices and the materials."""
    nMaterials: int
    nFaceIndices: int
    faceIndices: list[int]
    # NOTE(review): annotated as Material, but nothing enforces the element
    # type at runtime -- confirm what the parser actually stores here.
    objects: list[Material]
@dataclass
class MeshNormals:
    """Normals record: normal vectors plus per-face normal indices."""
    nNormals: int
    normals: list[Vector]
    nFaceNormals: int
    faceNormals: list[MeshFace]
@dataclass
class Coords2D:
    """Two-component float coordinate pair."""
    u: float
    v: float
@dataclass
class MeshTextureCoords:
    """Texture-coordinate record: a count and the list of ``Coords2D``."""
    nTextureCoords: int
    textureCoords: list[Coords2D]
@dataclass
class TextureFilename:
    """Record holding a texture file name as ``str``."""
    filename: str
@dataclass
class Reference:
    """By-name reference to another object; ``name`` is kept as ``bytes``."""
    name: bytes
@dataclass
class FloatKeys:
    """A value count and the list of float values."""
    nValues: int
    values: list[float]
@dataclass
class TimedFloatKeys:
    """A ``FloatKeys`` value paired with an integer time."""
    time: int
    tfkeys: FloatKeys
@dataclass
class AnimationKey:
    """Animation key record: key type, key count and the timed keys."""
    # Integer code; its meaning is not defined in this module.
    keyType: int
    nKeys: int
    keys: list[TimedFloatKeys]
@dataclass
class AnimationOptions:
    """Animation options record with two integer fields."""
    openClosed: int
    positionQuality: int
@dataclass
class Animation:
    """Animation record whose children are references, keys or options."""
    objects: list[Union[Reference, AnimationKey, AnimationOptions]]
@dataclass
class AnimationSet:
    """Animation set record holding a list of ``Animation`` entries."""
    objects: list[Animation]

109
x/x.h Normal file
View File

@ -0,0 +1,109 @@
/* Header record: a tag followed by the major, minor and flags integers.
 * NOTE(review): the meaning of `tag` is not defined in this header. */
struct header {
  int tag;
  int major;
  int minor;
  int flags;
};
/* Four-channel float colour (r, g, b, a). */
struct color_rgba {
  float r;
  float g;
  float b;
  float a;
};
/* Three-channel float colour (r, g, b). */
struct color_rgb {
  float r;
  float g;
  float b;
};
/* Material record: tag, colours, specular power and trailing child pointers.
 * NOTE(review): x_color_rgba / x_color_rgb are not declared in this header
 * (the structs above are color_rgba / color_rgb) -- confirm that matching
 * typedefs exist elsewhere. */
struct material {
  int tag;
  x_color_rgba facecolor;
  float power;
  x_color_rgb specularcolor;
  x_color_rgb emissivecolor;
  void * objects[]; /* flexible array member; length is not stored here */
};
/* Frame record: a tag followed by a trailing array of child pointers. */
struct frame {
  int tag;
  void * objects[]; /* flexible array member; length is not stored here */
};
/* Frame transform record: a tag and one 4x4 matrix.
 * mat4x4 is not declared in this header; it must come from elsewhere. */
struct frame_transform_matrix {
  int tag;
  mat4x4 framematrix;
};
/* One face: an index count followed by the indices themselves. */
struct mesh_face {
  int nfacevertexindices;
  int facevertexindices[]; /* flexible array member */
};
/* Mesh record: vertex array, face array and trailing child pointers.
 * NOTE(review): unlike most records in this header there is no `tag` field,
 * and `mesh_face` is used without the `struct` keyword (needs a typedef in
 * C) -- confirm both are intended. vec3 is declared elsewhere. */
struct mesh {
  int nvertices;
  vec3 * vertices;
  int nfaces;
  mesh_face * faces;
  void * objects[]; /* flexible array member; length is not stored here */
};
/* Material list record: counts, face indices and trailing child pointers.
 * NOTE(review): no `tag` field, and the field names use underscores while
 * mesh / mesh_face do not -- confirm the intended convention. */
struct mesh_material_list {
  int n_materials;
  int n_face_indices;
  int * face_indices;
  void * objects[]; /* flexible array member; length is not stored here */
};
/* Normals record: tag, normal vectors and per-face normal indices.
 * `mesh_face` is used without the `struct` keyword and vec3 is declared
 * elsewhere. */
struct mesh_normals {
  int tag;
  int n_normals;
  vec3 * normals;
  int n_face_normals;
  mesh_face * face_normals;
};
/* Texture-coordinate record: tag, count and the trailing coordinates.
 * vec2 is not declared in this header; it must come from elsewhere. */
struct mesh_texture_coords {
  int tag;
  int n_texture_coords;
  vec2 texture_coords[]; /* flexible array member */
};
/* Texture file name record; only holds a pointer to the name string. */
struct texture_filename {
  const char * filename;
};
/* A value count and the float key values.
 * NOTE(review): `values` is a single float although `nvalues` suggests
 * several -- confirm whether a pointer or array was intended. */
struct float_keys {
  int nvalues;
  float values;
};
/* An integer time paired with its float keys.
 * `float_keys` is used without the `struct` keyword (needs a typedef in C). */
struct timed_float_keys {
  int time;
  float_keys tfkeys;
};
/* Animation key record: tag, key type, key count and the trailing keys.
 * `timed_float_keys` is used without the `struct` keyword. */
struct animation_key {
  int tag;
  int key_type;
  int n_keys;
  timed_float_keys keys[]; /* flexible array member */
};
/* Animation options record: a tag and two integer settings. */
struct animation_options {
  int tag;
  int open_closed;
  int position_quality;
};
/* Animation record: a tag followed by a trailing array of child pointers. */
struct animation {
  int tag;
  void * objects[]; /* flexible array member; length is not stored here */
};
/* Animation set record: a tag followed by a trailing array of child
 * pointers.  The definition is terminated with `;`, which the original
 * omitted (a syntax error for whatever follows the header). */
struct animation_set {
  int tag;
  void * objects[]; /* flexible array member; length is not stored here */
};