From 3b4604c6ba7fdf17e7faa639e827c69222e711df Mon Sep 17 00:00:00 2001 From: Zack Buhman Date: Wed, 4 Mar 2026 22:20:35 -0600 Subject: [PATCH] minecraft: world pre-processing scripts --- minecraft/gen/blocks.py | 103 +++++++++++ minecraft/gen/cube.py | 134 ++++++++++++++ minecraft/gen/mc.py | 401 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 638 insertions(+) create mode 100644 minecraft/gen/blocks.py create mode 100644 minecraft/gen/cube.py create mode 100644 minecraft/gen/mc.py diff --git a/minecraft/gen/blocks.py b/minecraft/gen/blocks.py new file mode 100644 index 0000000..c7a8d81 --- /dev/null +++ b/minecraft/gen/blocks.py @@ -0,0 +1,103 @@ +import struct + +def id_to_px(i): + x = i % 16 + y = i // 16 + return x * 16, y * 16 + +def px_to_id(px, py): + x = px // 16 + y = py // 16 + i = y * 16 + x + return i + +unk = 185 + +mapping = [ + (1, 1, "stone"), + (2, 0, "grass"), + (31, 0, "grass"), # fixme actually tallgrass + (3, 2, "dirt"), + (4, 16, "stonebrick"), + (5, 4, "wood"), + (6, 15, "sapling"), + (7, 17, "bedrock"), + (8, 205, "water"), # flowing + (9, 205, "water"), # still + (10, 237, "lava"), # flowing + (11, 237, "lava"), # still + (12, 18, "sand"), + (13, 19, "gravel"), + (14, 32, "oreGold"), + (15, 33, "oreIron"), + (16, 34, "oreCoal"), + (17, 20, "log"), + (18, 52, "leaves"), + (19, 48, "sponge"), + (20, 49, "glass"), + (35, 64, "cloth"), + (37, 13, "flower"), + (38, 12, "rose"), + (39, 29, "mushroom"), + (40, 28, "mushroom"), + (41, 39, "blockGold"), + (42, 38, "blockIron"), + (43, 5, "stoneSlab"), # double + (44, 5, "stoneSlab"), # single + (45, 7, "brick"), + (46, 8, "tnt"), + (47, 35, "bookshelf"), + (48, 36, "stoneMoss"), + (49, 37, "obsidian"), + (50, 80, "torch"), + (51, 31, "fire"), + (52, 65, "mobSpawner"), + (53, 4, "stairsWood"), + (54, 27, "chest"), + (55, 84, "redstoneDust"), + (56, 50, "oreDiamond"), + (57, 40, "blockDiamond"), + (58, 43, "workbench"), + (59, 88, "crops"), + (60, 87, "farmland"), + (61, 44, "furnace"), # off + (62, 61, "furnace"), # burning + (63, unk, "sign"), + (64, 81, "doorWood"), + (65, 83, "ladder"), + (66, 128, "rail"), + (67, 16, "stairsStone"), + (68, unk, "sign"), + (69, 96, "lever"), + (70, 6, "pressurePlate"), + (71, 82, "doorIron"), + (72, 6, "pressurePlate"), + (73, 51, "oreRedstone"), + (74, 51, "oreRedstone"), + (75, 115, "notGate"), + (76, 99, "notGate"), + (77, unk, "button"), + (78, 66, "snow"), + (79, 67, "ice"), + (80, 66, "snow"), + (81, 70, "cactus"), + (82, 72, "clay"), + (83, 73, "reeds"), + (84, 74, "jukebox"), + (85, 4, "fence"), + (86, 102, "pumpkin"), + (87, 103, "hellrock"), + (88, 104, "hellsand"), + (89, 105, "lightgem"), + (90, 14, "portal"), + (91, 102, "pumpkin"), +] + +lookup = { + k: v for k, v, _ in mapping +} + +with open("block_id_to_texture_id.data", "wb") as f: + for i in range(256): + value = lookup.get(i, unk) + f.write(struct.pack("I", mem[ix:ix+4]) + chunk_offset = (chunk_location >> 8) & 0xffffff + chunk_sector_count = chunk_location & 0xff + yield chunk_offset, chunk_sector_count + +def parse_locations(mem, offset): + locations = list(_parse_locations(mem, offset)) + return offset + 1024 * 4, locations + +def _parse_timestamps(mem, offset): + for i in range(1024): + ix = offset + i * 4 + timestamp, = struct.unpack(">I", mem[ix:ix+4]) + yield timestamp + +def parse_timestamps(mem, offset): + timestamps = list(_parse_timestamps(mem, offset)) + return offset + 1024 * 4, timestamps + +def print_locations(locations): + for y in range(32): + for x in range(32): + offset, count = locations[y * 32 + x] + print(str(offset).rjust(4), end=' ') + print() + +class CountZeroException(Exception): + pass + +def parse_payload(mem, location): + offset, count = location + if count == 0: + raise CountZeroException() + ix = offset * 4096 + payload = mem[ix:ix + count * 4096] + length, = struct.unpack(">I", payload[0:4]) + assert length <= count * 4096, (length, count) + compression_type = payload[4] + data = payload[5:5 + (length - 1)] + assert compression_type == 2, compression_type + uncompressed = zlib.decompress(data) + return memoryview(uncompressed) + +class TAG: + Byte = 0x01 + Short = 0x02 + Int = 0x03 + Long = 0x04 + Float = 0x05 + Double = 0x06 + ByteArray = 0x07 + String = 0x08 + List = 0x09 + Compound = 0x0a + +@dataclass +class Byte: + name: str + value: int + +@dataclass +class Short: + name: str + value: int + +@dataclass +class Int: + name: str + value: int + +@dataclass +class Long: + name: str + value: int + +@dataclass +class Float: + name: str + value: float + +@dataclass +class Double: + name: str + value: float + +@dataclass +class ByteArray: + name: str + value: bytes + +@dataclass +class String: + name: str + value: str + +@dataclass +class List: + name: str + items: list + +@dataclass +class Compound: + name: str + tags: list + +def indent(level): + return " " * level + +def parse_tag_inner(mem, offset, level, tag_type, name): + payload = mem[offset:] + if tag_type == TAG.Byte: + value, = struct.unpack(">b", payload[0:1]) + return offset + 1, Byte(name, value) + if tag_type == TAG.Short: + value, = struct.unpack(">h", payload[0:2]) + return offset + 2, Short(name, value) + elif tag_type == TAG.Int: + value, = struct.unpack(">i", payload[0:4]) + return offset + 4, Int(name, value) + elif tag_type == TAG.Long: + value, = struct.unpack(">q", payload[0:8]) + return offset + 8, Long(name, value) + elif tag_type == TAG.Float: + value, = struct.unpack(">f", payload[0:4]) + return offset + 4, Float(name, value) + elif tag_type == TAG.Double: + value, = struct.unpack(">d", payload[0:8]) + return offset + 8, Double(name, value) + elif tag_type == TAG.ByteArray: + size, = struct.unpack(">i", payload[0:4]) + value = bytes(payload[4:4+size]) + return offset + 4 + size, ByteArray(name, value) + elif tag_type == TAG.String: + size, = struct.unpack(">H", payload[0:2]) + value = bytes(payload[2:2+size]).decode('utf-8') + return offset + 2 + size, String(name, value) + elif tag_type == TAG.List: + list_content_tag_id, size = struct.unpack(">BI", payload[0:5]) + items = [] + offset = offset + 5 + for i in range(size): + payload = mem[offset:] + offset, item = parse_tag_inner(mem, offset, level, list_content_tag_id, None) + items.append(item) + return offset, List(name, items) + elif tag_type == TAG.Compound: + tags = [] + while payload[0] != 0: + offset, tag = parse_tag(mem, offset, level+1) + payload = mem[offset:] + tags.append(tag) + return offset + 1, Compound(name, tags) + else: + assert False, tag_type + +def parse_tag(mem, offset, level): + data = mem[offset:] + tag_type = data[0] + name_length, = struct.unpack(">H", data[1:3]) + name = bytes(data[3:3+name_length]) + #print(indent(level), tag_type, name_length, name) + offset = offset + 3 + name_length + return parse_tag_inner(mem, offset, level, tag_type, name) + +@dataclass +class Level: + blocks: bytes + data: bytes + sky_light: bytes + block_light: bytes + height_map: bytes + x_pos: int + z_pos: int + +def level_from_tag(tag): + assert type(tag) == Compound + assert tag.name == b'' + assert len(tag.tags) == 1 + level, = tag.tags + assert type(level) == Compound + assert level.name == b'Level' + + name_mapping = { + b'Blocks': 'blocks', + b'Data': 'data', + b'SkyLight': 'sky_light', + b'BlockLight': 'block_light', + b'HeightMap': 'height_map', + b'xPos': 'x_pos', + b'zPos': 'z_pos', + } + + args = {} + + for tag in level.tags: + if tag.name in name_mapping: + arg_name = name_mapping[tag.name] + args[arg_name] = tag.value + + return Level(**args) + +def parse_location(mem, location): + uncompressed = parse_payload(mem, location) + offset, tag = parse_tag(uncompressed, 0, 0) + assert offset == len(uncompressed), (offset, len(uncompressed)) + level = level_from_tag(tag) + return level + +def xyz_from_block_index(block_index): + assert block_index >= 0 and block_index < (128 * 16 * 16) + x = int(block_index / (128 * 16)) + y = int(block_index % 128) + z = int(int(block_index / 128) % 16) + return x, y, z + +def block_index_from_xyz(x, y, z): + assert x >= 0 and x < 16 + assert y >= 0 and y < 128 + assert z >= 0 and z < 16 + return int(y + z * 128 + x * 128 * 16) + +def wrap_n(nc, chunk_c): + if nc < 0: + nc = 15 + chunk_c = chunk_c - 1 + if nc > 15: + nc = 0 + chunk_c = chunk_c + 1 + return nc, chunk_c + +def vec3_add(v1, v2): + return ( + v1[0] + v2[0], + v1[1] + v2[1], + v1[2] + v2[2], + ) + +def vec3_mul(v, s): + return ( + v[0] * s, + v[1] * s, + v[2] * s, + ) + +vertex_table = [ + ((-1.0, 1.0, -1.0), (0.0, 1.0, 0.0), (1.0, 0.0)), + ((1.0, 1.0, 1.0), (0.0, 1.0, 0.0), (0.0, 1.0)), + ((1.0, 1.0, -1.0), (0.0, 1.0, 0.0), (0.0, 0.0)), + ((1.0, 1.0, 1.0), (0.0, 0.0, 1.0), (1.0, 1.0)), + + ((-1.0, -1.0, 1.0), (0.0, 0.0, 1.0), (0.0, 0.0)), + ((1.0, -1.0, 1.0), (0.0, 0.0, 1.0), (1.0, 0.0)), + ((-1.0, 1.0, 1.0), (-1.0, 0.0, 0.0), (1.0, 1.0)), + ((-1.0, -1.0, -1.0), (-1.0, 0.0, 0.0), (0.0, 0.0)), + + ((-1.0, -1.0, 1.0), (-1.0, 0.0, 0.0), (1.0, 0.0)), + ((1.0, -1.0, -1.0), (0.0, -1.0, 0.0), (1.0, 0.0)), + ((-1.0, -1.0, 1.0), (0.0, -1.0, 0.0), (0.0, 1.0)), + ((-1.0, -1.0, -1.0), (0.0, -1.0, 0.0), (0.0, 0.0)), + + ((1.0, 1.0, -1.0), (1.0, 0.0, 0.0), (1.0, 1.0)), + ((1.0, -1.0, 1.0), (1.0, 0.0, 0.0), (0.0, 0.0)), + ((1.0, -1.0, -1.0), (1.0, 0.0, 0.0), (1.0, 0.0)), + ((-1.0, 1.0, -1.0), (0.0, 0.0, -1.0), (1.0, 1.0)), + ((1.0, -1.0, -1.0), (0.0, 0.0, -1.0), (0.0, 0.0)), + ((-1.0, -1.0, -1.0), (0.0, 0.0, -1.0), (1.0, 0.0)), + ((-1.0, 1.0, 1.0), (0.0, 1.0, 0.0), (1.0, 1.0)), + ((-1.0, 1.0, 1.0), (0.0, 0.0, 1.0), (0.0, 1.0)), + ((-1.0, 1.0, -1.0), (-1.0, 0.0, 0.0), (0.0, 1.0)), + ((1.0, -1.0, 1.0), (0.0, -1.0, 0.0), (1.0, 1.0)), + ((1.0, 1.0, 1.0), (1.0, 0.0, 0.0), (0.0, 1.0)), + ((1.0, 1.0, -1.0), (0.0, 0.0, -1.0), (0.0, 1.0)) +] + +faces_by_normal = { + (-1.0, 0.0, 0.0): [6, 7, 8, 6, 20, 7], + (0.0, -1.0, 0.0): [9, 10, 11, 9, 21, 10], + (0.0, 0.0, -1.0): [15, 16, 17, 15, 23, 16], + (0.0, 0.0, 1.0): [3, 4, 5, 3, 19, 4], + (0.0, 1.0, 0.0): [0, 1, 2, 0, 18, 1], + (1.0, 0.0, 0.0): [12, 13, 14, 12, 22, 13] +} + +vertex_buffer = {} + +def add_vertex(vertex): + if vertex in vertex_buffer: + return vertex_buffer[vertex] + else: + index = len(vertex_buffer) + vertex_buffer[vertex] = index + return index + +def emit_face(center_position, block_id, triangles): + for index in triangles: + position, normal, texture = vertex_table[index] + position = vec3_add(vec3_mul(position, 0.5), center_position) + vertex = (position, normal, texture, block_id) + new_index = add_vertex(vertex) + yield new_index + +def block_neighbors(level_table, chunk_x, chunk_z, block_index): + block_id = level_table[(chunk_x, chunk_z)].blocks[block_index] + if block_id == 0: + return + + def neighbor_exists(nx, ny, nz): + if ny > 127 or ny < 0: + return False + nx, n_chunk_x = wrap_n(nx, chunk_x) + nz, n_chunk_z = wrap_n(nz, chunk_z) + if nx > 15 or nx < 0: + return True + if nz > 15 or nz < 0: + return True + n_block_index = block_index_from_xyz(nx, ny, nz) + key = (n_chunk_x, n_chunk_z) + if key not in level_table: + return True + n_block_id = level_table[key].blocks[n_block_index] + return n_block_id != 0 + + x, y, z = xyz_from_block_index(block_index) + + center_position = vec3_add((x, y, z), (chunk_x * 16, 0, chunk_z * 16)) + + for normal, triangles in faces_by_normal.items(): + neighbor = vec3_add(normal, (x, y, z)) + if not neighbor_exists(*neighbor): + yield from emit_face(center_position, block_id, triangles) + + #yield chunk_x, chunk_z, block_index, block_id + #break + +def devoxelize_region(level_table): + for chunk_x, chunk_z in level_table.keys(): + for block_index in range(128 * 16 * 16): + yield from block_neighbors(level_table, chunk_x, chunk_z, block_index) + +from collections import defaultdict +counts = defaultdict(int) + +def linearized_vertex_buffer(): + for vertex, i in sorted(vertex_buffer.items(), key=lambda kv: kv[1]): + yield vertex + +def main(mcr_path, data_path): + with open(mcr_path, "rb") as f: + buf = f.read() + mem = memoryview(buf) + + offset = 0 + offset, locations = parse_locations(mem, offset) + offset, timestamps = parse_timestamps(mem, offset) + assert offset == 0x2000 + + level_table = {} + for location in locations: + try: + level = parse_location(mem, location) + except CountZeroException: + continue + x, z = level.x_pos, level.z_pos + level_table[(x, z)] = level + #with open(f"blocks__{x:02x}_{z:02x}.data", "wb") as f: + # f.write(level.blocks) + + with open(data_path + ".idx", "wb") as f: + for index in devoxelize_region(level_table): + f.write(struct.pack("