# jvm/class_file.py
# 2024-12-25 01:16:59 -06:00

# 240 lines
# 8.1 KiB
# Python

import struct
import sys
from binascii import hexlify
import csv
from gen_decoder import parse_opcode_table
# Opcode table built once, at import time, by gen_decoder.parse_opcode_table().
# dump_opcodes() indexes it with the raw opcode byte and reads `.mnemonic`
# and `.arguments_size` from the entry it gets back.
opcode_table = parse_opcode_table()
# Big-endian unsigned struct format for each supported field width (in bytes).
_STRUCT_FORMATS = {
    4: ">I",
    2: ">H",
    1: ">B",
}


def size_to_format(size):
    """Return the big-endian unsigned ``struct`` format for a field width.

    Args:
        size: Field width in bytes; must be 1, 2 or 4.

    Returns:
        ``">B"``, ``">H"`` or ``">I"`` respectively.

    Raises:
        ValueError: If ``size`` is not one of the supported widths.  An
            explicit raise is used instead of ``assert False`` so the check
            is not stripped when Python runs with ``-O``.
    """
    try:
        return _STRUCT_FORMATS[size]
    except KeyError:
        raise ValueError(f"unsupported field size: {size!r}") from None
def field(buf, size):
    """Read one big-endian unsigned integer from the front of ``buf``.

    Args:
        buf: Sliceable bytes-like object positioned at the field.
        size: Field width in bytes, as accepted by ``size_to_format``.

    Returns:
        A ``(rest, value)`` tuple: the buffer with the field consumed, and
        the decoded integer.
    """
    fmt = size_to_format(size)
    head, rest = buf[:size], buf[size:]
    (value,) = struct.unpack(fmt, head)
    return rest, value
# Constant-pool layouts keyed by tag byte.
#
# Each value is (constant name, [(field name, width in bytes), ...]).
# A width of 0 marks a variable-length field whose byte count is the value
# of the field read just before it (see parse_cp_info).
constant_lookup = {
    7: ("CONSTANT_Class", [("name_index", 2)]),
    9: ("CONSTANT_Fieldref", [("class_index", 2), ("name_and_type_index", 2)]),
    10: ("CONSTANT_Methodref", [("class_index", 2), ("name_and_type_index", 2)]),
    11: ("CONSTANT_InterfaceMethodref", [("class_index", 2), ("name_and_type_index", 2)]),
    8: ("CONSTANT_String", [("string_index", 2)]),
    3: ("CONSTANT_Integer", [("bytes", 4)]),
    4: ("CONSTANT_Float", [("bytes", 4)]),
    5: ("CONSTANT_Long", [("high_bytes", 4), ("low_bytes", 4)]),
    6: ("CONSTANT_Double", [("high_bytes", 4), ("low_bytes", 4)]),
    12: ("CONSTANT_NameAndType", [("name_index", 2), ("descriptor_index", 2)]),
    1: ("CONSTANT_Utf8", [("length", 2), ("bytes", 0)]),
    # reference_kind is a single byte (u1) in the class-file format; reading
    # it as two bytes would misalign every constant that follows.
    15: ("CONSTANT_MethodHandle", [("reference_kind", 1), ("reference_index", 2)]),
    16: ("CONSTANT_MethodType", [("descriptor_index", 2)]),
    17: ("CONSTANT_Dynamic", [("bootstrap_method_attr_index", 2), ("name_and_type_index", 2)]),
    18: ("CONSTANT_InvokeDynamic", [("bootstrap_method_attr_index", 2), ("name_and_type_index", 2)]),
    19: ("CONSTANT_Module", [("name_index", 2)]),
    20: ("CONSTANT_Package", [("name_index", 2)]),
}
def parse_cp_info(buf):
    """Decode one constant-pool entry from the front of ``buf``.

    The leading tag byte selects a layout from ``constant_lookup``.  Fixed
    width fields are read with ``field``; a field of width 0 takes as many
    raw bytes as the value of the most recently read fixed-width field.

    Returns:
        ``(rest, (name, fields), entries)`` where ``fields`` maps field name
        to decoded value and ``entries`` is 2 for tags 5 and 6 (long and
        double) and 1 for everything else.

    Raises:
        AssertionError: If the tag is not present in ``constant_lookup``.
    """
    buf, tag = field(buf, 1)
    assert tag in constant_lookup, tag
    name, layout = constant_lookup[tag]

    decoded = {}
    previous = None  # value of the last fixed-width field read
    for field_name, field_size in layout:
        if field_size != 0:
            buf, value = field(buf, field_size)
            previous = value
        else:
            value = bytes(buf[0:previous])
            buf = buf[previous:]
        decoded[field_name] = value

    entries = 2 if tag in {5, 6} else 1
    return buf, (name, decoded), entries
def parse_attribute_info(buf):
    """Decode one attribute_info structure from the front of ``buf``.

    Reads a 2-byte name index and a 4-byte length, then copies that many
    bytes out as the attribute payload.

    Returns:
        ``(rest, (attribute_name_index, attribute_length, info))`` with
        ``info`` as a ``bytes`` object.
    """
    buf, name_index = field(buf, 2)
    buf, length = field(buf, 4)
    payload, remainder = buf[0:length], buf[length:]
    return remainder, (name_index, length, bytes(payload))
def parse_field_info(buf):
    """Decode one field_info structure from the front of ``buf``.

    parse_class also uses this for method entries, which it reads with the
    same four 2-byte header fields followed by an attribute list.

    Returns:
        ``(rest, (access_flags, name_index, descriptor_index,
        attributes_count, attributes))`` where ``attributes`` is a list of
        tuples as returned by ``parse_attribute_info``.
    """
    header = []
    for _ in range(4):
        buf, value = field(buf, 2)
        header.append(value)
    access_flags, name_index, descriptor_index, attributes_count = header

    attributes = []
    remaining = attributes_count
    while remaining > 0:
        buf, attribute = parse_attribute_info(buf)
        attributes.append(attribute)
        remaining -= 1

    return buf, (access_flags, name_index, descriptor_index, attributes_count, attributes)
def parse_exception_table(buf, indent):
    """Print one exception-table entry and return the remaining buffer.

    An entry is four consecutive 2-byte fields; each is printed on its own
    line, prefixed by ``indent``.
    """
    for name in ("start_pc", "end_pc", "handler_pc", "catch_type"):
        buf, value = field(buf, 2)
        print(indent, name, value)
    return buf
def dump_opcodes(buf, indent):
    """Print a disassembly listing of the bytecode in ``buf``.

    Each line shows the instruction's byte offset, its mnemonic and the raw
    operand bytes as a list of integers.  Operand width comes from the
    ``arguments_size`` of the matching ``opcode_table`` entry.

    NOTE(review): this treats every instruction as fixed width; confirm how
    gen_decoder describes variable-length instructions, if at all.
    """
    offset = 0
    while len(buf) > 0:
        opcode = buf[0]
        instruction = opcode_table[opcode]
        width = instruction.arguments_size
        operands = buf[1:]
        print(indent, f"{offset:> 3}", instruction.mnemonic, list(operands[0:width]))
        offset += 1 + width
        buf = operands[width:]
def print_code_info(buf, indent, constant_pool):
    """Print the contents of a Code attribute payload.

    Walks the payload in order: max_stack, max_locals, code_length, the
    bytecode itself, the exception table and finally the nested attributes.

    Args:
        buf: The attribute's ``info`` bytes.
        indent: Prefix printed before every line at this level.
        constant_pool: Parsed constant pool (0-based list), used to resolve
            nested attribute names.

    Raises:
        AssertionError: If a nested attribute name is not a CONSTANT_Utf8
            entry, or if bytes remain after the last nested attribute.
    """
    nested = indent + " "

    # Fixed-size header; the last value read is the bytecode length.
    for name, width in (("max_stack", 2), ("max_locals", 2), ("code_length", 4)):
        buf, value = field(buf, width)
        print(indent, name, value)
    code_length = value

    print(indent, "code:")
    dump_opcodes(buf[0:code_length], nested)
    buf = buf[code_length:]

    buf, exception_table_length = field(buf, 2)
    print(indent, "exception_table_length", exception_table_length)
    print(indent, "exception_table:")
    for _ in range(exception_table_length):
        buf = parse_exception_table(buf, nested)

    buf, attributes_count = field(buf, 2)
    print(indent, "attributes_count", attributes_count)
    print(indent, "attributes:")
    for number in range(attributes_count):
        buf, attribute_info = parse_attribute_info(buf)
        print(nested, f"attribute {number}:")
        attribute_name_index, attribute_length, info = attribute_info
        # Constant-pool indices are 1-based; the list is 0-based.
        constant_type, constant = constant_pool[attribute_name_index - 1]
        assert constant_type == 'CONSTANT_Utf8', constant_type
        print(nested, "attribute_name_index", attribute_name_index, constant["bytes"])
        print(nested, "attribute_length", attribute_length)
        print(nested, "info", info)

    # The payload must be consumed exactly.
    assert len(buf) == 0
def parse_class(buf):
    """Parse a whole class file from ``buf`` and print its structure.

    Prints, in file order: the header, the constant pool, the class flags
    and indices, interfaces, fields, methods (expanding any attribute named
    ``Code`` via ``print_code_info``) and the class-level attributes.

    Args:
        buf: Sliceable bytes-like object holding the complete class file.

    Raises:
        AssertionError: If a constant-pool tag is unknown, a name lookup
            does not land on a CONSTANT_Utf8 entry, or bytes remain after
            the last class attribute.
    """
    def utf8_bytes(index):
        # Constant-pool indices are 1-based; the list is 0-based.
        constant_type, constant = constant_pool[index - 1]
        assert constant_type == 'CONSTANT_Utf8', constant_type
        return constant["bytes"]

    preamble = []
    for width in (4, 2, 2, 2):
        buf, value = field(buf, width)
        preamble.append(value)
    magic, minor_version, major_version, constant_pool_count = preamble
    print("magic", hex(magic))
    print("minor_version", minor_version)
    print("major_version", major_version)
    print("constant_pool_count", constant_pool_count)

    print("constant_pool:")
    constant_pool = []
    slot = 1
    while slot < constant_pool_count:
        buf, cp_info, entries = parse_cp_info(buf)
        constant_pool.append(cp_info)
        if entries == 2:
            # Two-slot constants get a placeholder so later indices line up.
            constant_pool.append(None)
        print(slot, cp_info)
        slot += entries

    class_header = []
    for _ in range(4):
        buf, value = field(buf, 2)
        class_header.append(value)
    access_flags, this_class, super_class, interfaces_count = class_header
    print("access_flags", hex(access_flags))
    print("this_class", this_class)
    print("super_class", super_class)
    print("interfaces_count", interfaces_count)

    print("interfaces:")
    for number in range(interfaces_count):
        buf, interface = field(buf, 2)
        print(number, interface)

    buf, fields_count = field(buf, 2)
    print("fields_count", fields_count)
    for number in range(fields_count):
        buf, field_info = parse_field_info(buf)
        print(number, field_info)

    buf, methods_count = field(buf, 2)
    print("methods_count", methods_count)
    print("methods:")
    for method_number in range(methods_count):
        # Method entries are read with the field_info parser.
        buf, method_info = parse_field_info(buf)
        print(f" method {method_number}:")
        method_flags, name_index, descriptor_index, method_attr_count, method_attrs = method_info
        print(" access_flags", hex(method_flags))
        print(" name_index", name_index, utf8_bytes(name_index))
        print(" descriptor_index", descriptor_index, utf8_bytes(descriptor_index))
        print(" attributes_count", method_attr_count)
        print(" attributes:")
        for attr_number in range(method_attr_count):
            print(f" attribute {attr_number}:")
            attribute_name_index, attribute_length, info = method_attrs[attr_number]
            attribute_name_bytes = utf8_bytes(attribute_name_index)
            print(" attribute_name_index", attribute_name_index, attribute_name_bytes)
            print(" attribute_length", attribute_length)
            if attribute_name_bytes == b'Code':
                print_code_info(info, " ", constant_pool)

    buf, attributes_count = field(buf, 2)
    print("attributes_count", attributes_count)
    print("attributes:")
    for number in range(attributes_count):
        buf, attribute_info = parse_attribute_info(buf)
        print(number, attribute_info)

    # The whole file must be consumed exactly.
    assert len(buf) == 0, bytes(buf)
# Script entry point: dump the class file named on the command line.
# Guarded so that importing this module does not read sys.argv or parse
# anything as a side effect.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        program = sys.argv[0] if sys.argv else "class_file.py"
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(f"usage: {program} <class-file>")
    filename = sys.argv[1]
    with open(filename, 'rb') as f:
        buf = f.read()
    # A memoryview lets the parser slice the data without copying it.
    parse_class(memoryview(buf))