diff --git a/Makefile b/Makefile index 6cac559..786a7f1 100644 --- a/Makefile +++ b/Makefile @@ -13,11 +13,12 @@ OBJ = \ c/execute.o \ c/memory_allocator.o \ c/class_resolver.o \ - c/hash_table.o + c/hash_table.o \ + c/frame.o MAIN_OBJ = \ $(OBJ) \ - c/frame.o + c/main.o PRINT_CLASS_OBJ = \ $(OBJ) \ diff --git a/c/class_resolver.c b/c/class_resolver.c index e23b574..e118e04 100644 --- a/c/class_resolver.c +++ b/c/class_resolver.c @@ -39,6 +39,7 @@ struct hash_table_entry * class_resolver_load_from_filenames(const char * filena assert(class_file->magic == 0xcafebabe); class_entry[i].class_file = class_file; + class_entry[i].initialization_state = CLASS_UNINITIALIZED; hash_table_add(class_hash_table_length, class_hash_table, diff --git a/c/class_resolver.h b/c/class_resolver.h index 90805c7..dbe91f3 100644 --- a/c/class_resolver.h +++ b/c/class_resolver.h @@ -10,8 +10,15 @@ struct field_entry { uint32_t value; }; +enum initialization_state { + CLASS_UNINITIALIZED, + CLASS_INITIALIZING, + CLASS_INITIALIZED, +}; + struct class_entry { struct class_file * class_file; + enum initialization_state initialization_state; struct { int length; diff --git a/c/execute.c b/c/execute.c index ae9df72..84e8761 100644 --- a/c/execute.c +++ b/c/execute.c @@ -550,6 +550,13 @@ void op_getstatic(struct vm * vm, uint32_t index) field_name_constant->utf8.length); assert(field_entry != nullptr); + /* On successful resolution of the field, the class or interface that + declared the resolved field is initialized if that class or interface has + not already been initialized (§5.5). 
*/ + + if (!vm_initialize_class(vm, class_entry)) + return; + uint32_t value = field_entry->value; operand_stack_push_u32(vm->current_frame, value); } @@ -925,6 +932,10 @@ void op_invokestatic(struct vm * vm, uint32_t index) method_name_constant->utf8.length); assert(method_info != nullptr); + /* On successful resolution of the method, the class or interface that + declared the resolved method is initialized if that class or interface has + not already been initialized (§5.5). */ + vm_static_method_call(vm, class_entry->class_file, method_info); } @@ -1266,6 +1277,17 @@ void op_multianewarray(struct vm * vm, uint32_t index, uint32_t dimensions) void op_new(struct vm * vm, uint32_t index) { + /* On successful resolution of the class, it is initialized if it has not + already been initialized (§5.5). + + (new) Upon execution of a new instruction, the class to be initialized is + the class referenced by the instruction. */ + + /* + if (!vm_initialize_class(vm, class_entry)) + return; + */ + assert(!"op_new"); } @@ -1307,8 +1329,20 @@ void op_newarray(struct vm * vm, uint32_t atype) int32_t count = operand_stack_pop_u32(vm->current_frame); int32_t size = element_size * count + 4; int32_t * arrayref = memory_allocate(size); + assert(arrayref != 0); arrayref[0] = count; + + /* Each of the elements of the new array is initialized to the default initial + value (§2.3, §2.4) for the element type of the array type. */ + /* round up u32_count, possibly initializing past the end of the array. This + is acceptable because memory_allocate always allocates a multiple of 4 + greater than or equal to `size`. 
*/ + int32_t u32_count = (size - 4 + 3) / 4; + for (int i = 0; i < u32_count; i++) { + arrayref[i + 1] = 0; + } + operand_stack_push_u32(vm->current_frame, (uint32_t)arrayref); } @@ -1333,7 +1367,47 @@ void op_putfield(struct vm * vm, uint32_t index) void op_putstatic(struct vm * vm, uint32_t index) { - assert(!"op_putstatic"); + struct constant * fieldref_constant = &vm->current_thread.current_class->constant_pool[index - 1]; + #ifdef DEBUG + assert(fieldref_constant->tag == CONSTANT_Fieldref); + #endif + struct constant * class_constant = &vm->current_thread.current_class->constant_pool[fieldref_constant->fieldref.class_index - 1]; + #ifdef DEBUG + assert(class_constant->tag == CONSTANT_Class); + #endif + struct constant * nameandtype_constant = &vm->current_thread.current_class->constant_pool[fieldref_constant->fieldref.name_and_type_index - 1]; + #ifdef DEBUG + assert(nameandtype_constant->tag == CONSTANT_NameAndType); + #endif + struct constant * class_name_constant = &vm->current_thread.current_class->constant_pool[class_constant->class.name_index - 1]; + #ifdef DEBUG + assert(class_name_constant->tag == CONSTANT_Utf8); + #endif + struct constant * field_name_constant = &vm->current_thread.current_class->constant_pool[nameandtype_constant->nameandtype.name_index - 1]; + #ifdef DEBUG + assert(field_name_constant->tag == CONSTANT_Utf8); + #endif + + struct class_entry * class_entry = class_resolver_lookup_class(vm->class_hash_table.length, + vm->class_hash_table.entry, + class_name_constant->utf8.bytes, + class_name_constant->utf8.length); + assert(class_entry != nullptr); + + struct field_entry * field_entry = class_resolver_lookup_field(class_entry, + field_name_constant->utf8.bytes, + field_name_constant->utf8.length); + assert(field_entry != nullptr); + + /* On successful resolution of the field, the class or interface that declared + the resolved field is initialized if that class or interface has not + already been initialized (§5.5). 
*/ + + if (!vm_initialize_class(vm, class_entry)) + return; + + uint32_t value = operand_stack_pop_u32(vm->current_frame); + field_entry->value = value; } void op_ret(struct vm * vm, uint32_t index) @@ -1358,7 +1432,7 @@ void op_sastore(struct vm * vm) void op_sipush(struct vm * vm, int32_t byte) { - assert(!"op_sipush2"); + operand_stack_push_u32(vm->current_frame, byte); } void op_swap(struct vm * vm) diff --git a/c/frame.c b/c/frame.c index 44afc16..5e1916a 100644 --- a/c/frame.c +++ b/c/frame.c @@ -9,39 +9,6 @@ #include "decode.h" #include "frame.h" #include "class_resolver.h" -#include "string.h" - -struct frame * stack_push_frame(struct stack * stack, int num_frames) -{ - struct frame * frame = &stack->frame[stack->ix]; - stack->ix += num_frames; - assert(stack->ix <= stack->capacity); - return frame; -} - -struct frame * stack_pop_frame(struct stack * stack, int num_frames) -{ - stack->ix -= num_frames; - assert(stack->ix >= 0); - struct frame * frame = &stack->frame[stack->ix - 1]; - return frame; -} - -uint32_t * stack_push_data(struct stack * stack, int num_data) -{ - uint32_t * data = &stack->data[stack->ix]; - stack->ix += num_data; - assert(stack->ix <= stack->capacity); - return data; -} - -uint32_t * stack_pop_data(struct stack * stack, int num_data) -{ - stack->ix -= num_data; - assert(stack->ix >= 0); - uint32_t * data = &stack->data[stack->ix]; - return data; -} struct Code_attribute * get_code_attribute(int code_name_index, int attributes_count, @@ -67,6 +34,19 @@ int find_code_name_index(struct class_file * class_file) return 0; } +int find_constantvalue_name_index(struct class_file * class_file) +{ + for (int i = 0; i < class_file->constant_pool_count; i++) { + struct constant * constant = &class_file->constant_pool[i]; + if (constant->tag == CONSTANT_Utf8) { + if (bytes_equal(constant->utf8.length, constant->utf8.bytes, "ConstantValue")) { + return i + 1; + } + } + } + return 0; +} + static int descriptor_nargs(struct constant * 
descriptor_constant) { assert(descriptor_constant->tag == CONSTANT_Utf8); @@ -91,6 +71,75 @@ static int descriptor_nargs(struct constant * descriptor_constant) return nargs; } +bool vm_initialize_class(struct vm * vm, struct class_entry * class_entry) +{ + if (class_entry->initialization_state == CLASS_INITIALIZED) + return true; + + if (class_entry->initialization_state == CLASS_INITIALIZING) { + if (vm->current_thread.current_class == class_entry->class_file) + return true; + else + assert(false); // possible infinite initialization loop + } + + class_entry->initialization_state = CLASS_INITIALIZING; + + /* Then, initialize each static field of C with the constant value in its + ConstantValue attribute (§4.7.2), in the order the fields appear in the + ClassFile structure. */ + + struct class_file * class_file = class_entry->class_file; + + int constantvalue_name_index = find_constantvalue_name_index(class_file); + assert(constantvalue_name_index != 0); + + for (int i = 0; i < class_file->fields_count; i++) { + struct field_info * field_info = &class_file->fields[i]; + if (!(field_info->access_flags & FIELD_ACC_STATIC)) + continue; + + for (int j = 0; j < field_info->attributes_count; j++) { + if (field_info->attributes[j].attribute_name_index == constantvalue_name_index) { + struct attribute_info * attribute = &field_info->attributes[j]; + struct constant * constantvalue = &class_file->constant_pool[attribute->constantvalue->constantvalue_index - 1]; + assert(constantvalue->tag == CONSTANT_Integer); // also need to support CONSTANT_String + + struct constant * name_constant = &class_file->constant_pool[field_info->name_index - 1]; + assert(name_constant->tag == CONSTANT_Utf8); + struct field_entry * field_entry = class_resolver_lookup_field(class_entry, + name_constant->utf8.bytes, + name_constant->utf8.length); + assert(field_entry != nullptr); + field_entry->value = constantvalue->integer.bytes; + printf(" constantvalue: %d\n", field_entry->value); + break; + } 
+ } + } + + /* Next, if C declares a class or interface initialization method, execute + that method. */ + const uint8_t * method_name = (const uint8_t *)"<clinit>"; + int method_length = 8; + + struct method_info * method_info = class_resolver_lookup_method(class_entry, + method_name, + method_length); + if (method_info != nullptr) { + assert((method_info->access_flags & METHOD_ACC_STATIC) != 0); + printf("\n"); + + // tamper with next_pc + vm->current_frame->next_pc = vm->current_frame->pc; + + vm_static_method_call(vm, class_file, method_info); + return false; + } + + return true; +} + void vm_static_method_call(struct vm * vm, struct class_file * class_file, struct method_info * method_info) { /* If the method is not native, the nargs argument values are popped from the @@ -132,6 +181,8 @@ void vm_static_method_call(struct vm * vm, struct class_file * class_file, struc vm->current_frame->pc = 0; vm->current_thread.current_class = class_file; vm->current_thread.current_method = method_info; + + printf("operand_stack_ix: %d\n", vm->current_frame->operand_stack_ix); } void vm_method_return(struct vm * vm) @@ -225,53 +276,3 @@ void vm_execute(struct vm * vm) } } } - -int main(int argc, const char * argv[]) -{ - assert(argc >= 3); - - const char * main_class = argv[1]; - - const char ** class_filenames = &argv[2]; - int num_class_filenames = argc - 2; - - int class_hash_table_length; - struct hash_table_entry * class_hash_table = class_resolver_load_from_filenames(class_filenames, num_class_filenames, &class_hash_table_length); - - struct class_entry * class_entry = class_resolver_lookup_class(class_hash_table_length, - class_hash_table, - (const uint8_t *)main_class, - string_length(main_class)); - - const char * method_name = "main"; - int method_name_length = string_length(method_name); - struct method_info * method_info = class_resolver_lookup_method(class_entry, - (const uint8_t *)method_name, - method_name_length); - - struct vm vm; - vm.class_hash_table.entry = 
class_hash_table; - vm.class_hash_table.length = class_hash_table_length; - - vm.frame_stack.ix = 0; - vm.frame_stack.capacity = 1024; - struct frame frames[vm.frame_stack.capacity]; - vm.frame_stack.frame = frames; - - vm.data_stack.ix = 0; - vm.data_stack.capacity = 0x100000; - uint32_t data[vm.data_stack.capacity]; - vm.data_stack.data = data; - - struct frame * entry_frame = stack_push_frame(&vm.frame_stack, 1); - struct Code_attribute code; - code.max_locals = 0; - code.max_stack = 0; - entry_frame->code = &code; - entry_frame->local_variable = 0; - entry_frame->operand_stack = 0; - entry_frame->operand_stack_ix = 0; - - vm_static_method_call(&vm, class_entry->class_file, method_info); - vm_execute(&vm); -} diff --git a/c/frame.h b/c/frame.h index 384dbde..8a2b35c 100644 --- a/c/frame.h +++ b/c/frame.h @@ -3,6 +3,7 @@ #include #include "class_file.h" +#include "class_resolver.h" struct frame { struct Code_attribute * code; @@ -39,6 +40,38 @@ struct vm { } class_hash_table; }; +static inline struct frame * stack_push_frame(struct stack * stack, int num_frames) +{ + struct frame * frame = &stack->frame[stack->ix]; + stack->ix += num_frames; + assert(stack->ix <= stack->capacity); + return frame; +} + +static inline struct frame * stack_pop_frame(struct stack * stack, int num_frames) +{ + stack->ix -= num_frames; + assert(stack->ix >= 0); + struct frame * frame = &stack->frame[stack->ix - 1]; + return frame; +} + +static inline uint32_t * stack_push_data(struct stack * stack, int num_data) +{ + uint32_t * data = &stack->data[stack->ix]; + stack->ix += num_data; + assert(stack->ix <= stack->capacity); + return data; +} + +static inline uint32_t * stack_pop_data(struct stack * stack, int num_data) +{ + stack->ix -= num_data; + assert(stack->ix >= 0); + uint32_t * data = &stack->data[stack->ix]; + return data; +} + static inline void operand_stack_push_u32(struct frame * frame, uint32_t value) { frame->operand_stack[frame->operand_stack_ix] = value; @@ -76,5 +109,7 
@@ static inline float operand_stack_pop_f32(struct frame * frame) return f; } +bool vm_initialize_class(struct vm * vm, struct class_entry * class_entry); void vm_static_method_call(struct vm * vm, struct class_file * class_file, struct method_info * method_info); void vm_method_return(struct vm * vm); +void vm_execute(struct vm * vm); diff --git a/c/main.c b/c/main.c new file mode 100644 index 0000000..4d7e780 --- /dev/null +++ b/c/main.c @@ -0,0 +1,53 @@ +#include "frame.h" +#include "class_resolver.h" +#include "string.h" + +int main(int argc, const char * argv[]) +{ + assert(argc >= 3); + + const char * main_class = argv[1]; + + const char ** class_filenames = &argv[2]; + int num_class_filenames = argc - 2; + + int class_hash_table_length; + struct hash_table_entry * class_hash_table = class_resolver_load_from_filenames(class_filenames, num_class_filenames, &class_hash_table_length); + + struct class_entry * class_entry = class_resolver_lookup_class(class_hash_table_length, + class_hash_table, + (const uint8_t *)main_class, + string_length(main_class)); + + const char * method_name = "main"; + int method_name_length = string_length(method_name); + struct method_info * method_info = class_resolver_lookup_method(class_entry, + (const uint8_t *)method_name, + method_name_length); + + struct vm vm; + vm.class_hash_table.entry = class_hash_table; + vm.class_hash_table.length = class_hash_table_length; + + vm.frame_stack.ix = 0; + vm.frame_stack.capacity = 1024; + struct frame frames[vm.frame_stack.capacity]; + vm.frame_stack.frame = frames; + + vm.data_stack.ix = 0; + vm.data_stack.capacity = 0x100000; + uint32_t data[vm.data_stack.capacity]; + vm.data_stack.data = data; + + struct frame * entry_frame = stack_push_frame(&vm.frame_stack, 1); + struct Code_attribute code; + code.max_locals = 0; + code.max_stack = 0; + entry_frame->code = &code; + entry_frame->local_variable = 0; + entry_frame->operand_stack = 0; + entry_frame->operand_stack_ix = 0; + + 
vm_static_method_call(&vm, class_entry->class_file, method_info); + vm_execute(&vm); +} diff --git a/p/StaticField.java b/p/StaticField.java new file mode 100644 index 0000000..9155afa --- /dev/null +++ b/p/StaticField.java @@ -0,0 +1,21 @@ +package p; + +class StaticField { + final static int foo = 1234; + + static int[] bar = {10, 11, 12, 13, 14}; + + int qux; + + static int test() { + int sum = 0; + for (int i = 0; i < bar.length; i++) { + sum += bar[i] * foo; + } + return sum; + } + + public static void main() { + test(); + } +}