From dd4795c46be043e6b0b236c82d0a1cd0f1bef9a9 Mon Sep 17 00:00:00 2001 From: Zack Buhman Date: Thu, 9 Jan 2025 16:40:50 -0600 Subject: [PATCH] gc: add mark+sweep garbage collector --- c/backtrace.c | 11 ++- c/class_resolver.c | 18 +++- c/class_resolver.h | 1 + c/execute.c | 25 ++--- c/gc.c | 159 +++++++++++++++++++++++++++++++ c/gc.h | 6 ++ c/main_hosted.c | 4 + c/memory_allocator.c | 32 ++++++- c/memory_allocator.h | 5 + c/native_types.h | 55 ++++++++++- classes/java/io/PrintStream.java | 2 +- classes/test/TestGC.java | 16 ++++ java.mk | 6 +- 13 files changed, 312 insertions(+), 28 deletions(-) create mode 100644 c/gc.c create mode 100644 c/gc.h create mode 100644 classes/test/TestGC.java diff --git a/c/backtrace.c b/c/backtrace.c index 7aa73b8..46f0686 100644 --- a/c/backtrace.c +++ b/c/backtrace.c @@ -6,7 +6,16 @@ struct backtrace * backtrace_allocate(struct vm * vm) { - struct backtrace * backtrace = memory_allocate((sizeof (struct backtrace))); + struct class_entry * object_class_entry = class_resolver_lookup_class(vm->class_hash_table.length, + vm->class_hash_table.entry, + (const uint8_t *)"java/lang/Object", + 16); + debugf("object class entry: %p\n", object_class_entry); + + int num_fields = (sizeof (struct backtrace)) / (sizeof (void *)); + struct objectref * objectref = obj_allocate(num_fields); + objectref->class_entry = object_class_entry; + struct backtrace * backtrace = (struct backtrace *)&objectref->aref[0]; backtrace->num_entries = vm->frame_stack.ix; int backtrace_entries_size = (sizeof (struct backtrace_entry)) * backtrace->num_entries; backtrace->entry = memory_allocate(backtrace_entries_size); diff --git a/c/class_resolver.c b/c/class_resolver.c index dea31a5..b546ca8 100644 --- a/c/class_resolver.c +++ b/c/class_resolver.c @@ -151,6 +151,7 @@ static int32_t class_resolver_create_fields_hash_table(int class_hash_table_leng class_entry->fields.length = fields_hash_table_length; class_entry->fields.entry = fields_hash_table; 
class_entry->instance_fields_count = instance_index; + class_entry->static_fields_count = static_index; return static_index; } @@ -233,6 +234,7 @@ struct hash_table_entry * class_resolver_load_from_buffers(const uint8_t ** buff class_entry[i].class_file = class_file; class_entry[i].initialization_state = CLASS_UNINITIALIZED; + struct constant * class_constant = &class_file->constant_pool[class_file->this_class - 1]; assert(class_constant->tag == CONSTANT_Class); struct constant * class_name_constant = &class_file->constant_pool[class_constant->class.name_index - 1]; @@ -596,16 +598,20 @@ struct objectref * class_resolver_lookup_string(int class_hash_table_length, return class_entry->attribute_entry[string_index - 1].string_objectref; } - struct constant * utf8_constant = &class_entry->class_file->constant_pool[string_index - 1]; + struct constant * string_constant = &class_entry->class_file->constant_pool[string_index - 1]; + assert(string_constant->tag == CONSTANT_String); + + struct constant * utf8_constant = &class_entry->class_file->constant_pool[string_constant->string.string_index - 1]; assert(utf8_constant->tag == CONSTANT_Utf8); struct class_entry * string_class_entry = class_resolver_lookup_class(class_hash_table_length, class_hash_table, (const uint8_t *)"java/lang/String", 16); + debugf("string class entry: %p\n", string_class_entry); - int32_t size = utf8_constant->utf8.length + (sizeof (struct arrayref)); - struct arrayref * arrayref = memory_allocate(size); + int32_t count = utf8_constant->utf8.length; + struct arrayref * arrayref = prim_array_allocate(1, count); assert(arrayref != nullptr); arrayref->class_entry = nullptr; // byte[] arrayref->length = utf8_constant->utf8.length; @@ -614,9 +620,13 @@ struct objectref * class_resolver_lookup_string(int class_hash_table_length, } assert(string_class_entry != nullptr); - struct objectref * objectref = memory_allocate((sizeof (struct objectref)) + (sizeof (void *))); + int fields_count = 
string_class_entry->instance_fields_count; + struct objectref * objectref = obj_allocate(fields_count); assert(objectref != nullptr); objectref->class_entry = string_class_entry; + for (int i = 0; i < fields_count; i++) { + objectref->oref[i] = nullptr; + } objectref->aref[0] = arrayref; // cache the result diff --git a/c/class_resolver.h b/c/class_resolver.h index 48c8e36..bd798e3 100644 --- a/c/class_resolver.h +++ b/c/class_resolver.h @@ -37,6 +37,7 @@ struct class_entry { struct class_file * class_file; enum initialization_state initialization_state; union attribute_entry * attribute_entry; + int32_t static_fields_count; int32_t * static_fields; int32_t instance_fields_count; diff --git a/c/execute.c b/c/execute.c index 1317c8c..13f6056 100644 --- a/c/execute.c +++ b/c/execute.c @@ -71,9 +71,7 @@ void op_anewarray(struct vm * vm, uint32_t index) index); int32_t count = operand_stack_pop_u32(vm->current_frame); - int32_t element_size = (sizeof (void *)); - int32_t size = element_size * count + (sizeof (struct arrayref)); - struct arrayref * arrayref = memory_allocate(size); + struct arrayref * arrayref = ref_array_allocate(count); assert(arrayref != nullptr); arrayref->length = count; arrayref->class_entry = class_entry; @@ -1429,7 +1427,7 @@ void op_ldc(struct vm * vm, uint32_t index) struct objectref * objectref = class_resolver_lookup_string(vm->class_hash_table.length, vm->class_hash_table.entry, vm->current_frame->class_entry, - constant->string.string_index); + index); operand_stack_push_ref(vm->current_frame, objectref); } else { assert(false); @@ -1456,7 +1454,7 @@ void op_ldc_w(struct vm * vm, uint32_t index) struct objectref * objectref = class_resolver_lookup_string(vm->class_hash_table.length, vm->class_hash_table.entry, vm->current_frame->class_entry, - constant->string.string_index); + index); operand_stack_push_ref(vm->current_frame, objectref); } else { assert(false); @@ -1658,9 +1656,15 @@ void op_monitorexit(struct vm * vm) static struct 
arrayref * _multiarray(struct vm * vm, int32_t * dims, int num_dimensions, int level, uint8_t * type, uint8_t * type_end) { int32_t count = dims[level]; - int32_t element_size = field_size_array(*type); - int32_t size = element_size * count + (sizeof (struct arrayref)); - struct arrayref * arrayref = memory_allocate(size); + struct arrayref * arrayref; + int32_t element_size; + if (*type == 'L' || *type == '[') { + element_size = (sizeof (void *)); + arrayref = ref_array_allocate(count); + } else { + element_size = field_size_array(*type); + arrayref = prim_array_allocate(element_size, count); + } assert(arrayref != nullptr); arrayref->length = count; arrayref->class_entry = nullptr; @@ -1729,7 +1733,7 @@ void op_new(struct vm * vm, uint32_t index) reference to the instance, is pushed onto the operand stack. */ int fields_count = class_entry->instance_fields_count; - struct objectref * objectref = memory_allocate(fields_count * 4 + 4); + struct objectref * objectref = obj_allocate(fields_count); assert(objectref != nullptr); objectref->class_entry = class_entry; for (int i = 0; i < fields_count; i++) { @@ -1744,8 +1748,7 @@ void op_newarray(struct vm * vm, uint32_t atype) { int32_t count = operand_stack_pop_u32(vm->current_frame); int32_t element_size = array_element_size(atype); - int32_t size = element_size * count + (sizeof (struct arrayref)); - struct arrayref * arrayref = memory_allocate(size); + struct arrayref * arrayref = prim_array_allocate(element_size, count); assert(arrayref != nullptr); arrayref->length = count; arrayref->class_entry = nullptr; diff --git a/c/gc.c b/c/gc.c new file mode 100644 index 0000000..cd217be --- /dev/null +++ b/c/gc.c @@ -0,0 +1,159 @@ +#include "frame.h" +#include "memory_allocator.h" +#include "printf.h" + +static void walk_address(void * address); + +static void walk_object(struct objectref * objectref) +{ + if (objectref->tag.mark != 0) + return; + objectref->tag.mark = 1; + + for (int i = 0; i < 
objectref->class_entry->instance_fields_count; i++) { + void * address = objectref->aref[i]; + walk_address(address); + } +} + +static void walk_ref_array(struct arrayref * arrayref) +{ + if (arrayref->tag.mark != 0) + return; + arrayref->tag.mark = 1; + + if (arrayref->class_entry == nullptr) { + // no element class (e.g. built by _multiarray): elements may be ref arrays, primitive arrays or objects, so dispatch on each element's tag + for (int i = 0; i < arrayref->length; i++) { + if (arrayref->aref[i] != nullptr) { + walk_address(arrayref->aref[i]); + } + } + } else { + // array with an element class (see op_anewarray): dispatch on each element's tag instead of assuming an objectref + for (int i = 0; i < arrayref->length; i++) { + if (arrayref->oref[i] != nullptr) { + walk_address(arrayref->oref[i]); + } + } + } +} + +static void walk_prim_array(struct arrayref * arrayref) +{ + if (arrayref->tag.mark != 0) + return; + arrayref->tag.mark = 1; +} + +static void walk_address(void * address) +{ + if (address == nullptr) + return; + + if (!memory_is_allocated(address)) + return; + + struct tag * tag = (struct tag *)address; + + if (tag->type == TAG_TYPE_OBJECT) { + walk_object((struct objectref *)address); + } else if (tag->type == TAG_TYPE_REF_ARRAY) { + walk_ref_array((struct arrayref *)address); + } else if (tag->type == TAG_TYPE_PRIM_ARRAY) { + walk_prim_array((struct arrayref *)address); + } else { + assert(false); + } +} + +static void walk_frame(struct frame * frame) +{ + int num_local_variables = frame->code_attribute->max_locals; + int num_stack_variables = frame->operand_stack_ix; + + for (int i = 0; i < num_local_variables; i++) { + void * address = (void *)frame->local_variable[i]; + walk_address(address); + } + + for (int i = 0; i < num_stack_variables; i++) { + void * address = (void *)frame->operand_stack[i]; + walk_address(address); + } +} + +static void walk_static_fields(struct class_entry * class_entry) +{ + for (int i = 0; i < 
class_entry->static_fields_count; i++) { + void * address = (void *)class_entry->static_fields[i]; + walk_address(address); + } +} + +static void walk_string_attributes(struct class_entry * class_entry) +{ + 
struct class_file * class_file = class_entry->class_file; + for (int i = 0; i < class_file->constant_pool_count - 1; i++) { + struct constant * constant = &class_file->constant_pool[i]; + if (constant->tag != CONSTANT_String) + continue; + + struct objectref * objectref = class_entry->attribute_entry[i].string_objectref; + if (objectref != nullptr) { + walk_object(objectref); + } + } +} + +static void walk_class_hash_table_entry(struct hash_table_entry * entry) +{ + if (entry->key == nullptr) + return; + + struct class_entry * class_entry = (struct class_entry *)entry->value; + assert(class_entry != nullptr); + walk_static_fields(class_entry); + + walk_string_attributes(class_entry); + + if (entry->next != nullptr) + walk_class_hash_table_entry(entry->next); +} + +static void walk_class_hash_table(int length, struct hash_table_entry * entry) +{ + for (int i = 0; i < length; i++) + walk_class_hash_table_entry(&entry[i]); +} + +static void walk_vm(struct vm * vm) +{ + walk_class_hash_table(vm->class_hash_table.length, + vm->class_hash_table.entry); + + for (int i = 0; i < vm->frame_stack.ix; i++) { + walk_frame(&vm->frame_stack.frame[i]); + } +} + +void gc_mark(struct vm * vm) +{ + walk_vm(vm); +} + +static void sweep_address(void * address) +{ + struct tag * tag = (struct tag *)address; + printf("%p mark: %d\n", address, tag->mark); + int mark = tag->mark; + tag->mark = 0; + if (mark == 0) { + memory_free(address); + } +} + +void gc_sweep() +{ + memory_iterate_allocated(sweep_address); +} diff --git a/c/gc.h b/c/gc.h new file mode 100644 index 0000000..9341ea7 --- /dev/null +++ b/c/gc.h @@ -0,0 +1,6 @@ +#pragma once + +#include "frame.h" + +void gc_mark(struct vm * vm); +void gc_sweep(); diff --git a/c/main_hosted.c b/c/main_hosted.c index 3ee43f3..55fbe05 100644 --- a/c/main_hosted.c +++ b/c/main_hosted.c @@ -9,6 +9,7 @@ #include "malloc.h" #include "memory_allocator.h" #include "backtrace.h" +#include "gc.h" static struct hash_table_entry * 
load_from_filenames(const char * filenames[], int length, int * hash_table_length) { @@ -56,4 +57,7 @@ int main(int argc, const char * argv[]) main_class_length); vm_execute(vm); + + gc_mark(vm); + gc_sweep(); } diff --git a/c/memory_allocator.c b/c/memory_allocator.c index ef34cf0..d61e88f 100644 --- a/c/memory_allocator.c +++ b/c/memory_allocator.c @@ -2,10 +2,11 @@ #include "assert.h" #include "printf.h" +#include "memory_allocator.h" #define block_power (5UL) #define block_size (1UL << block_power) -static uint8_t memory[0x100]; +static uint8_t memory[0x200]; //static uint8_t memory[0x100000]; #define free_list_length ((sizeof (memory)) / block_size) static uint8_t free_list[free_list_length]; @@ -34,7 +35,7 @@ static inline uint32_t find_contiguous_blocks(uint32_t blocks, int * zero_crossi for (uint32_t i = 0; i < blocks; i++) { if (free_list[free_ix + i] != 0) - return i + 1; + return i; } return blocks; } @@ -44,6 +45,7 @@ void * memory_allocate(uint32_t size) assert(size != 0); uint32_t blocks = ((size + (block_size - 1)) & ~(block_size - 1)) >> block_power; + assert(blocks > 0); int zero_crossings = 0; while (true) { @@ -52,9 +54,13 @@ void * memory_allocate(uint32_t size) return nullptr; // memory allocation failed if (ix_offset == blocks) break; - free_ix = (free_ix + ix_offset) & (free_list_length - 1); + uint32_t next_free_ix = (free_ix + ix_offset + 1) & (free_list_length - 1); + if (next_free_ix < free_ix) + zero_crossings += 1; + free_ix = next_free_ix; } + assert(free_list[free_ix] == 0); free_list[free_ix] = START | ALLOCATED; for (int i = 1; i < blocks; i++) { free_list[free_ix + i] = ALLOCATED; @@ -63,15 +69,21 @@ void * memory_allocate(uint32_t size) void * mem = &memory[free_ix << block_power]; free_ix = (free_ix + blocks) & (free_list_length - 1); + + printf("memory allocate: %p\n", mem); + return mem; } void memory_free(void * p) { + printf("memory free: %p\n", p); + uint8_t * buf = (uint8_t *)p; assert(buf >= memory); uint32_t address_index 
= buf - memory; assert(address_index < (sizeof (memory))); + assert((address_index & (~(block_size - 1))) == address_index); uint32_t free_list_index = address_index >> block_power; assert((free_list[free_list_index] & START) != 0); @@ -96,8 +108,18 @@ bool memory_is_allocated(void * p) return (free_list[free_list_index] & START) != 0; } -#if 1 -void print_free_list() +void memory_iterate_allocated(memory_iterate_func_t func) +{ + for (int i = 0; i < free_list_length; i++) { + if ((free_list[i] & START) != 0) { + void * address = &memory[i << block_power]; + func(address); + } + } +} + +#if 0 +static void print_free_list() { for (int i = 0; i < free_list_length; i++) { printf("%d ", free_list[i]); diff --git a/c/memory_allocator.h b/c/memory_allocator.h index 2ace29e..d65829e 100644 --- a/c/memory_allocator.h +++ b/c/memory_allocator.h @@ -1,5 +1,10 @@ +#pragma once + #include void memory_reset_free_list(); void * memory_allocate(uint32_t size); void memory_free(void * p); +bool memory_is_allocated(void * p); +typedef void (* memory_iterate_func_t)(void * address); +void memory_iterate_allocated(memory_iterate_func_t func); diff --git a/c/native_types.h b/c/native_types.h index 6a60e26..82b2e8b 100644 --- a/c/native_types.h +++ b/c/native_types.h @@ -4,11 +4,24 @@ #include #include "class_resolver.h" +#include "memory_allocator.h" -struct object_arrayref; +enum tag_type { + TAG_TYPE_OBJECT = -30741, + TAG_TYPE_REF_ARRAY = 23240, + TAG_TYPE_PRIM_ARRAY = -5251, +}; + +struct tag { + int8_t mark; + int8_t _res; + int16_t type; +}; + +static_assert((sizeof (struct tag)) == 4); struct objectref { - int32_t _res; + struct tag tag; struct class_entry * class_entry; union { struct objectref * oref[0]; @@ -20,8 +33,9 @@ struct objectref { static_assert((sizeof (struct objectref)) == 8); struct arrayref { - int32_t length; // length position must match primitive_arrayref + struct tag tag; struct class_entry * class_entry; + int32_t length; union { // object array: struct 
objectref * oref[0]; @@ -34,7 +48,7 @@ struct arrayref { }; }; -static_assert((sizeof (struct arrayref)) == 8); +static_assert((sizeof (struct arrayref)) == 12); enum ARRAY_TYPE { T_BOOLEAN = 4, // 1 byte @@ -71,3 +85,36 @@ static inline int array_element_size(int atype) break; } } + +static inline struct arrayref * prim_array_allocate(int element_size, int count) +{ + int32_t size = count * element_size + (sizeof (struct arrayref)); + struct arrayref * arrayref = memory_allocate(size); + if (arrayref != nullptr) { + arrayref->tag.type = TAG_TYPE_PRIM_ARRAY; + arrayref->tag.mark = 0; + } + return arrayref; +} + +static inline struct arrayref * ref_array_allocate(int count) +{ + int32_t size = count * (sizeof (void *)) + (sizeof (struct arrayref)); + struct arrayref * arrayref = memory_allocate(size); + if (arrayref != nullptr) { + arrayref->tag.type = TAG_TYPE_REF_ARRAY; + arrayref->tag.mark = 0; + } + return arrayref; +} + +static inline struct objectref * obj_allocate(int num_fields) +{ + int32_t size = num_fields * (sizeof (void *)) + (sizeof (struct objectref)); + struct objectref * objectref = memory_allocate(size); + if (objectref != nullptr) { + objectref->tag.type = TAG_TYPE_OBJECT; + objectref->tag.mark = 0; + } + return objectref; +} diff --git a/classes/java/io/PrintStream.java b/classes/java/io/PrintStream.java index 297a9bf..070e254 100644 --- a/classes/java/io/PrintStream.java +++ b/classes/java/io/PrintStream.java @@ -8,7 +8,7 @@ public class PrintStream public PrintStream() { } - private final byte[] newline = {'\n'}; + private static final byte[] newline = {'\n'}; public static native void write(byte[] buf); diff --git a/classes/test/TestGC.java b/classes/test/TestGC.java new file mode 100644 index 0000000..bf79e23 --- /dev/null +++ b/classes/test/TestGC.java @@ -0,0 +1,16 @@ +package test; + +class TestGC { + static TestGC static_tgc; + TestGC instance_tgc; + + void main() { + System.out.println("static new TestGC"); + TestGC tgc = new TestGC(); + 
static_tgc = tgc; + System.out.println("instance new TestGC"); + tgc.instance_tgc = new TestGC(); + System.out.println("stack new TestGC"); + TestGC stack = new TestGC(); + } +} diff --git a/java.mk b/java.mk index 38cad90..2f06043 100644 --- a/java.mk +++ b/java.mk @@ -2,7 +2,8 @@ # javac $< %.class: %.java - javac -Xlint:-options --source 8 --target 8 --boot-class-path . $< + cd ./classes ; \ + javac -Xlint:-options --source 8 --target 8 --boot-class-path . $(<:classes/%=%) OBJ = \ c/decode.o \ @@ -22,7 +23,8 @@ OBJ = \ c/fatal.o \ c/parse_type.o \ c/backtrace.o \ - c/find_attribute.o + c/find_attribute.o \ + c/gc.o MAIN_DREAMCAST_OBJ = \ c/main_dreamcast.o \