From ac8e5c6361e4d6434efa3e9ffd53d08e9f14b101 Mon Sep 17 00:00:00 2001 From: ValKmjolnir Date: Sat, 15 Mar 2025 17:36:07 +0800 Subject: [PATCH] :zap: optimize incremental gc --- CMakeLists.txt | 1 + src/nasal_gc.cpp | 148 ++++++++++++++++++++-------------------- src/nasal_gc.h | 25 +++---- src/nasal_type.h | 2 +- src/natives/builtin.cpp | 26 +++---- src/util/gc_stat.cpp | 59 ++++++++++++++++ src/util/gc_stat.h | 55 +++++++++++++++ test/gc_test.nas | 17 +++-- 8 files changed, 219 insertions(+), 114 deletions(-) create mode 100644 src/util/gc_stat.cpp create mode 100644 src/util/gc_stat.h diff --git a/CMakeLists.txt b/CMakeLists.txt index a04118e..a37b4dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,6 +39,7 @@ set(NASAL_OBJECT_SOURCE_FILE ${CMAKE_SOURCE_DIR}/src/natives/unix_lib.cpp ${CMAKE_SOURCE_DIR}/src/repl/repl.cpp ${CMAKE_SOURCE_DIR}/src/util/fs.cpp + ${CMAKE_SOURCE_DIR}/src/util/gc_stat.cpp ${CMAKE_SOURCE_DIR}/src/util/util.cpp ${CMAKE_SOURCE_DIR}/src/ast_dumper.cpp ${CMAKE_SOURCE_DIR}/src/ast_format.cpp diff --git a/src/nasal_gc.cpp b/src/nasal_gc.cpp index 2e0a9e8..fab1854 100644 --- a/src/nasal_gc.cpp +++ b/src/nasal_gc.cpp @@ -4,31 +4,27 @@ namespace nasal { void gc::do_mark_sweep() { - using clk = std::chrono::high_resolution_clock; - auto begin = clk::now(); - if (!in_sweep_stage) { - mark(); - in_sweep_stage = true; - current_sweep_index = memory.size() - 1; + count_mark_time(); + count_sweep_time(); +} + +void gc::count_mark_time() { + if (in_incremental_sweep_stage) { + return; } - auto mark_end = clk::now(); + + status.stamp(); + mark(); + status.elapsed_mark_time(); + + in_incremental_sweep_stage = true; + current_sweep_index = memory.size() - 1; +} + +void gc::count_sweep_time() { + status.stamp(); sweep(); - // if (!in_sweep_stage) { - // for (auto i : memory) { - // if (i->mark == nas_val::gc_status::found) { - // i->mark = nas_val::gc_status::uncollected; - // } - // } - // } - auto sweep_end = clk::now(); - - auto total_time = (sweep_end-begin).count(); - auto mark_time = (mark_end-begin).count(); - auto sweep_time = (sweep_end-mark_end).count(); - worktime += total_time; - max_time = max_time gc::concurrent_threshold() && bfs.size() > 16) { - flag_concurrent_mark_triggered = true; + status.flag_concurrent_mark_triggered = true; usize size = bfs.size(); std::thread t0(&gc::concurrent_mark, this, std::ref(bfs), 0, size/4); std::thread t1(&gc::concurrent_mark, this, std::ref(bfs), size/4, size/2); @@ -190,14 +186,17 @@ void gc::mark_map(std::vector& bfs_queue, nas_map& mp) { } void gc::sweep() { - const i64 threshold = 65536 / 4; + // if threshold is too small, too many allocated objects will be marked as "found" + // objects with "found" will be marked to "uncollected" in the next gc cycle + // this will cause memory wasting. + const i64 threshold = 4096; for (i64 it = 0; it < threshold; ++it) { if (current_sweep_index - it < 0) { break; } auto i = memory[current_sweep_index - it]; if (i->mark==nas_val::gc_status::uncollected) { - unused[static_cast(i->type)-static_cast(vm_type::vm_str)].push_back(i); + unused[static_cast(i->type)-static_cast(vm_type::vm_str)].push_back(i); i->mark = nas_val::gc_status::collected; } else if (i->mark==nas_val::gc_status::found) { i->mark = nas_val::gc_status::uncollected; @@ -205,15 +204,14 @@ void gc::sweep() { } current_sweep_index -= threshold; if (current_sweep_index < 0) { - in_sweep_stage = false; + in_incremental_sweep_stage = false; current_sweep_index = 0; } - return; } void gc::extend(const vm_type type) { - const u8 index = static_cast(type)-static_cast(vm_type::vm_str); - object_size[index] += incr[index]; + const u32 index = static_cast(type)-static_cast(vm_type::vm_str); + status.object_size[index] += incr[index]; for(u64 i = 0; i& constant_strings, const std::vector& argv) { - // initialize counters - worktime = 0; - for(u8 i = 0; i(gc_count[i]); + total += static_cast(status.gc_cycle_trigger_count[i]); std::clog << "│ " << left << setw(indent) << setfill(' ') << name[i]; - std::clog << " │ " << left << setw(indent) << setfill(' ') << gc_count[i]; - std::clog << " │ " << left << setw(indent) << setfill(' ') << alloc_count[i]; - std::clog << " │ " << left << setw(indent) << setfill(' ') << object_size[i]; - std::clog << " │\n"; + std::clog << " │ " << left << setw(indent) << setfill(' ') << status.gc_cycle_trigger_count[i]; + std::clog << " │ " << left << setw(indent) << setfill(' ') << status.alloc_count[i]; + std::clog << " │ " << left << setw(indent) << setfill(' ') << status.object_size[i]; + std::clog << " │\n" << std::internal; } std::clog << mid_line << "\n"; @@ -407,62 +404,67 @@ void gc::info() const { std::clog << " " << left << setw(indent) << setfill(' ') << " "; std::clog << " │\n" << another_mid_line << "\n"; - const auto gc_time = worktime*1.0/den*1000; std::clog << "│ " << left << setw(indent) << setfill(' ') << "gc time"; - std::clog << " │ " << setw(indent-3) << setprecision(4) << gc_time << " ms"; + std::clog << " │ " << setw(indent-3) << setprecision(4) << status.gc_time_ms() << " ms"; std::clog << setw(indent*2+7) << " " << "│\n"; - const auto avg_time = worktime*1.0/den*1000/total; std::clog << "│ " << left << setw(indent) << setfill(' ') << "avg time"; - std::clog << " │ " << setw(indent-3) << setprecision(4) << avg_time << " ms"; + std::clog << " │ " << setw(indent-3) << setprecision(4) << status.avg_time_ms() << " ms"; std::clog << setw(indent*2+7) << " " << "│\n"; - const auto max_gc = max_time*1.0/den*1000; - std::clog << "│ " << left << setw(indent) << setfill(' ') << "max gc"; - std::clog << " │ " << setw(indent-3) << setprecision(4) << max_gc << " ms"; + std::clog << "│ " << left << setw(indent) << setfill(' ') << "avg mark"; + std::clog << " │ " << setw(indent-3) << setprecision(4) << status.avg_mark_time_ms() << " ms"; + std::clog << setw(indent*2+7) << " " << "│\n"; + + std::clog << "│ " << left << setw(indent) << setfill(' ') << "avg sweep"; + std::clog << " │ " << setw(indent-3) << setprecision(4) << status.avg_sweep_time_ms() << " ms"; std::clog << setw(indent*2+7) << " " << "│\n"; - const auto max_mark = max_mark_time*1.0/den*1000; std::clog << "│ " << left << setw(indent) << setfill(' ') << "max mark"; - std::clog << " │ " << setw(indent-3) << setprecision(4) << max_mark << " ms"; + std::clog << " │ " << setw(indent-3) << setprecision(4) << status.max_mark_time_ms() << " ms"; std::clog << setw(indent*2+7) << " " << "│\n"; - const auto max_sweep = max_sweep_time*1.0/den*1000; std::clog << "│ " << left << setw(indent) << setfill(' ') << "max sweep"; - std::clog << " │ " << setw(indent-3) << setprecision(4) << max_sweep << " ms"; + std::clog << " │ " << setw(indent-3) << setprecision(4) << status.max_sweep_time_ms() << " ms"; std::clog << setw(indent*2+7) << " " << "│\n"; std::clog << "│ " << left << setw(indent) << setfill(' ') << "concurrent"; std::clog << " │ " << setw(indent) - << (flag_concurrent_mark_triggered? "true":"false"); + << (status.flag_concurrent_mark_triggered? "true":"false"); std::clog << setw(indent*2+7) << " " << "│\n"; - std::clog << last_line << "\n"; + std::clog << last_line << "\n" << std::internal; wm.restore_code_page(); } var gc::alloc(const vm_type type) { const u32 index = static_cast(type)-static_cast(vm_type::vm_str); - ++alloc_count[index]; + ++status.alloc_count[index]; // if still in incremental sweep stage? do it // if not in incremental sweep stage, run a new gc cycle - if (in_sweep_stage || unused[index].empty()) { - ++gc_count[index]; + if (in_incremental_sweep_stage) { + do_mark_sweep(); + } else if (unused[index].empty()) { + ++status.gc_cycle_trigger_count[index]; do_mark_sweep(); } // if in incremental sweep stage, but the unused list is empty, // do it until the unused list has something - while (unused[index].empty() && in_sweep_stage) { + while (unused[index].empty() && in_incremental_sweep_stage) { do_mark_sweep(); } // after all gc stages, still get empty list, extend if (unused[index].empty()) { extend(type); } + var ret = var::gcobj(unused[index].back()); ret.val.gcobj->clear(); - ret.val.gcobj->mark = in_sweep_stage + + // if incremental sweep stage, mark it as found + // but be aware that it may be collected in next gc cycle + ret.val.gcobj->mark = in_incremental_sweep_stage ? nas_val::gc_status::found : nas_val::gc_status::uncollected; unused[index].pop_back(); @@ -485,9 +487,9 @@ void gc::context_change(nas_co* co) { void gc::context_reserve() { // pc = 0 means this coroutine is finished - cort->status = running_context->pc? - nas_co::status::suspended: - nas_co::status::dead; + cort->status = running_context->pc + ? nas_co::status::suspended + : nas_co::status::dead; // store running state to coroutine cort->ctx = *running_context; diff --git a/src/nasal_gc.h b/src/nasal_gc.h index 82bdecf..18c384f 100644 --- a/src/nasal_gc.h +++ b/src/nasal_gc.h @@ -17,11 +17,12 @@ #include "nasal.h" #include "nasal_type.h" +#include "util/gc_stat.h" namespace nasal { struct free_list { - std::vector elem[gc_type_size]; + std::vector elem[GC_TYPE_SIZE]; auto& operator[](i64 index) { return elem[index]; @@ -50,7 +51,7 @@ struct gc { free_list unused; // gc free list /* heap increase size */ - u64 incr[gc_type_size] = { + u64 incr[GC_TYPE_SIZE] = { 256, // vm_str 256, // vm_vec 256, // vm_hash @@ -65,16 +66,9 @@ struct gc { u64 total_object_count = 0; /* values for analysis */ - u64 object_size[gc_type_size]; - u64 gc_count[gc_type_size]; - u64 alloc_count[gc_type_size]; - i64 worktime = 0; - i64 max_time = 0; - i64 max_mark_time = 0; - i64 max_sweep_time = 0; - bool flag_concurrent_mark_triggered = false; + gc_stat status; - bool in_sweep_stage = false; + bool in_incremental_sweep_stage = false; i64 current_sweep_index = 0; void set(context* _ctx, var* _global, usize _size) { @@ -86,6 +80,8 @@ struct gc { private: /* gc functions */ void do_mark_sweep(); + void count_mark_time(); + void count_sweep_time(); void mark(); void concurrent_mark(std::vector&, usize, usize); void mark_context_root(std::vector&); @@ -113,13 +109,12 @@ public: void context_reserve(); public: - double get_gc_time_ms() const { - const auto den = std::chrono::high_resolution_clock::duration::period::den; - return worktime * 1.0 / den * 1000.0; + f64 get_gc_time_ms() const { + return status.gc_time_ms(); } // not very accurate - double get_total_memory() const { + f64 get_total_memory() const { return total_object_count * 3.5 / 1024.0 / 1024.0; } diff --git a/src/nasal_type.h b/src/nasal_type.h index 7cda862..4fb622b 100644 --- a/src/nasal_type.h +++ b/src/nasal_type.h @@ -35,7 +35,7 @@ enum class vm_type: u8 { }; // size of gc object type -const u32 gc_type_size = +const u32 GC_TYPE_SIZE = static_cast(vm_type::vm_type_size_max) - static_cast(vm_type::vm_str); diff --git a/src/natives/builtin.cpp b/src/natives/builtin.cpp index bc9602b..c1a2307 100644 --- a/src/natives/builtin.cpp +++ b/src/natives/builtin.cpp @@ -723,27 +723,17 @@ var builtin_gcextend(context* ctx, gc* ngc) { } var builtin_gcinfo(context* ctx, gc* ngc) { - const auto den = std::chrono::high_resolution_clock::duration::period::den; var res = ngc->alloc(vm_type::vm_hash); - - f64 total = 0; - for(u32 i = 0; igc_count[i]; - } - - auto& map = res.hash().elems; - const auto worktime = static_cast(ngc->worktime); - const auto max_time = static_cast(ngc->max_time); - const auto max_mark_time = static_cast(ngc->max_mark_time); - const auto max_sweep_time = static_cast(ngc->max_sweep_time); - // using ms - map["total"] = var::num(worktime/den*1000); - map["average"] = var::num(worktime/den*1000/total); - map["max_gc"] = var::num(max_time/den*1000); - map["max_mark"] = var::num(max_mark_time/den*1000); - map["max_sweep"] = var::num(max_sweep_time/den*1000); + map["total"] = var::num(ngc->status.gc_time_ms()); + map["average"] = var::num(ngc->status.avg_time_ms()); + map["mark_count"] = var::num(ngc->status.total_mark_count); + map["sweep_count"] = var::num(ngc->status.total_sweep_count); + map["avg_mark"] = var::num(ngc->status.avg_mark_time_ms()); + map["avg_sweep"] = var::num(ngc->status.avg_sweep_time_ms()); + map["max_mark"] = var::num(ngc->status.max_mark_time_ms()); + map["max_sweep"] = var::num(ngc->status.max_sweep_time_ms()); return res; } diff --git a/src/util/gc_stat.cpp b/src/util/gc_stat.cpp new file mode 100644 index 0000000..5bac2f6 --- /dev/null +++ b/src/util/gc_stat.cpp @@ -0,0 +1,59 @@ +#include "util/gc_stat.h" + +#include + +namespace nasal { + +void gc_stat::init() { + for (i64 i = 0; i < GC_TYPE_SIZE; i++) { + object_size[i] = 0; + alloc_count[i] = 0; + gc_cycle_trigger_count[i] = 0; + } + + total_mark_count = 0; + total_sweep_count = 0; + + total_mark_time = 0; + total_sweep_time = 0; + + max_mark_time = 0; + max_sweep_time = 0; + + flag_concurrent_mark_triggered = false; +} + +f64 gc_stat::gc_time_ms() const { + const auto den = std::chrono::high_resolution_clock::duration::period::den; + return ((total_mark_time + total_sweep_time) * 1000.0) / den; +} + +f64 gc_stat::avg_time_ms() const { + u64 total_gc_cycle = 0; + for (i64 i = 0; i < GC_TYPE_SIZE; i++) { + total_gc_cycle += gc_cycle_trigger_count[i]; + } + return gc_time_ms() / total_mark_count; +} + +f64 gc_stat::avg_mark_time_ms() const { + const auto den = std::chrono::high_resolution_clock::duration::period::den; + return (total_mark_time * 1000.0) / den / total_mark_count; +} + +f64 gc_stat::avg_sweep_time_ms() const { + const auto den = std::chrono::high_resolution_clock::duration::period::den; + return (total_sweep_time * 1000.0) / den / total_sweep_count; +} + +f64 gc_stat::max_mark_time_ms() const { + const auto den = std::chrono::high_resolution_clock::duration::period::den; + return (max_mark_time * 1000.0) / den; +} + +f64 gc_stat::max_sweep_time_ms() const { + const auto den = std::chrono::high_resolution_clock::duration::period::den; + return (max_sweep_time * 1000.0) / den; +} + +} \ No newline at end of file diff --git a/src/util/gc_stat.h b/src/util/gc_stat.h new file mode 100644 index 0000000..ca0083e --- /dev/null +++ b/src/util/gc_stat.h @@ -0,0 +1,55 @@ +#include "nasal.h" +#include "nasal_type.h" + +#include + +namespace nasal { + +struct gc_stat { + u64 object_size[GC_TYPE_SIZE]; + u64 gc_cycle_trigger_count[GC_TYPE_SIZE]; + u64 alloc_count[GC_TYPE_SIZE]; + + u64 total_mark_count = 0; + u64 total_sweep_count = 0; + + i64 total_mark_time = 0; + i64 total_sweep_time = 0; + + i64 max_mark_time = 0; + i64 max_sweep_time = 0; + + bool flag_concurrent_mark_triggered = false; + + std::chrono::time_point start_time; + + void stamp() { + start_time = std::chrono::high_resolution_clock::now(); + } + + void elapsed_mark_time() { + auto end = std::chrono::high_resolution_clock::now(); + auto mark_time = (end - start_time).count(); + ++total_mark_count; + total_mark_time += mark_time; + max_mark_time = max_mark_time > mark_time ? max_mark_time : mark_time; + } + + void elapsed_sweep_time() { + auto end = std::chrono::high_resolution_clock::now(); + auto sweep_time = (end - start_time).count(); + ++total_sweep_count; + total_sweep_time += sweep_time; + max_sweep_time = max_sweep_time > sweep_time ? max_sweep_time : sweep_time; + } + + void init(); + f64 gc_time_ms() const; + f64 avg_time_ms() const; + f64 avg_mark_time_ms() const; + f64 avg_sweep_time_ms() const; + f64 max_mark_time_ms() const; + f64 max_sweep_time_ms() const; +}; + +} \ No newline at end of file diff --git a/test/gc_test.nas b/test/gc_test.nas index cfcdf4c..814afff 100644 --- a/test/gc_test.nas +++ b/test/gc_test.nas @@ -23,15 +23,18 @@ var test_func = func(test_processes...) { ); var info = runtime.gc.info(); - println("+##-gc-----------------"); - println("| average : ", info.average, " ms"); - println("| max gc : ", info.max_gc, " ms"); - println("| max mark : ", info.max_mark, " ms"); - println("| max sweep: ", info.max_sweep, " ms"); - println("+----------------------"); + println("+##-gc----------------------"); + println("| avg gc cycle : ", int(1000 / info.average), " exec/sec"); + println("| avg mark : ", int(1000 / info.avg_mark), " exec/sec"); + println("| avg sweep : ", int(1000 / info.avg_sweep), " exec/sec"); + println("| mark count : ", info.mark_count); + println("| sweep count : ", info.sweep_count); + println("| max mark : ", info.max_mark, " ms"); + println("| max sweep : ", info.max_sweep, " ms"); + println("+---------------------------"); } -var MAX_ITER_NUM = 1e5; +var MAX_ITER_NUM = 0.5e5; var append_vec = func { var res = [];