Optimize GC, but a new issue has appeared
Some checks are pending
Nasal Interpreter Test / mac-aarch64 (push) Waiting to run
Nasal Interpreter Test / linux-x86_64 (push) Waiting to run

Signed-off-by: ValKmjolnir <lhk101lhk101@qq.com>
This commit is contained in:
ValKmjolnir
2026-04-14 00:16:43 +08:00
parent db9d59d030
commit 01a51a78c5
7 changed files with 81 additions and 20 deletions

View File

@@ -115,6 +115,15 @@ void gc::mark_func(std::queue<var>& bfs_queue, nas_func& function) {
}
void gc::mark_upval(std::queue<var>& bfs_queue, nas_upval& upval) {
if (upval.on_stack) {
for (u64 i = 0; i < upval.size; ++i) {
if (upval.stack_frame_offset[i].type > vm_type::vm_num) {
bfs_queue.push(upval.stack_frame_offset[i]);
}
}
return;
}
for (auto& i : upval.elems) {
if (i.type > vm_type::vm_num) {
bfs_queue.push(i);
@@ -132,7 +141,7 @@ void gc::mark_ghost(std::queue<var>& bfs_queue, nas_ghost& ghost) {
void gc::mark_co(std::queue<var>& bfs_queue, nas_co& co) {
bfs_queue.push(co.ctx.funcr);
bfs_queue.push(co.ctx.upvalr);
for (var* i = co.ctx.stack; i<=co.ctx.top; ++i) {
for (var* i = co.ctx.stack; i <= co.ctx.top; ++i) {
if (i->type > vm_type::vm_num) {
bfs_queue.push(*i);
}
@@ -151,7 +160,7 @@ void gc::sweep() {
// if the threshold is too small, too many allocated objects will be marked as "found";
// objects marked "found" are only re-marked as "uncollected" in the next gc cycle,
// which wastes memory.
const i64 threshold = 4096;
const i64 threshold = status.calc_sweep_threshold();
for (i64 it = 0; it < threshold; ++it) {
const auto index = current_sweep_index - it;
if (index < 0) {

View File

@@ -112,10 +112,12 @@ std::ostream& operator<<(std::ostream& out, nas_func& func) {
}
out << ") {..}";
out << " entry: 0x" << std::hex << func.entry << std::dec;
return out;
}
void nas_func::clear() {
entry = 0;
dynamic_parameter_index = -1;
local.clear();
upval.clear();

View File

@@ -250,13 +250,22 @@ public:
// A freshly constructed upvalue is flagged as living on the stack, owns zero
// slots and does not yet point at any stack frame.
nas_upval()
    : on_stack{true},
      size{0},
      stack_frame_offset{nullptr} {}
// Returns a reference to the n-th captured slot.
// While `on_stack` is set the slot is read straight from the stack frame that
// `stack_frame_offset` points at; otherwise it comes from the owned `elems`
// storage. No bounds checking is performed on `n`.
var& operator[](usize n) {
    // a single return: the duplicated second return statement was unreachable
    return on_stack ? stack_frame_offset[n] : elems[n];
}
// Resets this upvalue to the same state the default constructor produces:
// no owned elements, no stack frame attached, zero slots, flagged as on-stack.
void clear() {
    elems.clear();
    stack_frame_offset = nullptr;
    size = 0;
    on_stack = true;
}
// Copies the `size` slots currently referenced through `stack_frame_offset`
// into the owned `elems` storage and switches the upvalue to off-stack mode,
// so later accesses go through `elems` instead of the stack frame.
void move_from_stack() {
    on_stack = false;
    elems.resize(size);
    // walk the stack slots in order while filling the freshly sized storage
    auto source = stack_frame_offset;
    for (auto& slot : elems) {
        slot = *source;
        ++source;
    }
}
};

View File

@@ -434,9 +434,9 @@ inline void vm::o_newf() {
func.upval = ctx.funcr.func().upval;
// functions created in the same local scope share the same closure
var upval = (ctx.upvalr.is_nil())?
ngc.alloc(vm_type::vm_upval):
ctx.upvalr;
var upval = (ctx.upvalr.is_nil())
? ngc.alloc(vm_type::vm_upval)
: ctx.upvalr;
// if no upval scope exists, now it's time to create one
if (ctx.upvalr.is_nil()) {
upval.upval().size = ctx.funcr.func().local_size;
@@ -1231,13 +1231,7 @@ inline void vm::o_ret() {
// synchronize upvalue
if (up.is_upval()) {
auto& upval = up.upval();
auto size = func.func().local_size;
upval.on_stack = false;
upval.elems.resize(size);
for (u64 i = 0; i < size; ++i) {
upval.elems[i] = local[i];
}
up.upval().move_from_stack();
}
// cannot use gc.cort to judge,

View File

@@ -56,6 +56,20 @@ f64 gc_stat::max_sweep_time_ms() const {
return (max_sweep_time * 1000.0) / den;
}
// Computes how many objects the next sweep step is allowed to visit.
//
// The previous threshold is scaled by (target sweep time / average sweep time)
// so that a sweep step is steered towards roughly 0.1 ms. The result is stored
// in `last_sweep_threshold` and returned.
//
// Returns: a value in [min_threshold, max_threshold].
i64 gc_stat::calc_sweep_threshold() {
    const i64 min_threshold = 4096;
    // Upper clamp: the scaling below is multiplicative, so without a bound the
    // value could grow past what i64 can hold (the float -> int cast would then
    // be undefined behaviour).
    const i64 max_threshold = static_cast<i64>(1) << 40;
    // target average sweep time, in milliseconds
    const double expected_sweep_time_ms = 0.1;

    if (!total_sweep_count) {
        // No sweep has been measured yet: use the minimum, and remember it so the
        // next scaling starts from the threshold that was actually handed out.
        last_sweep_threshold = min_threshold;
        return last_sweep_threshold;
    }

    const double avg_ms = avg_sweep_time_ms();
    if (!(avg_ms > 0.0)) {
        // Zero (or NaN) average, e.g. sweeps faster than the timer resolution:
        // dividing would yield inf/NaN, so keep the previous threshold instead.
        if (last_sweep_threshold < min_threshold) {
            last_sweep_threshold = min_threshold;
        }
        return last_sweep_threshold;
    }

    const double scaled =
        expected_sweep_time_ms / avg_ms * static_cast<double>(last_sweep_threshold);

    // Clamp in floating point first; only an in-range value is cast to i64.
    if (!(scaled > static_cast<double>(min_threshold))) {
        last_sweep_threshold = min_threshold;
    } else if (scaled >= static_cast<double>(max_threshold)) {
        last_sweep_threshold = max_threshold;
    } else {
        last_sweep_threshold = static_cast<i64>(scaled);
    }
    return last_sweep_threshold;
}
void gc_stat::dump_info() const {
util::windows_code_page_manager wm;
wm.set_utf8_output();

View File

@@ -20,6 +20,7 @@ struct gc_stat {
i64 max_mark_time = 0;
i64 max_sweep_time = 0;
i64 last_sweep_threshold = 512;
std::chrono::time_point<std::chrono::high_resolution_clock> start_time;
@@ -50,6 +51,7 @@ struct gc_stat {
f64 avg_sweep_time_ms() const;
f64 max_mark_time_ms() const;
f64 max_sweep_time_ms() const;
i64 calc_sweep_threshold();
void dump_info() const;
};

View File

@@ -1,6 +1,13 @@
use std.runtime;
use std.os;
var prev_info = runtime.gc.info();
# Formats how much a numeric field changed between two snapshots.
#   prev, curr : containers indexable by `member` (the script passes the
#                results of runtime.gc.info())
#   member     : key of the field to compare
# Returns a string such as " (+3)", " (0)" or " (-2)".
var delta = func(prev, curr, member) {
    var d = int(curr[member] - prev[member]);
    # a negative number already prints its own "-", so only a positive
    # difference needs an explicit sign (otherwise the output is " (--2)")
    var sign = d > 0 ? "+" : "";
    return " (" ~ sign ~ d ~ ")";
}
var test_func = func(test_processes...) {
var test_process_total = maketimestamp();
test_process_total.stamp();
@@ -17,6 +24,10 @@ var test_func = func(test_processes...) {
var end_info = runtime.gc.info();
var gc_total_end = end_info.total;
var duration = time_stamp.elapsedMSec();
if (duration == nil) {
println(time_stamp.elapsedMSec);
die("remaining issue: time_stamp.elapsedMSec() entry = 0x559.");
}
print(" ", duration, " ms,\tgc ",
int((gc_total_end-gc_total_begin)*100/duration), "%,\t",
int(1000/(duration/size(test_processes))*10)/10, " test(s)/sec",
@@ -24,15 +35,16 @@ var test_func = func(test_processes...) {
);
var info = runtime.gc.info();
println("+----##-gc----------------------------");
println("+----##-gc-------------------------------");
println("| avg gc cycle : ", int(1000 / info.average), " exec/sec");
println("| avg mark : ", int(1000 / info.avg_mark), " exec/sec");
println("| avg sweep : ", int(1000 / info.avg_sweep), " exec/sec");
println("| mark count : ", info.mark_count);
println("| sweep count : ", info.sweep_count);
println("| mark count : ", info.mark_count, delta(prev_info, info, "mark_count"));
println("| sweep count : ", info.sweep_count, delta(prev_info, info, "sweep_count"));
println("| max mark : ", info.max_mark, " ms");
println("| max sweep : ", info.max_sweep, " ms");
println("+-------------------------------------");
println("+----------------------------------------");
prev_info = info;
}
var MAX_ITER_NUM = 0.5e5;
@@ -99,13 +111,28 @@ var append_tree = func {
var res = [];
for (var i=0; i<MAX_ITER_NUM; i+=1) {
append(res, {
a: {b: {c:[]}},
a: {b: {c:[1, 2, 3, 4]}},
d: {e: {}},
j: {k: {l:{m:[{a:{b:{c:[{}]}}}]}}}
j: {k: {l:{m:[{a:{b:{c:[{}, {}]}}}]}}}
});
}
}
# Builds two deeply nested structures and returns them in one hash:
#   "vec"  : vectors wrapped three levels deeper on every iteration
#   "hash" : hashes wrapped three levels deeper (key "a") on every iteration
# Each structure is grown MAX_ITER_NUM times.
var append_deep_tree = func {
    var res = {};

    var nested_vec = [];
    for (var depth = 0; depth < MAX_ITER_NUM; depth += 1) {
        nested_vec = [[[nested_vec]]];
    }
    res["vec"] = nested_vec;

    var nested_hash = {};
    for (var depth = 0; depth < MAX_ITER_NUM; depth += 1) {
        nested_hash = {a : {a : {a : nested_hash}}};
    }
    res["hash"] = nested_hash;

    return res;
}
for (var i = 0; i < 10; i += 1) {
test_func(
append_vec,
@@ -154,6 +181,10 @@ for (var i = 0; i < 10; i += 1) {
append_tree,
append_tree,
append_tree
append_tree,
append_deep_tree,
append_deep_tree,
append_deep_tree
);
}