optimize gc extend
Some checks failed
Nasal Interpreter Test / mac-aarch64 (push) Has been cancelled
Nasal Interpreter Test / linux-x86_64 (push) Has been cancelled

This commit is contained in:
ValKmjolnir
2026-04-10 00:01:54 +08:00
parent a4cc792010
commit cbdfcc396a
5 changed files with 78 additions and 96 deletions

View File

@@ -61,11 +61,11 @@ void ghost_for_test_destructor(void* ptr) {
std::cout << "}\n";
}
void ghost_for_test_gc_marker(void* ptr, std::vector<var>* bfs_queue) {
void ghost_for_test_gc_marker(void* ptr, std::queue<var>* bfs_queue) {
std::cout << "ghost_for_test::mark (0x";
std::cout << std::hex << reinterpret_cast<u64>(ptr) << std::dec << ") {\n";
bfs_queue->push_back(static_cast<ghost_obj*>(ptr)->test_string);
bfs_queue->push(static_cast<ghost_obj*>(ptr)->test_string);
std::cout << " mark 0x" << std::hex;
std::cout << reinterpret_cast<u64>(ptr) << std::dec << "->test_string;\n";

View File

@@ -27,73 +27,37 @@ void gc::count_sweep_time() {
}
void gc::mark() {
std::vector<var> bfs;
std::queue<var> bfs;
mark_context_root(bfs);
// concurrent mark
if (memory.size() > UINT16_MAX * 16 && bfs.size() > 16) {
auto size = bfs.size();
std::thread t0(&gc::concurrent_mark, this, std::ref(bfs), 0, size/4);
std::thread t1(&gc::concurrent_mark, this, std::ref(bfs), size/4, size/2);
std::thread t2(&gc::concurrent_mark, this, std::ref(bfs), size/2, size/4*3);
std::thread t3(&gc::concurrent_mark, this, std::ref(bfs), size/4*3, size);
t0.join();
t1.join();
t2.join();
t3.join();
return;
}
// normal mark
while (!bfs.empty()) {
var value = bfs.back();
bfs.pop_back();
if (value.type<=vm_type::vm_num ||
value.val.gcobj->mark!=nas_val::gc_status::uncollected) {
var value = bfs.front();
bfs.pop();
if (value.type <= vm_type::vm_num ||
value.val.gcobj->mark != nas_val::gc_status::uncollected) {
continue;
}
mark_var(bfs, value);
}
}
void gc::concurrent_mark(std::vector<var>& vec, usize begin, usize end) {
std::vector<var> bfs;
for (auto i = begin; i<end; ++i) {
var value = vec[i];
if (value.type<=vm_type::vm_num ||
value.val.gcobj->mark!=nas_val::gc_status::uncollected) {
continue;
}
mark_var(bfs, value);
}
while (!bfs.empty()) {
var value = bfs.back();
bfs.pop_back();
if (value.type<=vm_type::vm_num ||
value.val.gcobj->mark!=nas_val::gc_status::uncollected) {
continue;
}
mark_var(bfs, value);
}
}
void gc::mark_context_root(std::vector<var>& bfs_queue) {
void gc::mark_context_root(std::queue<var>& bfs_queue) {
// scan global
for (usize i = 0; i < main_context_global_size; ++i) {
auto& val = main_context_global[i];
if (val.type > vm_type::vm_num) {
bfs_queue.push_back(val);
bfs_queue.push(val);
}
}
// scan the currently running context; this context may belong to a coroutine or to main
for (var* i = running_context->stack; i <= running_context->top; ++i) {
if (i->type > vm_type::vm_num) {
bfs_queue.push_back(*i);
bfs_queue.push(*i);
}
}
bfs_queue.push_back(running_context->funcr);
bfs_queue.push_back(running_context->upvalr);
bfs_queue.push_back(temp);
bfs_queue.push(running_context->funcr);
bfs_queue.push(running_context->upvalr);
bfs_queue.push(temp);
if (!cort) {
return;
@@ -102,14 +66,14 @@ void gc::mark_context_root(std::vector<var>& bfs_queue) {
// a coroutine is running, so also scan the main process stack saved in main_context
for (var* i = main_context.stack; i <= main_context.top; ++i) {
if (i->type > vm_type::vm_num) {
bfs_queue.push_back(*i);
bfs_queue.push(*i);
}
}
bfs_queue.push_back(main_context.funcr);
bfs_queue.push_back(main_context.upvalr);
bfs_queue.push(main_context.funcr);
bfs_queue.push(main_context.upvalr);
}
void gc::mark_var(std::vector<var>& bfs_queue, var& value) {
void gc::mark_var(std::queue<var>& bfs_queue, var& value) {
value.val.gcobj->mark = nas_val::gc_status::found;
switch(value.type) {
case vm_type::vm_vec: mark_vec(bfs_queue, value.vec()); break;
@@ -123,62 +87,62 @@ void gc::mark_var(std::vector<var>& bfs_queue, var& value) {
}
}
void gc::mark_vec(std::vector<var>& bfs_queue, nas_vec& vec) {
void gc::mark_vec(std::queue<var>& bfs_queue, nas_vec& vec) {
for (auto& i : vec.elems) {
if (i.type > vm_type::vm_num) {
bfs_queue.push_back(i);
bfs_queue.push(i);
}
}
}
void gc::mark_hash(std::vector<var>& bfs_queue, nas_hash& hash) {
void gc::mark_hash(std::queue<var>& bfs_queue, nas_hash& hash) {
for (auto& i : hash.elems) {
if (i.second.type > vm_type::vm_num) {
bfs_queue.push_back(i.second);
bfs_queue.push(i.second);
}
}
}
void gc::mark_func(std::vector<var>& bfs_queue, nas_func& function) {
void gc::mark_func(std::queue<var>& bfs_queue, nas_func& function) {
for (auto& i : function.local) {
if (i.type > vm_type::vm_num) {
bfs_queue.push_back(i);
bfs_queue.push(i);
}
}
for (auto& i : function.upval) {
bfs_queue.push_back(i);
bfs_queue.push(i);
}
}
void gc::mark_upval(std::vector<var>& bfs_queue, nas_upval& upval) {
void gc::mark_upval(std::queue<var>& bfs_queue, nas_upval& upval) {
for (auto& i : upval.elems) {
if (i.type > vm_type::vm_num) {
bfs_queue.push_back(i);
bfs_queue.push(i);
}
}
}
void gc::mark_ghost(std::vector<var>& bfs_queue, nas_ghost& ghost) {
void gc::mark_ghost(std::queue<var>& bfs_queue, nas_ghost& ghost) {
if (!ghost.gc_mark_function) {
return;
}
ghost.gc_mark_function(ghost.pointer, &bfs_queue);
}
void gc::mark_co(std::vector<var>& bfs_queue, nas_co& co) {
bfs_queue.push_back(co.ctx.funcr);
bfs_queue.push_back(co.ctx.upvalr);
void gc::mark_co(std::queue<var>& bfs_queue, nas_co& co) {
bfs_queue.push(co.ctx.funcr);
bfs_queue.push(co.ctx.upvalr);
for (var* i = co.ctx.stack; i<=co.ctx.top; ++i) {
if (i->type > vm_type::vm_num) {
bfs_queue.push_back(*i);
bfs_queue.push(*i);
}
}
}
void gc::mark_map(std::vector<var>& bfs_queue, nas_map& mp) {
void gc::mark_map(std::queue<var>& bfs_queue, nas_map& mp) {
for (const auto& i : mp.mapper) {
if (i.second->type > vm_type::vm_num) {
bfs_queue.push_back(*i.second);
bfs_queue.push(*i.second);
}
}
}
@@ -189,17 +153,19 @@ void gc::sweep() {
// this will cause memory wasting.
const i64 threshold = 4096;
for (i64 it = 0; it < threshold; ++it) {
if (current_sweep_index - it < 0) {
const auto index = current_sweep_index - it;
if (index < 0) {
break;
}
auto i = memory[current_sweep_index - it];
if (i->mark==nas_val::gc_status::uncollected) {
unused[static_cast<u32>(i->type)-static_cast<u32>(vm_type::vm_str)].push_back(i);
auto i = memory[index];
if (i->mark == nas_val::gc_status::uncollected) {
unused[static_cast<u32>(i->type) - static_cast<u32>(vm_type::vm_str)].push_back(i);
i->mark = nas_val::gc_status::collected;
} else if (i->mark==nas_val::gc_status::found) {
} else if (i->mark == nas_val::gc_status::found) {
i->mark = nas_val::gc_status::uncollected;
}
}
current_sweep_index -= threshold;
if (current_sweep_index < 0) {
in_incremental_sweep_stage = false;
@@ -208,10 +174,10 @@ void gc::sweep() {
}
void gc::extend(const vm_type type) {
const u32 index = static_cast<u32>(type)-static_cast<u32>(vm_type::vm_str);
const u32 index = static_cast<u32>(type) - static_cast<u32>(vm_type::vm_str);
status.object_size[index] += incr[index];
for (u64 i = 0; i<incr[index]; ++i) {
for (u64 i = 0; i < incr[index]; ++i) {
// no need to check, will be killed if memory is not enough
nas_val* tmp = new nas_val(type);
@@ -239,8 +205,10 @@ void gc::extend(const vm_type type) {
default: break;
}
// if incr[index] = 1, this will always be 1
incr[index] = incr[index] + incr[index];
if (incr[index] > max_incr[index]) {
incr[index] = max_incr[index];
}
}
void gc::init(const std::vector<std::string>& constant_strings,
@@ -296,8 +264,9 @@ void gc::clear() {
}
var gc::alloc(const vm_type type) {
const u32 index = static_cast<u32>(type)-static_cast<u32>(vm_type::vm_str);
const u32 index = static_cast<u32>(type) - static_cast<u32>(vm_type::vm_str);
++status.alloc_count[index];
// if still in incremental sweep stage? do it
// if not in incremental sweep stage, run a new gc cycle
if (in_incremental_sweep_stage) {

View File

@@ -9,6 +9,7 @@
#include <iomanip>
#include <vector>
#include <queue>
#include <chrono>
#include <thread>
#include <cstring>
@@ -52,9 +53,9 @@ struct gc {
/* heap increase size */
u64 incr[GC_TYPE_SIZE] = {
256, // vm_str
256, // vm_vec
256, // vm_hash
4, // vm_str
4, // vm_vec
2, // vm_hash
256, // vm_func
256, // vm_upval
4, // vm_obj
@@ -62,6 +63,17 @@ struct gc {
1, // vm_map
};
const u64 max_incr[GC_TYPE_SIZE] = {
8192, // vm_str
8192, // vm_vec
4096, // vm_hash
2048, // vm_func
2048, // vm_upval
256, // vm_obj
256, // vm_co
64, // vm_map
};
// total object count
u64 total_object_count = 0;
@@ -83,16 +95,15 @@ private:
void count_mark_time();
void count_sweep_time();
void mark();
void concurrent_mark(std::vector<var>&, usize, usize);
void mark_context_root(std::vector<var>&);
void mark_var(std::vector<var>&, var&);
void mark_vec(std::vector<var>&, nas_vec&);
void mark_hash(std::vector<var>&, nas_hash&);
void mark_func(std::vector<var>&, nas_func&);
void mark_upval(std::vector<var>&, nas_upval&);
void mark_ghost(std::vector<var>&, nas_ghost&);
void mark_co(std::vector<var>&, nas_co&);
void mark_map(std::vector<var>&, nas_map&);
void mark_context_root(std::queue<var>&);
void mark_var(std::queue<var>&, var&);
void mark_vec(std::queue<var>&, nas_vec&);
void mark_hash(std::queue<var>&, nas_hash&);
void mark_func(std::queue<var>&, nas_func&);
void mark_upval(std::queue<var>&, nas_upval&);
void mark_ghost(std::queue<var>&, nas_ghost&);
void mark_co(std::queue<var>&, nas_co&);
void mark_map(std::queue<var>&, nas_map&);
void sweep();
public:

View File

@@ -7,6 +7,7 @@
#include <sstream>
#include <iostream>
#include <vector>
#include <queue>
#include <unordered_map>
namespace nasal {
@@ -270,7 +271,7 @@ public:
struct nas_ghost {
private:
using destructor = void (*)(void*);
using marker = void (*)(void*, std::vector<var>*);
using marker = void (*)(void*, std::queue<var>*);
public:
std::string type_name;

View File

@@ -17,13 +17,14 @@ var test_func = func(test_processes...) {
var end_info = runtime.gc.info();
var gc_total_end = end_info.total;
var duration = time_stamp.elapsedMSec();
println(" ", duration, " ms,\tgc ",
print(" ", duration, " ms,\tgc ",
int((gc_total_end-gc_total_begin)*100/duration), "%,\t",
int(1000/(duration/size(test_processes))*10)/10, " test(s)/sec"
int(1000/(duration/size(test_processes))*10)/10, " test(s)/sec",
"\n"
);
var info = runtime.gc.info();
println("+##-gc----------------------");
println("+----##-gc----------------------------");
println("| avg gc cycle : ", int(1000 / info.average), " exec/sec");
println("| avg mark : ", int(1000 / info.avg_mark), " exec/sec");
println("| avg sweep : ", int(1000 / info.avg_sweep), " exec/sec");
@@ -31,7 +32,7 @@ var test_func = func(test_processes...) {
println("| sweep count : ", info.sweep_count);
println("| max mark : ", info.max_mark, " ms");
println("| max sweep : ", info.max_sweep, " ms");
println("+---------------------------");
println("+-------------------------------------");
}
var MAX_ITER_NUM = 0.5e5;