add symbol_finder & codegen

This commit is contained in:
ValKmjolnir 2023-06-29 00:30:50 +08:00
parent 15f63210b4
commit 5d9267d3b9
10 changed files with 1146 additions and 14 deletions

View File

@ -247,7 +247,7 @@ public:
void set_parameter_type(param_type pt) {type = pt;}
void set_parameter_name(const std::string& pname) {name = pname;}
void set_default_value(expr* node) {default_value = node;}
param_type get_type() {return type;}
param_type get_parameter_type() {return type;}
const std::string& get_parameter_name() const {return name;}
expr* get_default_value() {return default_value;}
void accept(ast_visitor*) override;
@ -465,14 +465,14 @@ public:
// Expression node that stores a list of variables; it is tagged
// expr_type::ast_multi_id.
// NOTE(review): this text comes from a rendered diff, so the member,
// add_var() and get_variables() each appear twice (an expr* form directly
// followed by an identifier* form). Presumably only the identifier* lines
// belong to the resulting header — confirm against the real file.
class multi_identifier:public expr {
private:
std::vector<expr*> variables;
std::vector<identifier*> variables;
public:
multi_identifier(const span& location):
expr(location, expr_type::ast_multi_id) {}
~multi_identifier();
void add_var(expr* node) {variables.push_back(node);}
std::vector<expr*>& get_variables() {return variables;}
// appends one variable to the list
void add_var(identifier* node) {variables.push_back(node);}
// gives mutable access to the stored list
std::vector<identifier*>& get_variables() {return variables;}
void accept(ast_visitor*) override;
};

View File

@ -1 +1,976 @@
#include "nasal_new_codegen.h"
#include "nasal_new_codegen.h"
// Checks whether `node` can be used as an assignment target (left-value).
// Reports through die() and returns false when
//   - the call chain ends with a function call,
//   - the call chain ends with a vector call that has no index, more than one
//     index, or a sub-vector (slice) as its index,
//   - the node is neither a call nor an identifier.
// Returns true otherwise.
// NOTE(review): `node` is a call_expr* but the body still uses `node.child()`,
// `tmp.size()`, `tmp[0]`, the `ast` type and an unqualified `ast_id`; this
// looks like a port from the previous ast interface that is not finished yet
// — confirm before relying on it.
bool codegen::check_memory_reachable(call_expr* node) {
if (node->get_type()==expr_type::ast_call) {
// only the last element of the call chain decides reachability
const ast& tmp=node.child().back();
if (tmp->get_type()==expr_type::ast_callf) {
die("bad left-value with function call", node->get_location());
return false;
}
if (tmp->get_type()==expr_type::ast_callv && (tmp.size()==0 || tmp.size()>1 || tmp[0]->get_type()==expr_type::ast_subvec)) {
die("bad left-value with subvec", node->get_location());
return false;
}
} else if (node->get_type()!=ast_id) {
die("bad left-value", node->get_location());
return false;
}
return true;
}
// Validates an identifier that stands alone as a statement.
// A name found in the builtin table is accepted, but reported through die()
// when no local scope is open. Any other name must be found by local_find(),
// upvalue_find() or global_find(); otherwise an "undefined symbol" error is
// reported. No bytecode is emitted here.
void codegen::check_id_exist(identifier* node) {
    const auto& symbol = node->get_name();

    // the builtin table is terminated by an entry whose name is null
    for (u32 idx = 0; builtin[idx].name; ++idx) {
        if (builtin[idx].name != symbol) {
            continue;
        }
        if (local.empty()) {
            die("useless native function used in global scope", node->get_location());
        }
        return;
    }

    // lookup order: innermost scope, enclosing scopes, globals
    const bool known = local_find(symbol) >= 0
        || upvalue_find(symbol) >= 0
        || global_find(symbol) >= 0;
    if (known) {
        return;
    }
    die("undefined symbol \"" + symbol + "\", and this symbol is useless here", node->get_location());
}
void codegen::regist_num(const f64 num) {
if (!num_table.count(num)) {
u32 size=num_table.size();
num_table[num]=size;
num_res.push_back(num);
}
}
void codegen::regist_str(const std::string& str) {
if (!str_table.count(str)) {
u32 size=str_table.size();
str_table[str]=size;
str_res.push_back(str);
}
}
// Collects the symbols that symbol_finder reports for `node` and adds each of
// them to the current scope through add_sym() (the global table when no local
// scope is open, the innermost local scope otherwise).
void codegen::find_symbol(code_block* node) {
    // The finder is only needed for the duration of this call. The previous
    // `new symbol_finder` was never deleted, so every call leaked one finder;
    // an automatic object is released on every exit path.
    symbol_finder finder;
    for (const auto& symbol : finder.do_find(node)) {
        add_sym(symbol);
    }
}
void codegen::add_sym(const std::string& name) {
if (local.empty()) {
if (global.count(name)) {
return;
}
i32 index=global.size();
global[name]=index;
return;
}
if (local.back().count(name)) {
return;
}
i32 index=local.back().size();
local.back()[name]=index;
}
// Looks `name` up in the innermost local scope only.
// Returns the stored index, or -1 when no local scope is open or the scope
// does not contain the name.
i32 codegen::local_find(const std::string& name) {
    if (local.empty()) {
        return -1;
    }
    const auto& scope = local.back();
    const auto found = scope.find(name);
    if (found == scope.end()) {
        return -1;
    }
    return found->second;
}
// Looks `name` up in the global symbol table.
// Returns the stored index, or -1 when the name is not a global symbol.
i32 codegen::global_find(const std::string& name) {
    const auto found = global.find(name);
    if (found == global.end()) {
        return -1;
    }
    return found->second;
}
// Looks `name` up in every local scope except the innermost one.
// The result packs the scope position (counted from the outermost scope) into
// the upper 16 bits and the symbol index inside that scope into the lower
// bits. When several scopes contain the name, the one closest to the
// innermost scope wins because later matches overwrite earlier ones.
// Returns -1 when fewer than two scopes are open or nothing matches.
i32 codegen::upvalue_find(const std::string& name) {
    // 32768 level 65536 upvalues
    const usize depth = local.size();
    if (depth <= 1) {
        return -1;
    }

    i32 result = -1;
    u32 level = 0;
    for (auto scope = local.begin(); level < depth - 1; ++scope, ++level) {
        const auto found = scope->find(name);
        if (found != scope->end()) {
            // no break on purpose: keep scanning towards the inner scopes
            result = ((level << 16) | found->second);
        }
    }
    return result;
}
// Appends one instruction to the generated code. The instruction is built
// from the opcode, the current file index, the immediate operand `num` and
// the source line.
void codegen::gen(u8 op, u32 num, u32 line) {
    code.push_back(opcode{op, fileindex, num, line});
}
// Emits op_pnum for a number literal: the value is interned first and its
// constant-table index becomes the operand.
void codegen::num_gen(number_literal* node) {
    const f64 value = node->get_number();
    regist_num(value);
    const u32 constant_index = num_table[value];
    gen(op_pnum, constant_index, node->get_location().begin_line);
}
// Emits op_pstr for a string literal: the content is interned first and its
// constant-table index becomes the operand.
void codegen::str_gen(string_literal* node) {
    const auto& text = node->get_content();
    regist_str(text);
    const u32 constant_index = str_table[text];
    gen(op_pstr, constant_index, node->get_location().begin_line);
}
// Emits a bool literal as a number constant: 1 for a set flag, 0 otherwise.
// The constant is interned and pushed with op_pnum.
void codegen::bool_gen(bool_literal* node) {
    f64 value = 0;
    if (node->get_flag()) {
        value = 1;
    }
    regist_num(value);
    const u32 constant_index = num_table[value];
    gen(op_pnum, constant_index, node->get_location().begin_line);
}
// Generates a vector literal: every element is generated in source order,
// then op_newv is emitted with the element count as operand.
void codegen::vec_gen(vector_expr* node) {
    auto&& elements = node->get_elements();
    for (auto element : elements) {
        calc_gen(element);
    }
    gen(op_newv, elements.size(), node->get_location().begin_line);
}
// Generates a hash literal: op_newh comes first, then for every member its
// value is generated and op_happ is emitted with the constant-table index of
// the member name.
void codegen::hash_gen(hash_expr* node) {
    gen(op_newh, 0, node->get_location().begin_line);
    for (auto member : node->get_members()) {
        calc_gen(member->get_value());

        const std::string& key = member->get_name();
        regist_str(key);
        const u32 key_index = str_table[key];
        gen(op_happ, key_index, member->get_location().begin_line);
    }
}
// Generates bytecode for a function literal.
// Steps, in order:
//   1. validate the parameter list (default parameters must be followed only
//      by default parameters, a dynamic parameter must be last, no duplicates)
//   2. emit op_newf and op_intl with placeholder operands
//   3. open a new local scope whose slot 0 is the keyword "me"
//   4. emit one opcode per parameter and add each parameter to the scope
//   5. emit an op_jmp that skips the body, generate the body, and append
//      `pnil; ret` when the body does not end with a return
//   6. back-patch the entry address (op_newf), the local count (op_intl) and
//      the jump target (op_jmp)
// NOTE(review): the second parameter loop and the trailing return check still
// use `tmp.str()`, `tmp[0]`, `block.size()` and `block.child()` on pointers,
// and switch on `tmp->get_type()` with expr_type values while the first loop
// uses get_parameter_type(). This looks like an unfinished port from the
// previous ast interface — confirm.
void codegen::func_gen(function* node) {
// parameter list format check
bool checked_default = false;
bool checked_dynamic = false;
std::unordered_map<std::string,bool> argname;
for(auto tmp : node->get_parameter_list()) {
if (tmp->get_parameter_type()==parameter::param_type::default_parameter) {
checked_default=true;
} else if (tmp->get_parameter_type()==parameter::param_type::dynamic_parameter) {
checked_dynamic=true;
}
// check default parameter and dynamic parameter
if (checked_default && tmp->get_parameter_type()!=parameter::param_type::default_parameter) {
die("must use default parameters here", tmp->get_location());
}
if (checked_dynamic && tmp!=node->get_parameter_list().back()) {
die("dynamic parameter must be the last one", tmp->get_location());
}
// check redefinition
const auto& name = tmp->get_parameter_name();
if (argname.count(name)) {
die("redefinition of parameter: "+name, tmp->get_location());
} else {
argname[name]=true;
}
}
// remember where op_newf and op_intl are so their operands can be patched
usize newf=code.size();
gen(op_newf, 0, node->get_location().begin_line);
usize lsize=code.size();
gen(op_intl, 0, node->get_location().begin_line);
// add special keyword 'me' into symbol table
// this symbol is only used in local scope(function's scope)
// this keyword is set to nil as default value
// after calling a hash, this keyword is set to this hash
// this symbol's index will be 0
local.push_back({{"me", 0}});
// generate parameter list
for(auto& tmp : node->get_parameter_list()) {
const std::string& str=tmp.str();
if (str=="me") {
die("\"me\" should not be a parameter", tmp->get_location());
}
regist_str(str);
switch(tmp->get_type()) {
case expr_type::ast_id:
gen(op_para, str_table[str], tmp->get_location().begin_line);
break;
case expr_type::ast_default:
calc_gen(tmp[0]);
gen(op_deft, str_table[str], tmp->get_location().begin_line);
break;
case expr_type::ast_dynamic:
gen(op_dyn, str_table[str], tmp->get_location().begin_line);
break;
}
add_sym(str);
}
// the function body starts right after the op_jmp emitted below
code[newf].num=code.size()+1; // entry
usize jmp_ptr=code.size();
gen(op_jmp, 0, node->get_location().begin_line);
auto block = node->get_code_block();
// search symbols first, must use after loading parameters
// or the location of symbols will change and cause fatal error
find_symbol(block);
// a new function starts with no enclosing forindex/foreach loop
in_iterloop.push(0);
block_gen(block);
in_iterloop.pop();
code[lsize].num=local.back().size();
if (local.back().size()>=STACK_DEPTH) {
die("too many local variants: "+std::to_string(local.back().size()), block->get_location());
}
local.pop_back();
// make sure every function ends with a return
if (!block.size() || block.child().back()->get_type()!=ast_ret) {
gen(op_pnil, 0, block->get_location().begin_line);
gen(op_ret, 0, block->get_location().begin_line);
}
// the op_jmp in front of the body lands behind the whole function
code[jmp_ptr].num=code.size();
}
// Generates a call expression: the head expression first, then every element
// of the call chain (hash member access, vector index, function call) in
// order. When generating the head ended with op_callb (a native function
// call), the chain is not generated.
void codegen::call_gen(call_expr* node) {
    calc_gen(node->get_first());
    if (code.back().op == op_callb) {
        return;
    }

    for (auto step : node->get_calls()) {
        const auto kind = step->get_type();
        if (kind == expr_type::ast_callh) {
            call_hash_gen(static_cast<call_hash*>(step));
        } else if (kind == expr_type::ast_callv) {
            call_vec(static_cast<call_vector*>(step));
        } else if (kind == expr_type::ast_callf) {
            call_func(static_cast<call_function*>(step));
        }
        // any other node type is ignored, as before
    }
}
// Generates the opcode that loads the value of an identifier.
// Lookup order: builtin table (op_callb, operand = builtin index), innermost
// local scope (op_calll), enclosing scopes (op_upval), globals (op_callg).
// Using a builtin while no local scope is open is reported through die(), and
// so is a name that cannot be resolved at all.
void codegen::call_id(identifier* node) {
    // Fix: the name used to be bound to node->get_location() while the rest
    // of the body referred to an undeclared `str`. Use the identifier's name
    // consistently, like mcall_id() and check_id_exist() do.
    const auto& name = node->get_name();

    // the builtin table is terminated by an entry whose name is null
    for (u32 i = 0; builtin[i].name; ++i) {
        if (builtin[i].name == name) {
            gen(op_callb, i, node->get_location().begin_line);
            if (local.empty()) {
                die("should warp native function in local scope", node->get_location());
            }
            return;
        }
    }

    i32 index;
    if ((index = local_find(name)) >= 0) {
        gen(op_calll, index, node->get_location().begin_line);
        return;
    }
    if ((index = upvalue_find(name)) >= 0) {
        gen(op_upval, index, node->get_location().begin_line);
        return;
    }
    if ((index = global_find(name)) >= 0) {
        gen(op_callg, index, node->get_location().begin_line);
        return;
    }
    die("undefined symbol \"" + name + "\"", node->get_location());
}
// Emits op_callh for a hash member access; the operand is the constant-table
// index of the member name, which is interned first.
// NOTE(review): `node` is a call_hash* but the name is read with `node.str()`;
// the accessor of the pointer-based interface is not visible here — confirm.
void codegen::call_hash_gen(call_hash* node) {
regist_str(node.str());
gen(op_callh, str_table[node.str()], node->get_location().begin_line);
}
// Generates a vector call.
// A single index that is not a sub-vector becomes `<index>; op_callv`.
// Everything else becomes a slice: op_slcbeg, then per element either
// `<value>; op_slc` or `<begin>; <end>; op_slc2` for a sub-vector, and
// finally op_slcend.
// NOTE(review): `node` is a call_vector* but the body still uses `node.size()`,
// `node[0]`, `node.child()`, `tmp[0]`/`tmp[1]` and an unqualified
// `ast_subvec`; looks like an unfinished port — confirm.
void codegen::call_vec(call_vector* node) {
// maybe this place can use callv-const if ast's first child is ast_num
if (node.size()==1 && node[0]->get_type()!=ast_subvec) {
calc_gen(node[0]);
gen(op_callv, 0, node[0]->get_location().begin_line);
return;
}
gen(op_slcbeg,0,node->get_location().begin_line);
for(auto& tmp:node.child()) {
if (tmp->get_type()!=ast_subvec) {
calc_gen(tmp);
gen(op_slc, 0, tmp->get_location().begin_line);
} else {
calc_gen(tmp[0]);
calc_gen(tmp[1]);
gen(op_slc2, 0, tmp->get_location().begin_line);
}
}
gen(op_slcend, 0, node->get_location().begin_line);
}
// Generates a function call.
//   - no arguments:            op_callfv with operand 0
//   - first argument is a pair: the arguments are generated as a hash and
//                               op_callfh is emitted
//   - otherwise:               every argument is generated in order and
//                               op_callfv is emitted with the argument count
// NOTE(review): `node` is a call_function* but the body uses `node.size()`,
// `node[0]` and `node.child()`, and passes it to hash_gen(), which is declared
// to take a hash_expr*; looks like an unfinished port — confirm.
void codegen::call_func(call_function* node) {
if (!node.size()) {
gen(op_callfv, 0, node->get_location().begin_line);
} else if (node[0]->get_type()==expr_type::ast_pair) {
hash_gen(node);
gen(op_callfh, 0, node->get_location().begin_line);
} else {
for(auto& child:node.child()) {
calc_gen(child);
}
gen(op_callfv, node.size(), node->get_location().begin_line);
}
}
/* mcall should run after calc_gen because that operation may trigger gc.
* to keep gc from wrongly collecting a value that owns the memory space (which would cause SIGSEGV),
* ast[1] must be generated first and ast[0] afterwards to get the memory space,
* so that the value owning the memory space can still be found alive at that time.
* BUT in fact this ordering alone does not guarantee safety,
* so another way is used to avoid gc-caused SIGSEGV: the m-called value is reserved on the stack.
* see the notes in `vm::opr_mcallv()`.
*/
// Generates the code that fetches the memory location of an assignment
// target. Nothing is generated when check_memory_reachable() rejects the
// target. A plain identifier (or a call node with a single child) goes
// through mcall_id(). For a longer call chain the head and all elements but
// the last are generated as ordinary calls, and only the last element is
// generated as a memory call (mcall_hash / mcall_vec).
// NOTE(review): `node` is a call_expr* but the body still uses `node.size()`,
// `node[i]`, `node.child()` and the `ast` type, and it calls `call_hash(tmp)`
// although the class only declares call_hash_gen(); looks like an unfinished
// port — confirm.
void codegen::mcall(call_expr* node) {
if (!check_memory_reachable(node)) {
return;
}
if (node->get_type()==expr_type::ast_id) {
mcall_id(node);
return;
}
if (node.size()==1) { // foreach and forindex use call-id ast to get mcall
mcall_id(node[0]);
return;
}
calc_gen(node[0]);
// every element except the last one is an ordinary call
for(usize i=1;i<node.size()-1;++i) {
const ast& tmp=node[i];
switch(tmp->get_type()) {
case expr_type::ast_callh:call_hash(tmp);break;
case expr_type::ast_callv:call_vec(tmp); break;
case expr_type::ast_callf:call_func(tmp);break;
}
}
// the last element yields the memory location
const ast& tmp=node.child().back();
if (tmp->get_type()==expr_type::ast_callh) {
mcall_hash(tmp);
} else if (tmp->get_type()==expr_type::ast_callv) {
mcall_vec(tmp);
}
}
// Generates the opcode that fetches the memory location of an identifier.
// A builtin name is rejected through die(). Otherwise the lookup order is the
// innermost local scope (op_mcalll), the enclosing scopes (op_mupval) and the
// globals (op_mcallg); an unresolved name is reported through die().
void codegen::mcall_id(identifier* node) {
    const auto& symbol = node->get_name();

    // the builtin table is terminated by an entry whose name is null
    for (u32 idx = 0; builtin[idx].name; ++idx) {
        if (builtin[idx].name == symbol) {
            die("cannot modify native function", node->get_location());
            return;
        }
    }

    const i32 local_slot = local_find(symbol);
    if (local_slot >= 0) {
        gen(op_mcalll, local_slot, node->get_location().begin_line);
        return;
    }

    const i32 upvalue_slot = upvalue_find(symbol);
    if (upvalue_slot >= 0) {
        gen(op_mupval, upvalue_slot, node->get_location().begin_line);
        return;
    }

    const i32 global_slot = global_find(symbol);
    if (global_slot >= 0) {
        gen(op_mcallg, global_slot, node->get_location().begin_line);
        return;
    }

    die("undefined symbol \"" + symbol + "\"", node->get_location());
}
// Generates the memory call for a vector element: the index expression is
// generated first, then op_mcallv is emitted.
// NOTE(review): `node` is a call_vector* but the index is read with `node[0]`;
// looks like an unfinished port — confirm.
void codegen::mcall_vec(call_vector* node) {
calc_gen(node[0]);
gen(op_mcallv, 0, node->get_location().begin_line);
}
// Generates the memory call for a hash member: the member name is interned
// and op_mcallh is emitted with its constant-table index.
// NOTE(review): `node` is a call_hash* but the name is read with `node.str()`;
// looks like an unfinished port — confirm.
void codegen::mcall_hash(call_hash* node) {
regist_str(node.str());
gen(op_mcallh, str_table[node.str()], node->get_location().begin_line);
}
// Generates a single-variable definition: the initial value is generated,
// then it is stored with op_loadg when no local scope is open, or with
// op_loadl otherwise. The operand is the index found for the variable name.
void codegen::single_def(definition_expr* node) {
    const auto& name = node->get_variable_name()->get_name();
    calc_gen(node->get_value());

    if (local.empty()) {
        gen(op_loadg, global_find(name), node->get_location().begin_line);
    } else {
        gen(op_loadl, local_find(name), node->get_location().begin_line);
    }
}
// Generates a multi-variable definition.
// With a tuple on the right side, each value is generated and stored into the
// identifier at the same position. Otherwise the right side is generated
// once, op_callvi with the position as operand precedes every store, and one
// op_pop is emitted at the end.
// Stores use op_loadg when no local scope is open and op_loadl otherwise.
// NOTE(review): `node` is a definition_expr* but the body still uses
// `node[0]`, `node[1]`, `.child()`, `.str()` and an unqualified `ast_id`;
// looks like an unfinished port — confirm.
void codegen::multi_def(definition_expr* node) {
auto& ids=node[0].child();
usize size=ids.size();
if (node[1]->get_type()==expr_type::ast_tuple) { // (var a,b,c)=(c,b,a);
auto& vals=node[1].child();
for(usize i=0;i<size;++i) {
// check node type, only identifier is allowed
if (ids[i]->get_type()!=ast_id) {
die("cannot call identifier in multi-definition", ids[i]->get_location());
continue;
}
calc_gen(vals[i]);
const std::string& str=ids[i].str();
local.empty()?
gen(op_loadg, global_find(str), ids[i]->get_location().begin_line):
gen(op_loadl, local_find(str), ids[i]->get_location().begin_line);
}
} else { // (var a,b,c)=[0,1,2];
calc_gen(node[1]);
for(usize i=0;i<size;++i) {
// check node type, only identifier is allowed
if (ids[i]->get_type()!=ast_id) {
die("cannot call identifier in multi-definition", ids[i]->get_location());
continue;
}
gen(op_callvi, i, node[1]->get_location().begin_line);
const std::string& str=ids[i].str();
local.empty()?
gen(op_loadg, global_find(str), ids[i]->get_location().begin_line):
gen(op_loadl, local_find(str), ids[i]->get_location().begin_line);
}
// presumably drops the source value that op_callvi read from — confirm in the vm
gen(op_pop, 0, node->get_location().begin_line);
}
}
// Entry point for definitions: checks that the shape of the left side fits
// the right side, then dispatches to single_def() for a single identifier or
// to multi_def() otherwise. Generation continues after an error is reported.
// NOTE(review): the "lack values" message is reported when the left side has
// FEWER entries than the tuple and "too many values" when it has MORE; the
// two messages look swapped — confirm the intended wording.
// NOTE(review): `node` is a definition_expr* but is indexed with `node[0]` /
// `node[1]`; looks like an unfinished port — confirm.
void codegen::def_gen(definition_expr* node) {
if (node[0]->get_type()==expr_type::ast_id && node[1]->get_type()==expr_type::ast_tuple) {
die("cannot accept too many values", node[1]->get_location());
} else if (node[0]->get_type()==expr_type::ast_multi_id && node[1]->get_type()==expr_type::ast_tuple && node[0].size()<node[1].size()) {
die("lack values in multi-definition", node[1]->get_location());
} else if (node[0]->get_type()==expr_type::ast_multi_id && node[1]->get_type()==expr_type::ast_tuple && node[0].size()>node[1].size()) {
die("too many values in multi-definition", node[1]->get_location());
}
node[0]->get_type()==expr_type::ast_id?single_def(node):multi_def(node);
}
// Generates a multi-assignment.
// With a tuple on the right side, all values are generated in reverse order
// first, then every target is assigned from first to last. Otherwise the
// right side is generated once, op_callvi with the position as operand
// precedes every assignment, and one op_pop is emitted at the end.
// For each target the memory call emitted by mcall() is rewritten into the
// matching direct store (op_loadl / op_loadu / op_loadg) when it is a plain
// variable; any other target gets an extra `op_meq 1`.
// NOTE(review): "lack values" is reported when the left side has FEWER
// entries than the tuple and "too many values" when it has MORE; the two
// messages look swapped — confirm the intended wording.
// NOTE(review): `node` is a multi_assign* but is indexed with `node[0]`,
// `node[1][i]` and `.size()`; looks like an unfinished port — confirm.
void codegen::multi_assign_gen(multi_assign* node) {
if (node[1]->get_type()==expr_type::ast_tuple && node[0].size()<node[1].size()) {
die("lack values in multi-assignment", node[1]->get_location());
} else if (node[1]->get_type()==expr_type::ast_tuple && node[0].size()>node[1].size()) {
die("too many values in multi-assignment", node[1]->get_location());
}
i32 size=node[0].size();
if (node[1]->get_type()==expr_type::ast_tuple) {
// values are generated last-to-first, targets are assigned first-to-last
for(i32 i=size-1;i>=0;--i) {
calc_gen(node[1][i]);
}
for(i32 i=0;i<size;++i) {
mcall(node[0][i]);
// multi assign uses loadl and loadg to avoid meq's stack--
// and this operation changes local and global value directly
if (code.back().op==op_mcalll) {
code.back().op=op_loadl;
} else if (code.back().op==op_mupval) {
code.back().op=op_loadu;
} else if (code.back().op==op_mcallg) {
code.back().op=op_loadg;
} else {
gen(op_meq, 1, node[0][i]->get_location().begin_line);
}
}
} else {
calc_gen(node[1]);
for(i32 i=0;i<size;++i) {
gen(op_callvi, i, node[1]->get_location().begin_line);
// multi assign uses loadl and loadg to avoid meq's stack--
// and this operation changes local and global value directly
mcall(node[0][i]);
if (code.back().op==op_mcalll) {
code.back().op=op_loadl;
} else if (code.back().op==op_mupval) {
code.back().op=op_loadu;
} else if (code.back().op==op_mcallg) {
code.back().op=op_loadg;
} else {
gen(op_meq, 1, node[0][i]->get_location().begin_line);
}
}
gen(op_pop, 0, node->get_location().begin_line);
}
}
// Generates an if / elsif / else chain.
// Every `if`/`elsif` branch becomes: condition, op_jf over the branch body,
// the body, and (unless it is the last branch) an op_jmp to the end of the
// whole chain. The op_jf target is patched to the instruction following the
// branch; all collected op_jmp targets are patched once the chain is done.
// An `else` branch is just its body and ends the chain.
// NOTE(review): `node` is a condition_expr* but the body uses `node.child()`
// and `tmp[0]`/`tmp[1]`; looks like an unfinished port — confirm.
void codegen::cond_gen(condition_expr* node) {
// indices of the op_jmp instructions that must jump to the end of the chain
std::vector<usize> jmp_label;
for(auto& tmp:node.child()) {
if (tmp->get_type()==expr_type::ast_if || tmp->get_type()==expr_type::ast_elsif) {
calc_gen(tmp[0]);
usize ptr=code.size();
gen(op_jf, 0, tmp->get_location().begin_line);
block_gen(tmp[1]);
// without 'else' the last condition doesn't need to jmp
if (&tmp!=&node.child().back()) {
jmp_label.push_back(code.size());
gen(op_jmp, 0, tmp->get_location().begin_line);
}
code[ptr].num=code.size();
} else {
block_gen(tmp[0]);
break;
}
}
for(auto i:jmp_label) {
code[i].num=code.size();
}
}
// Entry point for every loop kind. A fresh list of pending `continue` jumps
// and one of pending `break` jumps is pushed to the front before dispatching
// to the matching generator; each generator ends with load_continue_break(),
// which patches those jumps and removes both lists again.
// NOTE(review): `node` is an expr* and is passed unchanged to generators that
// are declared with more specific pointer types (while_expr*, for_expr*,
// forei_expr*); presumably a cast is still missing — confirm.
void codegen::loop_gen(expr* node) {
continue_ptr.push_front({});
break_ptr.push_front({});
switch(node->get_type()) {
case expr_type::ast_while: while_gen(node); break;
case expr_type::ast_for: for_gen(node); break;
case expr_type::ast_forindex:forindex_gen(node);break;
case expr_type::ast_foreach: foreach_gen(node); break;
}
}
// Finishes the innermost loop: every pending `continue` jump gets
// `continue_place` as target and every pending `break` jump gets
// `break_place`; afterwards both pending lists of this loop are removed.
void codegen::load_continue_break(i32 continue_place, i32 break_place) {
    const auto patch = [this](const std::vector<i32>& pending, i32 target) {
        for (const auto at : pending) {
            code[at].num = target;
        }
    };

    patch(continue_ptr.front(), continue_place);
    patch(break_ptr.front(), break_place);

    continue_ptr.pop_front();
    break_ptr.pop_front();
}
// Generates a while loop: condition, op_jf to the loop exit, body, and an
// op_jmp back to the condition. `continue` jumps are patched to that trailing
// op_jmp (the last emitted instruction), `break` jumps to the first
// instruction after the loop.
// NOTE(review): `node` is a while_expr* but is indexed with `node[0]` /
// `node[1]`; looks like an unfinished port — confirm.
void codegen::while_gen(while_expr* node) {
usize loop_ptr=code.size();
calc_gen(node[0]);
usize condition_ptr=code.size();
gen(op_jf, 0, node[0]->get_location().begin_line);
block_gen(node[1]);
gen(op_jmp, loop_ptr, node[1]->get_location().begin_line);
code[condition_ptr].num=code.size();
load_continue_break(code.size()-1, code.size());
}
// Generates a for loop: initializer, condition, op_jf to the loop exit, body,
// step expression, and an op_jmp back to the condition. An empty condition is
// replaced by the constant 1 so the loop runs until it is left explicitly.
// `continue` jumps are patched to the step expression, `break` jumps to the
// first instruction after the loop.
// NOTE(review): `node` is a for_expr* but is still indexed with `node[0]` ..
// `node[3]`; the accessors of the pointer-based interface are not visible
// here, so the indexing is left as it was — confirm.
void codegen::for_gen(for_expr* node) {
    expr_gen(node[0]);
    usize jmp_place=code.size();
    if (node[1]->get_type()==expr_type::ast_null) {
        // Fix: make sure the constant 1 is registered before its index is
        // read. Without this, `num_table[1]` default-inserts the key with
        // index 0 when the program never used the number 1, so op_pnum would
        // refer to the wrong constant, and a later regist_num(1) would be
        // skipped because the key already exists.
        regist_num(1);
        gen(op_pnum, num_table[1], node[1]->get_location().begin_line);
    } else {
        calc_gen(node[1]);
    }
    usize label_exit=code.size();
    gen(op_jf, 0, node[1]->get_location().begin_line);

    block_gen(node[3]);
    // `continue` lands on the step expression
    usize continue_place=code.size();
    expr_gen(node[2]);
    gen(op_jmp, jmp_place, node[2]->get_location().begin_line);
    code[label_exit].num=code.size();

    load_continue_break(continue_place, code.size());
}
// Generates an expression that is used as a statement.
//   - definitions and multi-assignments are forwarded to their generators
//   - compound assignments go through calc_gen(); afterwards the last opcode
//     is adjusted: num is set to 1 for op_addeq..op_btxoreq, the opcode is
//     mapped from the op_addeqc.. range to the range starting at op_addecp,
//     or an op_pop is appended
//   - nil / number / string / bool literals generate nothing
//   - other value expressions go through calc_gen() followed by op_pop
//   - a plain assignment to an identifier is generated as value + direct
//     store; any other assignment goes through calc_gen() and then either
//     sets num to 1 on the trailing op_meq or appends an op_pop
// NOTE(review): `num = 1` presumably tells the vm to discard the result of
// the assignment — confirm in the vm.
// NOTE(review): `node` is an expr* but is indexed with `node[0]`/`node[1]` and
// handed to generators declared with more specific pointer types; looks like
// an unfinished port — confirm.
void codegen::expr_gen(expr* node) {
switch(node->get_type()) {
case expr_type::ast_null:break;
case expr_type::ast_def:def_gen(node);break;
case expr_type::ast_multi_assign:multi_assign_gen(node);break;
case expr_type::ast_addeq:case expr_type::ast_subeq:
case expr_type::ast_multeq:case expr_type::ast_diveq:case expr_type::ast_lnkeq:
case expr_type::ast_btandeq:case expr_type::ast_btoreq:case expr_type::ast_btxoreq:
calc_gen(node);
if (op_addeq<=code.back().op && code.back().op<=op_btxoreq) {
code.back().num=1;
} else if (op_addeqc<=code.back().op && code.back().op<=op_lnkeqc) {
code.back().op=code.back().op-op_addeqc+op_addecp;
} else {
gen(op_pop, 0, node->get_location().begin_line);
}
break;
case expr_type::ast_nil:case expr_type::ast_num:case expr_type::ast_str:case expr_type::ast_bool:break;
case expr_type::ast_vec:case expr_type::ast_hash:case expr_type::ast_func:case expr_type::ast_call:
case expr_type::ast_neg:case expr_type::ast_lnot:case expr_type::ast_bnot:
case expr_type::ast_bitor:case expr_type::ast_bitxor:case expr_type::ast_bitand:
case expr_type::ast_add:case expr_type::ast_sub:case expr_type::ast_mult:case expr_type::ast_div:case expr_type::ast_link:
case expr_type::ast_cmpeq:case expr_type::ast_neq:
case expr_type::ast_leq:case expr_type::ast_less:
case expr_type::ast_geq:case expr_type::ast_grt:
case expr_type::ast_or:case expr_type::ast_and:
case expr_type::ast_trino:
calc_gen(node);
gen(op_pop, 0, node->get_location().begin_line);
break;
case expr_type::ast_equal:
if (node[0]->get_type()==expr_type::ast_id) {
calc_gen(node[1]);
mcall_id(node[0]);
// only the first mcall_id can use load
if (code.back().op==op_mcalll) {
code.back().op=op_loadl;
} else if (code.back().op==op_mupval) {
code.back().op=op_loadu;
} else {
code.back().op=op_loadg;
}
} else {
calc_gen(node);
if (code.back().op==op_meq) {
code.back().num=1;
} else {
gen(op_pop, 0, node->get_location().begin_line);
}
}
break;
}
}
// Generates a forindex loop: the iterated value, op_cnt, then op_findex as
// loop head, a store of the produced value into the iterator variable, the
// body, and an op_jmp back to op_findex. The op_findex operand is patched to
// the first instruction after that jump. After the loop two op_pop follow
// (vector and iterator).
// The iterator is either newly defined (direct op_loadg / op_loadl) or an
// existing target fetched through mcall(), whose trailing memory call is
// rewritten into a direct store when possible.
// in_iterloop.top() is incremented while the body is generated so that
// ret_gen() knows how many loop values it has to pop.
// NOTE(review): `node` is a forei_expr* but is indexed with `node[0]` ..
// `node[2]` and uses `.str()`; looks like an unfinished port — confirm.
void codegen::forindex_gen(forei_expr* node) {
calc_gen(node[1]);
gen(op_cnt, 0, node[1]->get_location().begin_line);
usize ptr=code.size();
gen(op_findex, 0, node->get_location().begin_line);
if (node[0]->get_type()==expr_type::ast_iter) { // define a new iterator
const std::string& str=node[0][0].str();
local.empty()?
gen(op_loadg, global_find(str), node[0][0]->get_location().begin_line):
gen(op_loadl, local_find(str), node[0][0]->get_location().begin_line);
} else { // use exist variable as the iterator
mcall(node[0]);
if (code.back().op==op_mcallg) {
code.back().op=op_loadg;
} else if (code.back().op==op_mcalll) {
code.back().op=op_loadl;
} else if (code.back().op==op_mupval) {
code.back().op=op_loadu;
} else {
gen(op_meq, 1, node[0]->get_location().begin_line);
}
}
++in_iterloop.top();
block_gen(node[2]);
--in_iterloop.top();
gen(op_jmp, ptr, node->get_location().begin_line);
code[ptr].num=code.size();
// `continue` goes to the trailing op_jmp, `break` to the pops below
load_continue_break(code.size()-1, code.size());
gen(op_pop, 0, node[1]->get_location().begin_line);// pop vector
gen(op_pop, 0, node->get_location().begin_line);// pop iterator
}
// Generates a foreach loop: the iterated value, op_cnt, then op_feach as loop
// head, a store of the produced value into the iterator variable, the body,
// and an op_jmp back to op_feach. The op_feach operand is patched to the
// first instruction after that jump. After the loop two op_pop follow
// (vector and iterator).
// The iterator is either newly defined (direct op_loadg / op_loadl) or an
// existing target fetched through mcall(), whose trailing memory call is
// rewritten into a direct store when possible.
// in_iterloop.top() is incremented while the body is generated so that
// ret_gen() knows how many loop values it has to pop.
// NOTE(review): `node` is a forei_expr* but is indexed with `node[0]` ..
// `node[2]` and uses `.str()`; looks like an unfinished port — confirm.
void codegen::foreach_gen(forei_expr* node) {
calc_gen(node[1]);
gen(op_cnt, 0, node->get_location().begin_line);
usize ptr=code.size();
gen(op_feach, 0, node->get_location().begin_line);
if (node[0]->get_type()==expr_type::ast_iter) { // define a new iterator
const std::string& str=node[0][0].str();
local.empty()?
gen(op_loadg, global_find(str), node[0][0]->get_location().begin_line):
gen(op_loadl, local_find(str), node[0][0]->get_location().begin_line);
} else { // use exist variable as the iterator
mcall(node[0]);
if (code.back().op==op_mcallg) {
code.back().op=op_loadg;
} else if (code.back().op==op_mcalll) {
code.back().op=op_loadl;
} else if (code.back().op==op_mupval) {
code.back().op=op_loadu;
} else {
gen(op_meq, 1, node[0]->get_location().begin_line);
}
}
++in_iterloop.top();
block_gen(node[2]);
--in_iterloop.top();
gen(op_jmp, ptr, node->get_location().begin_line);
code[ptr].num=code.size();
// `continue` goes to the trailing op_jmp, `break` to the pops below
load_continue_break(code.size()-1, code.size());
gen(op_pop, 0, node[1]->get_location().begin_line);// pop vector
gen(op_pop, 0, node->get_location().begin_line);// pop iterator
}
// Generates a short-circuit `or`.
// Layout: <left>; jt END; pop; <right>; jt END; pop; pnil; END:
// Both op_jt instructions are patched to the first instruction after the
// trailing op_pnil.
void codegen::or_gen(binary_operator* node) {
    auto lhs = node->get_left();
    auto rhs = node->get_right();

    calc_gen(lhs);
    const usize lhs_jump = code.size();
    gen(op_jt, 0, lhs->get_location().begin_line);
    gen(op_pop, 0, lhs->get_location().begin_line);

    calc_gen(rhs);
    const usize rhs_jump = code.size();
    gen(op_jt, 0, rhs->get_location().begin_line);
    gen(op_pop, 0, rhs->get_location().begin_line);
    gen(op_pnil, 0, rhs->get_location().begin_line);

    // both jumps leave the expression at the same place
    const usize exit_place = code.size();
    code[rhs_jump].num = exit_place;
    code[lhs_jump].num = exit_place;
}
// Generates a short-circuit `and`.
// Layout: <left>; jt L1; jmp FALSE; L1: pop; <right>; jt END;
//         FALSE: pop; pnil; END:
// The first op_jt skips the following op_jmp, the second op_jt skips the
// trailing `pop; pnil` pair, and the op_jmp is patched to that pair.
void codegen::and_gen(binary_operator* node) {
    auto lhs = node->get_left();
    auto rhs = node->get_right();

    calc_gen(lhs);
    // +2: step over this jt and the jmp right behind it
    gen(op_jt, code.size() + 2, lhs->get_location().begin_line);

    const usize false_jump = code.size();
    gen(op_jmp, 0, lhs->get_location().begin_line);
    gen(op_pop, 0, rhs->get_location().begin_line); // jt jumps here

    calc_gen(rhs);
    // +3: step over this jt and the `pop; pnil` pair
    gen(op_jt, code.size() + 3, rhs->get_location().begin_line);

    code[false_jump].num = code.size();
    gen(op_pop, 0, rhs->get_location().begin_line);
    gen(op_pnil, 0, rhs->get_location().begin_line);
    // jt jumps here
}
// Generates a ternary expression.
// Layout: <condition>; jf ELSE; <left>; jmp END; ELSE: <right>; END:
void codegen::trino_gen(ternary_operator* node) {
    calc_gen(node->get_condition());
    const usize else_jump = code.size();
    gen(op_jf, 0, node->get_condition()->get_location().begin_line);

    calc_gen(node->get_left());
    const usize exit_jump = code.size();
    gen(op_jmp, 0, node->get_left()->get_location().begin_line);

    // the false branch starts right behind the jmp
    code[else_jump].num = code.size();
    calc_gen(node->get_right());

    code[exit_jump].num = code.size();
}
// Central dispatcher that generates the opcodes computing the value of
// `node`, selected by node->get_type().
// Several opcodes are derived arithmetically from the node type
// (`type - ast_xxx + op_xxx`), which relies on the ast enumerators and the
// opcodes of each group being declared in the same relative order.
// For arithmetic, comparison and compound assignment a number literal on the
// right side is not generated as code; it is interned and passed as operand
// of the "const" opcode variant instead (the same is done for a string
// literal with link / link-assign).
// NOTE(review): `node` is an expr* but is indexed with `node[0]`/`node[1]`,
// uses `.num()`/`.str()`, subtracts unqualified ast enumerators from
// get_type() and is handed to generators declared with more specific pointer
// types; looks like an unfinished port — confirm.
void codegen::calc_gen(expr* node) {
switch(node->get_type()) {
case expr_type::ast_nil: gen(op_pnil,0,node->get_location().begin_line);break;
case expr_type::ast_num: num_gen(node); break;
case expr_type::ast_str: str_gen(node); break;
case expr_type::ast_id: call_id(node); break;
case expr_type::ast_bool: bool_gen(node); break;
case expr_type::ast_vec: vec_gen(node); break;
case expr_type::ast_hash: hash_gen(node); break;
case expr_type::ast_func: func_gen(node); break;
case expr_type::ast_call: call_gen(node); break;
case expr_type::ast_equal:
// value first, then the memory location (see the note in front of mcall)
calc_gen(node[1]);
mcall(node[0]);
gen(op_meq, 0, node->get_location().begin_line);
break;
// ast_addeq(22)~ast_lnkeq(26) op_addeq(23)~op_lnkeq(27)
case expr_type::ast_addeq:case expr_type::ast_subeq:case expr_type::ast_multeq:case expr_type::ast_diveq:
if (node[1]->get_type()!=ast_num) {
calc_gen(node[1]);
}
mcall(node[0]);
if (node[1]->get_type()!=ast_num) {
gen(node->get_type()-ast_addeq+op_addeq, 0, node->get_location().begin_line);
} else {
regist_num(node[1].num());
gen(node->get_type()-ast_addeq+op_addeqc, num_table[node[1].num()], node->get_location().begin_line);
}
break;
case expr_type::ast_lnkeq:
if (node[1]->get_type()!=ast_str) {
calc_gen(node[1]);
} else {
regist_str(node[1].str());
}
mcall(node[0]);
if (node[1]->get_type()!=ast_str) {
gen(op_lnkeq, 0, node->get_location().begin_line);
} else {
gen(op_lnkeqc, str_table[node[1].str()], node->get_location().begin_line);
}
break;
case expr_type::ast_btandeq:case expr_type::ast_btoreq:case expr_type::ast_btxoreq:
calc_gen(node[1]);
mcall(node[0]);
gen(node->get_type()-ast_btandeq+op_btandeq, 0, node->get_location().begin_line);
break;
case expr_type::ast_or:or_gen(node);break;
case expr_type::ast_and:and_gen(node);break;
// ast_add(33)~ast_link(37) op_add(18)~op_lnk(22)
case expr_type::ast_add:case expr_type::ast_sub:case expr_type::ast_mult:case expr_type::ast_div:
calc_gen(node[0]);
if (node[1]->get_type()!=ast_num) {
calc_gen(node[1]);
gen(node->get_type()-ast_add+op_add, 0, node->get_location().begin_line);
} else {
regist_num(node[1].num());
gen(node->get_type()-ast_add+op_addc, num_table[node[1].num()], node->get_location().begin_line);
}
break;
case expr_type::ast_link:
calc_gen(node[0]);
if (node[1]->get_type()!=ast_str) {
calc_gen(node[1]);
gen(op_lnk, 0, node->get_location().begin_line);
} else {
regist_str(node[1].str());
gen(op_lnkc, str_table[node[1].str()], node->get_location().begin_line);
}
break;
// ast_cmpeq(27)~ast_geq(32) op_eq(29)~op_geq(34)
case expr_type::ast_cmpeq:case expr_type::ast_neq:
calc_gen(node[0]);
calc_gen(node[1]);
gen(node->get_type()-ast_cmpeq+op_eq, 0, node->get_location().begin_line);
break;
case expr_type::ast_less:case expr_type::ast_leq:case expr_type::ast_grt:case expr_type::ast_geq:
calc_gen(node[0]);
if (node[1]->get_type()!=ast_num) {
calc_gen(node[1]);
gen(node->get_type()-ast_less+op_less, 0, node->get_location().begin_line);
} else {
regist_num(node[1].num());
gen(node->get_type()-ast_less+op_lessc, num_table[node[1].num()], node->get_location().begin_line);
}
break;
case expr_type::ast_trino:trino_gen(node);break;
case expr_type::ast_neg:
calc_gen(node[0]);
gen(op_usub, 0, node->get_location().begin_line);
break;
case expr_type::ast_lnot:
calc_gen(node[0]);
gen(op_lnot, 0, node->get_location().begin_line);
break;
case expr_type::ast_bnot:
calc_gen(node[0]);
gen(op_bnot, 0, node->get_location().begin_line);
break;
case expr_type::ast_bitor:
calc_gen(node[0]);
calc_gen(node[1]);
gen(op_btor, 0, node->get_location().begin_line);
break;
case expr_type::ast_bitxor:
calc_gen(node[0]);
calc_gen(node[1]);
gen(op_btxor, 0, node->get_location().begin_line);
break;
case expr_type::ast_bitand:
calc_gen(node[0]);
calc_gen(node[1]);
gen(op_btand, 0, node->get_location().begin_line);
break;
case expr_type::ast_def:
// a definition used as a value: define the variable, then load it again
single_def(node);
call_id(node[0]);
break;
}
}
// Generates every statement of a code block in order.
//   - a bare identifier is only checked for existence
//   - bare nil / number / string / bool literals generate nothing
//   - an ast_file node switches the file index stored in later instructions
//   - `continue` / `break` emit an op_jmp with a placeholder target and
//     record its position in the pending list of the innermost loop
//   - loops, conditions and returns go to their generators
//   - every other expression statement goes through expr_gen()
// NOTE(review): `continue_ptr.front()` / `break_ptr.front()` are used without
// checking that a loop is open; presumably the parser rejects these
// statements outside of loops — confirm.
// NOTE(review): `node` is a code_block* but the body uses `node.child()` and
// `tmp.num()`, and passes `tmp` to generators declared with more specific
// pointer types; looks like an unfinished port — confirm.
void codegen::block_gen(code_block* node) {
for(auto& tmp:node.child()) {
switch(tmp->get_type()) {
case expr_type::ast_null:break;
case expr_type::ast_id:check_id_exist(tmp);break;
case expr_type::ast_nil:case expr_type::ast_num:case expr_type::ast_str:case expr_type::ast_bool:break;
case expr_type::ast_file:fileindex=tmp.num();break; // special node type in main block
case expr_type::ast_cond:cond_gen(tmp);break;
case expr_type::ast_continue:
continue_ptr.front().push_back(code.size());
gen(op_jmp, 0, tmp->get_location().begin_line);
break;
case expr_type::ast_break:
break_ptr.front().push_back(code.size());
gen(op_jmp, 0, tmp->get_location().begin_line);
break;
case expr_type::ast_while:
case expr_type::ast_for:
case expr_type::ast_forindex:
case expr_type::ast_foreach:loop_gen(tmp);break;
case expr_type::ast_equal:
case expr_type::ast_addeq:case expr_type::ast_subeq:
case expr_type::ast_multeq:case expr_type::ast_diveq:case expr_type::ast_lnkeq:
case expr_type::ast_btandeq:case expr_type::ast_btoreq:case expr_type::ast_btxoreq:
case expr_type::ast_vec:case expr_type::ast_hash:case expr_type::ast_func:case expr_type::ast_call:
case expr_type::ast_neg:case expr_type::ast_lnot:case expr_type::ast_bnot:
case expr_type::ast_bitor:case expr_type::ast_bitxor:case expr_type::ast_bitand:
case expr_type::ast_add:case expr_type::ast_sub:case expr_type::ast_mult:case expr_type::ast_div:case expr_type::ast_link:
case expr_type::ast_cmpeq:case expr_type::ast_neq:
case expr_type::ast_leq:case expr_type::ast_less:
case expr_type::ast_geq:case expr_type::ast_grt:
case expr_type::ast_or:
case expr_type::ast_and:
case expr_type::ast_trino:
case expr_type::ast_def:
case expr_type::ast_multi_assign:expr_gen(tmp);break;
case expr_type::ast_ret:ret_gen(tmp);break;
}
}
}
// Generates a return statement. For every forindex/foreach loop that is
// currently open in this function (in_iterloop.top()) two op_pop are emitted
// first, matching the two values those loops pop when they end normally.
// Then the return value is generated, or op_pnil when there is none, followed
// by op_ret.
// NOTE(review): this definition takes `const ast&` while the class declares
// `void ret_gen(return_expr*)`, and it mixes `node->` with `node.size()` /
// `node[0]`; looks like an unfinished port — confirm.
void codegen::ret_gen(const ast& node) {
for(u32 i=0;i<in_iterloop.top();++i) {
gen(op_pop, 0, node->get_location().begin_line);
gen(op_pop, 0, node->get_location().begin_line);
}
if (node.size()) {
calc_gen(node[0]);
} else {
gen(op_pnil, 0, node->get_location().begin_line);
}
gen(op_ret, 0, node->get_location().begin_line);
}
// Compiles the parsed program into bytecode.
// Global symbols are collected first, then op_intg (operand: number of
// globals), the main block and op_exit are emitted. Afterwards the sizes of
// the constant tables, the global table and the code are checked against
// their limits; every violation is reported on the main file.
// Returns the error object this generator reports to.
const error& codegen::compile(const parse& parse, const linker& import) {
    fileindex = 0;
    file = import.filelist().data();
    in_iterloop.push(0);

    find_symbol(parse.tree()); // search symbols first
    gen(op_intg, global.size(), 0);
    block_gen(parse.tree()); // generate main block
    gen(op_exit, 0, 0);

    // all limit violations are reported the same way
    const auto report = [this](const std::string& message) {
        err.load(file[0]); // load main execute file
        err.err("code", message);
    };

    // size out of bound check
    if (num_res.size() > 0xffffff) {
        report("too many constant numbers: " + std::to_string(num_res.size()));
    }
    if (str_res.size() > 0xffffff) {
        report("too many constant strings: " + std::to_string(str_res.size()));
    }
    if (global.size() >= STACK_DEPTH) {
        report("too many global variants: " + std::to_string(global.size()));
    }
    if (code.size() > 0xffffff) {
        report("bytecode size overflow: " + std::to_string(code.size()));
    }
    return err;
}
void codegen::print() {
// func end stack, reserved for code print
std::stack<u32> fbstk;
std::stack<u32> festk;
// print const numbers
for(auto& num:num_res) {
std::cout<<" .number "<<num<<"\n";
}
// print const strings
for(auto& str:str_res) {
std::cout<<" .symbol \""<<rawstr(str)<<"\"\n";
}
// print code
std::cout<<"\n";
codestream::set(num_res.data(), str_res.data());
for(u32 i=0;i<code.size();++i) {
// print opcode index, opcode name, opcode immediate number
const opcode& c=code[i];
if (!festk.empty() && i==festk.top()) {
std::cout<<std::hex<<"<0x"<<fbstk.top()<<std::dec<<">;\n";
// avoid two empty lines
if (c.op!=op_newf) {
std::cout<<"\n";
}
fbstk.pop();
festk.pop();
}
// get function begin index and end index
if (c.op==op_newf) {
std::cout<<std::hex<<"\nfunc <0x"<<i<<std::dec<<">:\n";
for(u32 j=i;j<code.size();++j) {
if (code[j].op==op_jmp) {
fbstk.push(i);
festk.push(code[j].num);
break;
}
}
}
// output bytecode
std::cout<<" "<<codestream(c,i)<<"\n";
}
}

View File

@ -1 +1,101 @@
#pragma once
#pragma once
#include "nasal_new_err.h"
#include "nasal_new_builtin.h"
#include "nasal_new_opcode.h"
#include "nasal_new_ast.h"
#include "ast_visitor.h"
#include "symbol_finder.h"
#include "nasal_new_parse.h"
#include "nasal_new_import.h"
#include <iomanip>
#include <list>
#include <stack>
#include <unordered_map>
#ifdef _MSC_VER
#pragma warning (disable:4244)
#pragma warning (disable:4267)
#endif
// Bytecode generator: walks the ast produced by the parser and fills a list
// of opcodes plus the number and string constant tables those opcodes refer
// to. Errors are reported to the `error` object passed to the constructor.
class codegen {
private:
// index of the file the generated instructions belong to; stored in every
// instruction by gen()
u16 fileindex;
// error sink, see die()
error& err;
// points at the first entry of the linker's file list; set by compile()
const std::string* file;
// one counter per function being generated: number of forindex/foreach
// loops currently open, used by ret_gen()
std::stack<u32> in_iterloop;
// constant value -> index into num_res / str_res
std::unordered_map<f64,u32> num_table;
std::unordered_map<std::string,u32> str_table;
// constants in index order
std::vector<f64> num_res;
std::vector<std::string> str_res;
// generated instructions
std::vector<opcode> code;
// per open loop (innermost at the front): positions of the op_jmp
// instructions emitted for `continue` / `break` that still need a target
std::list<std::vector<i32>> continue_ptr;
std::list<std::vector<i32>> break_ptr;
// symbol table
// global : max STACK_DEPTH-1 values
std::unordered_map<std::string,i32> global;
// local : max 32768 upvalues 65536 values
// but in fact local scope also has less than STACK_DEPTH value
std::list<std::unordered_map<std::string,i32>> local;
bool check_memory_reachable(call_expr*);
void check_id_exist(identifier*);
// reports a code generation error at the given source location
void die(const std::string& info, const span& loc) {
err.err("code", loc, info);
}
// constant interning
void regist_num(const f64);
void regist_str(const std::string&);
// symbol table handling; the *_find functions return -1 when not found
void find_symbol(code_block*);
void add_sym(const std::string&);
i32 local_find(const std::string&);
i32 global_find(const std::string&);
i32 upvalue_find(const std::string&);
// appends one instruction: opcode, operand, source line
void gen(u8, u32, u32);
// generators, one per kind of ast node
void num_gen(number_literal*);
void str_gen(string_literal*);
void bool_gen(bool_literal*);
void vec_gen(vector_expr*);
void hash_gen(hash_expr*);
void func_gen(function*);
void call_gen(call_expr*);
void call_id(identifier*);
void call_hash_gen(call_hash*);
void call_vec(call_vector*);
void call_func(call_function*);
// "memory call" generators: fetch the location of an assignment target
void mcall(call_expr*);
void mcall_id(identifier*);
void mcall_vec(call_vector*);
void mcall_hash(call_hash*);
void multi_def(definition_expr*);
void single_def(definition_expr*);
void def_gen(definition_expr*);
void multi_assign_gen(multi_assign*);
void cond_gen(condition_expr*);
void loop_gen(expr*);
// patches the pending continue/break jumps of the innermost loop
void load_continue_break(i32, i32);
void while_gen(while_expr*);
void for_gen(for_expr*);
void expr_gen(expr*);
void forindex_gen(forei_expr*);
void foreach_gen(forei_expr*);
void or_gen(binary_operator*);
void and_gen(binary_operator*);
void trino_gen(ternary_operator*);
void calc_gen(expr*);
void block_gen(code_block*);
void ret_gen(return_expr*);
public:
// `e` receives every error reported during code generation
codegen(error& e): fileindex(0), err(e), file(nullptr) {}
// generates the bytecode for the parsed program and returns the error object
const error& compile(const parse&, const linker&);
// prints the constant tables and the generated bytecode
void print();
// read-only access to the results of compile()
const std::vector<std::string>& strs() const {return str_res;}
const std::vector<f64>& nums() const {return num_res;}
const std::vector<opcode>& codes() const {return code;}
};

View File

@ -6,6 +6,7 @@
#include "nasal_new_import.h"
#include "ast_visitor.h"
#include "ast_dumper.h"
#include "symbol_finder.h"
#include "optimizer.h"
#include <unordered_map>
@ -102,6 +103,11 @@ void execute(
opt->do_optimization(parse.tree());
delete opt;
auto finder = new symbol_finder;
for(const auto& symbol : finder->do_find(parse.tree())) {
std::cout << symbol << std::endl;
}
// code generator gets parser's ast and import file list to generate code
// gen.compile(parse, ld).chkerr();
// if (cmd&VM_CODE) {

View File

@ -3,11 +3,11 @@
// Store the three buffer pointers in codestream's nums/strs/files members.
// The default value of `filelist` (nullptr) is given on the in-class
// declaration only; a default argument must not be repeated on the
// out-of-class definition.
void codestream::set(
    const f64* numbuff,
    const std::string* strbuff,
    const std::string* filelist
) {
    nums = numbuff;
    strs = strbuff;
    files = filelist;
}
void codestream::dump(std::ostream& out) const {

View File

@ -139,7 +139,7 @@ private:
inline static const std::string* files = nullptr;
public:
// Build a stream wrapper from one opcode `c` and its index `i`.
codestream(const opcode& c, const u32 i):
    code(c), index(i) {}
// Set the shared number, string and file-list pointers.
// The file list is optional and defaults to nullptr; the default is
// written here, on the single in-class declaration.
static void set(const f64*, const std::string*, const std::string* filelist = nullptr);
void dump(std::ostream&) const;
};

View File

@ -774,8 +774,7 @@ multi_identifier* parse::multi_id() {
auto node = new multi_identifier(toks[ptr].loc);
while(!lookahead(tok::eof)) {
// only identifier is allowed here
// but we check it at codegen stage
node->add_var(calc());
node->add_var(id());
if (lookahead(tok::comma)) {
match(tok::comma);
} else if (lookahead(tok::id)) { // first set of identifier

29
ast/symbol_finder.cpp Normal file
View File

@ -0,0 +1,29 @@
#include "symbol_finder.h"
// Record the name(s) declared by a definition, then visit its value.
// The single variable name is used when present; otherwise every entry of
// the definition's variable list is recorded.
bool symbol_finder::visit_definition_expr(definition_expr* node) {
    const auto single = node->get_variable_name();
    if (!single) {
        // no single name: take each identifier from the variable list
        for(const auto& var : node->get_variables()->get_variables()) {
            symbols.push_back(var->get_name());
        }
    } else {
        symbols.push_back(single->get_name());
    }
    // continue into the value expression with this same visitor
    node->get_value()->accept(this);
    return true;
}
bool symbol_finder::visit_function(function* node) {
return true;
}
// Record the name attached to an iterator expression, if it has one.
// Always returns true, like the other visit_* overrides of this class.
bool symbol_finder::visit_iter_expr(iter_expr* node) {
    if (node->get_name()) {
        symbols.push_back(node->get_name()->get_name());
    }
    // a bool function must return on every path; falling off the end
    // is undefined behaviour
    return true;
}
// Collect symbols starting from `root`.
// The result refers to this finder's own `symbols` vector, which is cleared
// at the start of every call; copy it if it must survive a later do_find()
// or the finder itself.
const std::vector<std::string>& symbol_finder::do_find(code_block* root) {
    this->symbols.clear();
    root->accept(this);
    return this->symbols;
}

19
ast/symbol_finder.h Normal file
View File

@ -0,0 +1,19 @@
#pragma once
#include "nasal_new_ast.h"
#include "ast_visitor.h"
#include <cstring>
#include <sstream>
#include <vector>
// ast visitor that gathers symbol names into a vector of strings.
class symbol_finder: public ast_visitor {
private:
    // names pushed by the visit_* overrides; reset by do_find()
    std::vector<std::string> symbols;

public:
    bool visit_definition_expr(definition_expr* node) override;
    bool visit_function(function* node) override;
    bool visit_iter_expr(iter_expr* node) override;

    // run the visitor from `root` and return the gathered names
    const std::vector<std::string>& do_find(code_block* root);
};

View File

@ -85,6 +85,7 @@ NASAL_NEW_AST=\
nasal_new_opcode.o\
nasal_new_parse.o\
optimizer.o\
symbol_finder.o\
ast_visitor.o\
ast_dumper.o\
nasal_new_main.o
@ -119,7 +120,7 @@ nasal_new_builtin.o: ast/nasal_new_builtin.h ast/nasal_new_builtin.cpp
$(CXX) -std=$(STD) -c -O3 ast/nasal_new_builtin.cpp -fno-exceptions -fPIC -o nasal_new_builtin.o -I .
nasal_new_codegen.o: ast/nasal_new_codegen.h ast/nasal_new_codegen.cpp
$(CXX) -std=$(STD) -c -O3 ast/nasal_new_codegen.cpp -fno-exceptions -fPIC -o nasal_new_codegen.o -I .
# $(CXX) -std=$(STD) -c -O3 ast/nasal_new_codegen.cpp -fno-exceptions -fPIC -o nasal_new_codegen.o -I .
nasal_new_opcode.o: ast/nasal_new_opcode.h ast/nasal_new_opcode.cpp
$(CXX) -std=$(STD) -c -O3 ast/nasal_new_opcode.cpp -fno-exceptions -fPIC -o nasal_new_opcode.o -I .
@ -130,6 +131,9 @@ nasal_new_parse.o: ast/nasal_new_parse.h ast/nasal_new_parse.cpp ast/nasal_new_a
optimizer.o: ast/optimizer.h ast/optimizer.cpp ast/nasal_new_ast.h
$(CXX) -std=$(STD) -c -O3 ast/optimizer.cpp -fno-exceptions -fPIC -o optimizer.o -I .
symbol_finder.o: ast/symbol_finder.h ast/symbol_finder.cpp ast/nasal_new_ast.h
$(CXX) -std=$(STD) -c -O3 ast/symbol_finder.cpp -fno-exceptions -fPIC -o symbol_finder.o -I .
ast_visitor.o: ast/nasal_new_ast.h ast/ast_visitor.h ast/ast_visitor.cpp
$(CXX) -std=$(STD) -c -O3 ast/ast_visitor.cpp -fno-exceptions -fPIC -o ast_visitor.o -I .