diff --git a/ast/nasal_new_ast.h b/ast/nasal_new_ast.h index 0def471..50e816b 100644 --- a/ast/nasal_new_ast.h +++ b/ast/nasal_new_ast.h @@ -247,7 +247,7 @@ public: void set_parameter_type(param_type pt) {type = pt;} void set_parameter_name(const std::string& pname) {name = pname;} void set_default_value(expr* node) {default_value = node;} - param_type get_type() {return type;} + param_type get_parameter_type() {return type;} const std::string& get_parameter_name() const {return name;} expr* get_default_value() {return default_value;} void accept(ast_visitor*) override; @@ -465,14 +465,14 @@ public: class multi_identifier:public expr { private: - std::vector variables; + std::vector variables; public: multi_identifier(const span& location): expr(location, expr_type::ast_multi_id) {} ~multi_identifier(); - void add_var(expr* node) {variables.push_back(node);} - std::vector& get_variables() {return variables;} + void add_var(identifier* node) {variables.push_back(node);} + std::vector& get_variables() {return variables;} void accept(ast_visitor*) override; }; diff --git a/ast/nasal_new_codegen.cpp b/ast/nasal_new_codegen.cpp index 84b8848..35ffeed 100644 --- a/ast/nasal_new_codegen.cpp +++ b/ast/nasal_new_codegen.cpp @@ -1 +1,976 @@ -#include "nasal_new_codegen.h" \ No newline at end of file +#include "nasal_new_codegen.h" + +bool codegen::check_memory_reachable(call_expr* node) { + if (node->get_type()==expr_type::ast_call) { + const ast& tmp=node.child().back(); + if (tmp->get_type()==expr_type::ast_callf) { + die("bad left-value with function call", node->get_location()); + return false; + } + if (tmp->get_type()==expr_type::ast_callv && (tmp.size()==0 || tmp.size()>1 || tmp[0]->get_type()==expr_type::ast_subvec)) { + die("bad left-value with subvec", node->get_location()); + return false; + } + } else if (node->get_type()!=ast_id) { + die("bad left-value", node->get_location()); + return false; + } + return true; +} + +void codegen::check_id_exist(identifier* node) { + const auto& name = node->get_name(); + for(u32 i=0;builtin[i].name;++i) { + if (builtin[i].name==name) { + if (local.empty()) { + die("useless native function used in global scope", node->get_location()); + } + return; + } + } + + if (local_find(name)>=0) { + return; + } + if (upvalue_find(name)>=0) { + return; + } + if (global_find(name)>=0) { + return; + } + die("undefined symbol \"" + name + "\", and this symbol is useless here", node->get_location()); +} + +void codegen::regist_num(const f64 num) { + if (!num_table.count(num)) { + u32 size=num_table.size(); + num_table[num]=size; + num_res.push_back(num); + } +} + +void codegen::regist_str(const std::string& str) { + if (!str_table.count(str)) { + u32 size=str_table.size(); + str_table[str]=size; + str_res.push_back(str); + } +} + +void codegen::find_symbol(code_block* node) { + auto finder = new symbol_finder; + for(const auto& i : finder->do_find(node)) { + add_sym(i); + } +} + +void codegen::add_sym(const std::string& name) { + if (local.empty()) { + if (global.count(name)) { + return; + } + i32 index=global.size(); + global[name]=index; + return; + } + if (local.back().count(name)) { + return; + } + i32 index=local.back().size(); + local.back()[name]=index; +} + +i32 codegen::local_find(const std::string& name) { + if (local.empty()) { + return -1; + } + return local.back().count(name)?local.back()[name]:-1; +} + +i32 codegen::global_find(const std::string& name) { + return global.count(name)?global[name]:-1; +} + +i32 codegen::upvalue_find(const std::string& name) { + // 32768 level 65536 upvalues + i32 index=-1; + usize size=local.size(); + if (size<=1) { + return -1; + } + auto iter=local.begin(); + for(u32 i=0;icount(name)) { + index=((i<<16)|(*iter)[name]); + } + } + return index; +} + +void codegen::gen(u8 op, u32 num, u32 line) { + code.push_back({op, fileindex, num, line}); +} + +void codegen::num_gen(number_literal* node) { + f64 num = node->get_number(); + regist_num(num); + gen(op_pnum,num_table[num], node->get_location().begin_line); +} + +void codegen::str_gen(string_literal* node) { + regist_str(node->get_content()); + gen(op_pstr, str_table[node->get_content()], node->get_location().begin_line); +} + +void codegen::bool_gen(bool_literal* node) { + f64 num = node->get_flag()?1:0; + regist_num(num); + gen(op_pnum, num_table[num], node->get_location().begin_line); +} + +void codegen::vec_gen(vector_expr* node) { + for(auto child : node->get_elements()) { + calc_gen(child); + } + gen(op_newv, node->get_elements().size(), node->get_location().begin_line); +} + +void codegen::hash_gen(hash_expr* node) { + gen(op_newh, 0, node->get_location().begin_line); + for(auto child : node->get_members()) { + calc_gen(child->get_value()); + const std::string& str=child->get_name(); + regist_str(str); + gen(op_happ, str_table[str], child->get_location().begin_line); + } +} + +void codegen::func_gen(function* node) { + // parameter list format check + bool checked_default = false; + bool checked_dynamic = false; + std::unordered_map argname; + for(auto tmp : node->get_parameter_list()) { + if (tmp->get_parameter_type()==parameter::param_type::default_parameter) { + checked_default=true; + } else if (tmp->get_parameter_type()==parameter::param_type::dynamic_parameter) { + checked_dynamic=true; + } + // check default parameter and dynamic parameter + if (checked_default && tmp->get_parameter_type()!=parameter::param_type::default_parameter) { + die("must use default parameters here", tmp->get_location()); + } + if (checked_dynamic && tmp!=node->get_parameter_list().back()) { + die("dynamic parameter must be the last one", tmp->get_location()); + } + // check redefinition + const auto& name = tmp->get_parameter_name(); + if (argname.count(name)) { + die("redefinition of parameter: "+name, tmp->get_location()); + } else { + argname[name]=true; + } + } + + usize newf=code.size(); + gen(op_newf, 0, node->get_location().begin_line); + usize lsize=code.size(); + gen(op_intl, 0, node->get_location().begin_line); + + // add special keyword 'me' into symbol table + // this symbol is only used in local scope(function's scope) + // this keyword is set to nil as default value + // after calling a hash, this keyword is set to this hash + // this symbol's index will be 0 + local.push_back({{"me", 0}}); + + // generate parameter list + for(auto& tmp : node->get_parameter_list()) { + const std::string& str=tmp.str(); + if (str=="me") { + die("\"me\" should not be a parameter", tmp->get_location()); + } + regist_str(str); + switch(tmp->get_type()) { + case expr_type::ast_id: + gen(op_para, str_table[str], tmp->get_location().begin_line); + break; + case expr_type::ast_default: + calc_gen(tmp[0]); + gen(op_deft, str_table[str], tmp->get_location().begin_line); + break; + case expr_type::ast_dynamic: + gen(op_dyn, str_table[str], tmp->get_location().begin_line); + break; + } + add_sym(str); + } + + code[newf].num=code.size()+1; // entry + usize jmp_ptr=code.size(); + gen(op_jmp, 0, node->get_location().begin_line); + + auto block = node->get_code_block(); + // search symbols first, must use after loading parameters + // or the location of symbols will change and cause fatal error + find_symbol(block); + in_iterloop.push(0); + block_gen(block); + in_iterloop.pop(); + code[lsize].num=local.back().size(); + if (local.back().size()>=STACK_DEPTH) { + die("too many local variants: "+std::to_string(local.back().size()), block->get_location()); + } + local.pop_back(); + + if (!block.size() || block.child().back()->get_type()!=ast_ret) { + gen(op_pnil, 0, block->get_location().begin_line); + gen(op_ret, 0, block->get_location().begin_line); + } + code[jmp_ptr].num=code.size(); +} + +void codegen::call_gen(call_expr* node) { + calc_gen(node->get_first()); + if (code.back().op==op_callb) { + return; + } + for(auto i : node->get_calls()) { + switch(i->get_type()) { + case expr_type::ast_callh: call_hash_gen((call_hash*)i); break; + case expr_type::ast_callv: call_vec((call_vector*)i); break; + case expr_type::ast_callf: call_func((call_function*)i); break; + } + } +} + +void codegen::call_id(identifier* node) { + const auto& name = node->get_location(); + for(u32 i=0;builtin[i].name;++i) { + if (builtin[i].name==str) { + gen(op_callb, i, node->get_location().begin_line); + if (local.empty()) { + die("should warp native function in local scope", node->get_location()); + } + return; + } + } + i32 index; + if ((index=local_find(str))>=0) { + gen(op_calll, index, node->get_location().begin_line); + return; + } + if ((index=upvalue_find(str))>=0) { + gen(op_upval, index, node->get_location().begin_line); + return; + } + if ((index=global_find(str))>=0) { + gen(op_callg, index, node->get_location().begin_line); + return; + } + die("undefined symbol \""+str+"\"", node->get_location()); +} + +void codegen::call_hash_gen(call_hash* node) { + regist_str(node.str()); + gen(op_callh, str_table[node.str()], node->get_location().begin_line); +} + +void codegen::call_vec(call_vector* node) { + // maybe this place can use callv-const if ast's first child is ast_num + if (node.size()==1 && node[0]->get_type()!=ast_subvec) { + calc_gen(node[0]); + gen(op_callv, 0, node[0]->get_location().begin_line); + return; + } + gen(op_slcbeg,0,node->get_location().begin_line); + for(auto& tmp:node.child()) { + if (tmp->get_type()!=ast_subvec) { + calc_gen(tmp); + gen(op_slc, 0, tmp->get_location().begin_line); + } else { + calc_gen(tmp[0]); + calc_gen(tmp[1]); + gen(op_slc2, 0, tmp->get_location().begin_line); + } + } + gen(op_slcend, 0, node->get_location().begin_line); +} + +void codegen::call_func(call_function* node) { + if (!node.size()) { + gen(op_callfv, 0, node->get_location().begin_line); + } else if (node[0]->get_type()==expr_type::ast_pair) { + hash_gen(node); + gen(op_callfh, 0, node->get_location().begin_line); + } else { + for(auto& child:node.child()) { + calc_gen(child); + } + gen(op_callfv, node.size(), node->get_location().begin_line); + } +} + +/* mcall should run after calc_gen because this operation may trigger gc. +* to avoid gc incorrectly collecting values that include the memory space(which will cause SIGSEGV), +* we must run ast[1] then we run ast[0] to get the memory space. +* at this time the value including the memory space can must be found alive. +* BUT in fact this method does not make much safety. +* so we use another way to avoid gc-caused SIGSEGV: reserve m-called value on stack. +* you could see the notes in `vm::opr_mcallv()`. +*/ +void codegen::mcall(call_expr* node) { + if (!check_memory_reachable(node)) { + return; + } + if (node->get_type()==expr_type::ast_id) { + mcall_id(node); + return; + } + if (node.size()==1) { // foreach and forindex use call-id ast to get mcall + mcall_id(node[0]); + return; + } + calc_gen(node[0]); + for(usize i=1;iget_type()) { + case expr_type::ast_callh:call_hash(tmp);break; + case expr_type::ast_callv:call_vec(tmp); break; + case expr_type::ast_callf:call_func(tmp);break; + } + } + const ast& tmp=node.child().back(); + if (tmp->get_type()==expr_type::ast_callh) { + mcall_hash(tmp); + } else if (tmp->get_type()==expr_type::ast_callv) { + mcall_vec(tmp); + } +} + +void codegen::mcall_id(identifier* node) { + const auto& name = node->get_name(); + for(u32 i=0;builtin[i].name;++i) { + if (builtin[i].name==name) { + die("cannot modify native function", node->get_location()); + return; + } + } + i32 index; + if ((index=local_find(name))>=0) { + gen(op_mcalll, index, node->get_location().begin_line); + return; + } + if ((index=upvalue_find(name))>=0) { + gen(op_mupval, index, node->get_location().begin_line); + return; + } + if ((index=global_find(name))>=0) { + gen(op_mcallg, index, node->get_location().begin_line); + return; + } + die("undefined symbol \"" + name + "\"", node->get_location()); +} + +void codegen::mcall_vec(call_vector* node) { + calc_gen(node[0]); + gen(op_mcallv, 0, node->get_location().begin_line); +} + +void codegen::mcall_hash(call_hash* node) { + regist_str(node.str()); + gen(op_mcallh, str_table[node.str()], node->get_location().begin_line); +} + +void codegen::single_def(definition_expr* node) { + const auto& str = node->get_variable_name()->get_name(); + calc_gen(node->get_value()); + local.empty()? + gen(op_loadg, global_find(str), node->get_location().begin_line): + gen(op_loadl, local_find(str), node->get_location().begin_line); +} + +void codegen::multi_def(definition_expr* node) { + auto& ids=node[0].child(); + usize size=ids.size(); + if (node[1]->get_type()==expr_type::ast_tuple) { // (var a,b,c)=(c,b,a); + auto& vals=node[1].child(); + for(usize i=0;iget_type()!=ast_id) { + die("cannot call identifier in multi-definition", ids[i]->get_location()); + continue; + } + calc_gen(vals[i]); + const std::string& str=ids[i].str(); + local.empty()? + gen(op_loadg, global_find(str), ids[i]->get_location().begin_line): + gen(op_loadl, local_find(str), ids[i]->get_location().begin_line); + } + } else { // (var a,b,c)=[0,1,2]; + calc_gen(node[1]); + for(usize i=0;iget_type()!=ast_id) { + die("cannot call identifier in multi-definition", ids[i]->get_location()); + continue; + } + gen(op_callvi, i, node[1]->get_location().begin_line); + const std::string& str=ids[i].str(); + local.empty()? + gen(op_loadg, global_find(str), ids[i]->get_location().begin_line): + gen(op_loadl, local_find(str), ids[i]->get_location().begin_line); + } + gen(op_pop, 0, node->get_location().begin_line); + } +} + +void codegen::def_gen(definition_expr* node) { + if (node[0]->get_type()==expr_type::ast_id && node[1]->get_type()==expr_type::ast_tuple) { + die("cannot accept too many values", node[1]->get_location()); + } else if (node[0]->get_type()==expr_type::ast_multi_id && node[1]->get_type()==expr_type::ast_tuple && node[0].size()get_location()); + } else if (node[0]->get_type()==expr_type::ast_multi_id && node[1]->get_type()==expr_type::ast_tuple && node[0].size()>node[1].size()) { + die("too many values in multi-definition", node[1]->get_location()); + } + node[0]->get_type()==expr_type::ast_id?single_def(node):multi_def(node); +} + +void codegen::multi_assign_gen(multi_assign* node) { + if (node[1]->get_type()==expr_type::ast_tuple && node[0].size()get_location()); + } else if (node[1]->get_type()==expr_type::ast_tuple && node[0].size()>node[1].size()) { + die("too many values in multi-assignment", node[1]->get_location()); + } + i32 size=node[0].size(); + if (node[1]->get_type()==expr_type::ast_tuple) { + for(i32 i=size-1;i>=0;--i) { + calc_gen(node[1][i]); + } + for(i32 i=0;iget_location().begin_line); + } + } + } else { + calc_gen(node[1]); + for(i32 i=0;iget_location().begin_line); + // multi assign user loadl and loadg to avoid meq's stack-- + // and this operation changes local and global value directly + mcall(node[0][i]); + if (code.back().op==op_mcalll) { + code.back().op=op_loadl; + } else if (code.back().op==op_mupval) { + code.back().op=op_loadu; + } else if (code.back().op==op_mcallg) { + code.back().op=op_loadg; + } else { + gen(op_meq, 1, node[0][i]->get_location().begin_line); + } + } + gen(op_pop, 0, node->get_location().begin_line); + } +} + +void codegen::cond_gen(condition_expr* node) { + std::vector jmp_label; + for(auto& tmp:node.child()) { + if (tmp->get_type()==expr_type::ast_if || tmp->get_type()==expr_type::ast_elsif) { + calc_gen(tmp[0]); + usize ptr=code.size(); + gen(op_jf, 0, tmp->get_location().begin_line); + block_gen(tmp[1]); + // without 'else' the last condition doesn't need to jmp + if (&tmp!=&node.child().back()) { + jmp_label.push_back(code.size()); + gen(op_jmp, 0, tmp->get_location().begin_line); + } + code[ptr].num=code.size(); + } else { + block_gen(tmp[0]); + break; + } + } + for(auto i:jmp_label) { + code[i].num=code.size(); + } +} + +void codegen::loop_gen(expr* node) { + continue_ptr.push_front({}); + break_ptr.push_front({}); + switch(node->get_type()) { + case expr_type::ast_while: while_gen(node); break; + case expr_type::ast_for: for_gen(node); break; + case expr_type::ast_forindex:forindex_gen(node);break; + case expr_type::ast_foreach: foreach_gen(node); break; + } +} + +void codegen::load_continue_break(i32 continue_place,i32 break_place) { + for(auto i:continue_ptr.front()) { + code[i].num=continue_place; + } + for(auto i:break_ptr.front()) { + code[i].num=break_place; + } + continue_ptr.pop_front(); + break_ptr.pop_front(); +} + +void codegen::while_gen(while_expr* node) { + usize loop_ptr=code.size(); + calc_gen(node[0]); + usize condition_ptr=code.size(); + gen(op_jf, 0, node[0]->get_location().begin_line); + + block_gen(node[1]); + gen(op_jmp, loop_ptr, node[1]->get_location().begin_line); + code[condition_ptr].num=code.size(); + load_continue_break(code.size()-1, code.size()); +} + +void codegen::for_gen(for_expr* node) { + expr_gen(node[0]); + usize jmp_place=code.size(); + if (node[1]->get_type()==expr_type::ast_null) { + gen(op_pnum, num_table[1], node[1]->get_location().begin_line); + } else { + calc_gen(node[1]); + } + usize label_exit=code.size(); + gen(op_jf, 0, node[1]->get_location().begin_line); + + block_gen(node[3]); + usize continue_place=code.size(); + expr_gen(node[2]); + gen(op_jmp, jmp_place, node[2]->get_location().begin_line); + code[label_exit].num=code.size(); + + load_continue_break(continue_place, code.size()); +} + +void codegen::expr_gen(expr* node) { + switch(node->get_type()) { + case expr_type::ast_null:break; + case expr_type::ast_def:def_gen(node);break; + case expr_type::ast_multi_assign:multi_assign_gen(node);break; + case expr_type::ast_addeq:case expr_type::ast_subeq: + case expr_type::ast_multeq:case expr_type::ast_diveq:case expr_type::ast_lnkeq: + case expr_type::ast_btandeq:case expr_type::ast_btoreq:case expr_type::ast_btxoreq: + calc_gen(node); + if (op_addeq<=code.back().op && code.back().op<=op_btxoreq) { + code.back().num=1; + } else if (op_addeqc<=code.back().op && code.back().op<=op_lnkeqc) { + code.back().op=code.back().op-op_addeqc+op_addecp; + } else { + gen(op_pop, 0, node->get_location().begin_line); + } + break; + case expr_type::ast_nil:case expr_type::ast_num:case expr_type::ast_str:case expr_type::ast_bool:break; + case expr_type::ast_vec:case expr_type::ast_hash:case expr_type::ast_func:case expr_type::ast_call: + case expr_type::ast_neg:case expr_type::ast_lnot:case expr_type::ast_bnot: + case expr_type::ast_bitor:case expr_type::ast_bitxor:case expr_type::ast_bitand: + case expr_type::ast_add:case expr_type::ast_sub:case expr_type::ast_mult:case expr_type::ast_div:case expr_type::ast_link: + case expr_type::ast_cmpeq:case expr_type::ast_neq: + case expr_type::ast_leq:case expr_type::ast_less: + case expr_type::ast_geq:case expr_type::ast_grt: + case expr_type::ast_or:case expr_type::ast_and: + case expr_type::ast_trino: + calc_gen(node); + gen(op_pop, 0, node->get_location().begin_line); + break; + case expr_type::ast_equal: + if (node[0]->get_type()==expr_type::ast_id) { + calc_gen(node[1]); + mcall_id(node[0]); + // only the first mcall_id can use load + if (code.back().op==op_mcalll) { + code.back().op=op_loadl; + } else if (code.back().op==op_mupval) { + code.back().op=op_loadu; + } else { + code.back().op=op_loadg; + } + } else { + calc_gen(node); + if (code.back().op==op_meq) { + code.back().num=1; + } else { + gen(op_pop, 0, node->get_location().begin_line); + } + } + break; + } +} + +void codegen::forindex_gen(forei_expr* node) { + calc_gen(node[1]); + gen(op_cnt, 0, node[1]->get_location().begin_line); + usize ptr=code.size(); + gen(op_findex, 0, node->get_location().begin_line); + if (node[0]->get_type()==expr_type::ast_iter) { // define a new iterator + const std::string& str=node[0][0].str(); + local.empty()? + gen(op_loadg, global_find(str), node[0][0]->get_location().begin_line): + gen(op_loadl, local_find(str), node[0][0]->get_location().begin_line); + } else { // use exist variable as the iterator + mcall(node[0]); + if (code.back().op==op_mcallg) { + code.back().op=op_loadg; + } else if (code.back().op==op_mcalll) { + code.back().op=op_loadl; + } else if (code.back().op==op_mupval) { + code.back().op=op_loadu; + } else { + gen(op_meq, 1, node[0]->get_location().begin_line); + } + } + ++in_iterloop.top(); + block_gen(node[2]); + --in_iterloop.top(); + gen(op_jmp, ptr, node->get_location().begin_line); + code[ptr].num=code.size(); + load_continue_break(code.size()-1, code.size()); + gen(op_pop, 0, node[1]->get_location().begin_line);// pop vector + gen(op_pop, 0, node->get_location().begin_line);// pop iterator +} + +void codegen::foreach_gen(forei_expr* node) { + calc_gen(node[1]); + gen(op_cnt, 0, node->get_location().begin_line); + usize ptr=code.size(); + gen(op_feach, 0, node->get_location().begin_line); + if (node[0]->get_type()==expr_type::ast_iter) { // define a new iterator + const std::string& str=node[0][0].str(); + local.empty()? + gen(op_loadg, global_find(str), node[0][0]->get_location().begin_line): + gen(op_loadl, local_find(str), node[0][0]->get_location().begin_line); + } else { // use exist variable as the iterator + mcall(node[0]); + if (code.back().op==op_mcallg) { + code.back().op=op_loadg; + } else if (code.back().op==op_mcalll) { + code.back().op=op_loadl; + } else if (code.back().op==op_mupval) { + code.back().op=op_loadu; + } else { + gen(op_meq, 1, node[0]->get_location().begin_line); + } + } + ++in_iterloop.top(); + block_gen(node[2]); + --in_iterloop.top(); + gen(op_jmp, ptr, node->get_location().begin_line); + code[ptr].num=code.size(); + load_continue_break(code.size()-1, code.size()); + gen(op_pop, 0, node[1]->get_location().begin_line);// pop vector + gen(op_pop, 0, node->get_location().begin_line);// pop iterator +} + +void codegen::or_gen(binary_operator* node) { + calc_gen(node->get_left()); + usize l1 = code.size(); + gen(op_jt, 0, node->get_left()->get_location().begin_line); + + gen(op_pop, 0, node->get_left()->get_location().begin_line); + calc_gen(node->get_right()); + usize l2=code.size(); + gen(op_jt, 0, node->get_right()->get_location().begin_line); + + gen(op_pop, 0, node->get_right()->get_location().begin_line); + gen(op_pnil, 0, node->get_right()->get_location().begin_line); + + code[l1].num = code[l2].num = code.size(); +} + +void codegen::and_gen(binary_operator* node) { + calc_gen(node->get_left()); + gen(op_jt, code.size()+2, node->get_left()->get_location().begin_line); + + usize lfalse=code.size(); + gen(op_jmp, 0, node->get_left()->get_location().begin_line); + gen(op_pop, 0, node->get_right()->get_location().begin_line);// jt jumps here + + calc_gen(node->get_right()); + gen(op_jt, code.size()+3, node->get_right()->get_location().begin_line); + + code[lfalse].num=code.size(); + gen(op_pop, 0, node->get_right()->get_location().begin_line); + gen(op_pnil, 0, node->get_right()->get_location().begin_line); + // jt jumps here +} + +void codegen::trino_gen(ternary_operator* node) { + calc_gen(node->get_condition()); + usize lfalse=code.size(); + gen(op_jf, 0, node->get_condition()->get_location().begin_line); + calc_gen(node->get_left()); + usize lexit=code.size(); + gen(op_jmp, 0, node->get_left()->get_location().begin_line); + code[lfalse].num=code.size(); + calc_gen(node->get_right()); + code[lexit].num=code.size(); +} + +void codegen::calc_gen(expr* node) { + switch(node->get_type()) { + case expr_type::ast_nil: gen(op_pnil,0,node->get_location().begin_line);break; + case expr_type::ast_num: num_gen(node); break; + case expr_type::ast_str: str_gen(node); break; + case expr_type::ast_id: call_id(node); break; + case expr_type::ast_bool: bool_gen(node); break; + case expr_type::ast_vec: vec_gen(node); break; + case expr_type::ast_hash: hash_gen(node); break; + case expr_type::ast_func: func_gen(node); break; + case expr_type::ast_call: call_gen(node); break; + case expr_type::ast_equal: + calc_gen(node[1]); + mcall(node[0]); + gen(op_meq, 0, node->get_location().begin_line); + break; + // ast_addeq(22)~ast_lnkeq(26) op_addeq(23)~op_lnkeq(27) + case expr_type::ast_addeq:case expr_type::ast_subeq:case expr_type::ast_multeq:case expr_type::ast_diveq: + if (node[1]->get_type()!=ast_num) { + calc_gen(node[1]); + } + mcall(node[0]); + if (node[1]->get_type()!=ast_num) { + gen(node->get_type()-ast_addeq+op_addeq, 0, node->get_location().begin_line); + } else { + regist_num(node[1].num()); + gen(node->get_type()-ast_addeq+op_addeqc, num_table[node[1].num()], node->get_location().begin_line); + } + break; + case expr_type::ast_lnkeq: + if (node[1]->get_type()!=ast_str) { + calc_gen(node[1]); + } else { + regist_str(node[1].str()); + } + mcall(node[0]); + if (node[1]->get_type()!=ast_str) { + gen(op_lnkeq, 0, node->get_location().begin_line); + } else { + gen(op_lnkeqc, str_table[node[1].str()], node->get_location().begin_line); + } + break; + case expr_type::ast_btandeq:case expr_type::ast_btoreq:case expr_type::ast_btxoreq: + calc_gen(node[1]); + mcall(node[0]); + gen(node->get_type()-ast_btandeq+op_btandeq, 0, node->get_location().begin_line); + break; + case expr_type::ast_or:or_gen(node);break; + case expr_type::ast_and:and_gen(node);break; + // ast_add(33)~ast_link(37) op_add(18)~op_lnk(22) + case expr_type::ast_add:case expr_type::ast_sub:case expr_type::ast_mult:case expr_type::ast_div: + calc_gen(node[0]); + if (node[1]->get_type()!=ast_num) { + calc_gen(node[1]); + gen(node->get_type()-ast_add+op_add, 0, node->get_location().begin_line); + } else { + regist_num(node[1].num()); + gen(node->get_type()-ast_add+op_addc, num_table[node[1].num()], node->get_location().begin_line); + } + break; + case expr_type::ast_link: + calc_gen(node[0]); + if (node[1]->get_type()!=ast_str) { + calc_gen(node[1]); + gen(op_lnk, 0, node->get_location().begin_line); + } else { + regist_str(node[1].str()); + gen(op_lnkc, str_table[node[1].str()], node->get_location().begin_line); + } + break; + // ast_cmpeq(27)~ast_geq(32) op_eq(29)~op_geq(34) + case expr_type::ast_cmpeq:case expr_type::ast_neq: + calc_gen(node[0]); + calc_gen(node[1]); + gen(node->get_type()-ast_cmpeq+op_eq, 0, node->get_location().begin_line); + break; + case expr_type::ast_less:case expr_type::ast_leq:case expr_type::ast_grt:case expr_type::ast_geq: + calc_gen(node[0]); + if (node[1]->get_type()!=ast_num) { + calc_gen(node[1]); + gen(node->get_type()-ast_less+op_less, 0, node->get_location().begin_line); + } else { + regist_num(node[1].num()); + gen(node->get_type()-ast_less+op_lessc, num_table[node[1].num()], node->get_location().begin_line); + } + break; + case expr_type::ast_trino:trino_gen(node);break; + case expr_type::ast_neg: + calc_gen(node[0]); + gen(op_usub, 0, node->get_location().begin_line); + break; + case expr_type::ast_lnot: + calc_gen(node[0]); + gen(op_lnot, 0, node->get_location().begin_line); + break; + case expr_type::ast_bnot: + calc_gen(node[0]); + gen(op_bnot, 0, node->get_location().begin_line); + break; + case expr_type::ast_bitor: + calc_gen(node[0]); + calc_gen(node[1]); + gen(op_btor, 0, node->get_location().begin_line); + break; + case expr_type::ast_bitxor: + calc_gen(node[0]); + calc_gen(node[1]); + gen(op_btxor, 0, node->get_location().begin_line); + break; + case expr_type::ast_bitand: + calc_gen(node[0]); + calc_gen(node[1]); + gen(op_btand, 0, node->get_location().begin_line); + break; + case expr_type::ast_def: + single_def(node); + call_id(node[0]); + break; + } +} + +void codegen::block_gen(code_block* node) { + for(auto& tmp:node.child()) { + switch(tmp->get_type()) { + case expr_type::ast_null:break; + case expr_type::ast_id:check_id_exist(tmp);break; + case expr_type::ast_nil:case expr_type::ast_num:case expr_type::ast_str:case expr_type::ast_bool:break; + case expr_type::ast_file:fileindex=tmp.num();break; // special node type in main block + case expr_type::ast_cond:cond_gen(tmp);break; + case expr_type::ast_continue: + continue_ptr.front().push_back(code.size()); + gen(op_jmp, 0, tmp->get_location().begin_line); + break; + case expr_type::ast_break: + break_ptr.front().push_back(code.size()); + gen(op_jmp, 0, tmp->get_location().begin_line); + break; + case expr_type::ast_while: + case expr_type::ast_for: + case expr_type::ast_forindex: + case expr_type::ast_foreach:loop_gen(tmp);break; + case expr_type::ast_equal: + case expr_type::ast_addeq:case expr_type::ast_subeq: + case expr_type::ast_multeq:case expr_type::ast_diveq:case expr_type::ast_lnkeq: + case expr_type::ast_btandeq:case expr_type::ast_btoreq:case expr_type::ast_btxoreq: + case expr_type::ast_vec:case expr_type::ast_hash:case expr_type::ast_func:case expr_type::ast_call: + case expr_type::ast_neg:case expr_type::ast_lnot:case expr_type::ast_bnot: + case expr_type::ast_bitor:case expr_type::ast_bitxor:case expr_type::ast_bitand: + case expr_type::ast_add:case expr_type::ast_sub:case expr_type::ast_mult:case expr_type::ast_div:case expr_type::ast_link: + case expr_type::ast_cmpeq:case expr_type::ast_neq: + case expr_type::ast_leq:case expr_type::ast_less: + case expr_type::ast_geq:case expr_type::ast_grt: + case expr_type::ast_or: + case expr_type::ast_and: + case expr_type::ast_trino: + case expr_type::ast_def: + case expr_type::ast_multi_assign:expr_gen(tmp);break; + case expr_type::ast_ret:ret_gen(tmp);break; + } + } +} + +void codegen::ret_gen(const ast& node) { + for(u32 i=0;iget_location().begin_line); + gen(op_pop, 0, node->get_location().begin_line); + } + if (node.size()) { + calc_gen(node[0]); + } else { + gen(op_pnil, 0, node->get_location().begin_line); + } + gen(op_ret, 0, node->get_location().begin_line); +} + +const error& codegen::compile(const parse& parse, const linker& import) { + fileindex=0; + file=import.filelist().data(); + in_iterloop.push(0); + find_symbol(parse.tree()); // search symbols first + gen(op_intg, global.size(), 0); + block_gen(parse.tree()); // generate main block + gen(op_exit, 0, 0); + + // size out of bound check + if (num_res.size()>0xffffff) { + err.load(file[0]); // load main execute file + err.err("code", "too many constant numbers: "+std::to_string(num_res.size())); + } + if (str_res.size()>0xffffff) { + err.load(file[0]); // load main execute file + err.err("code", "too many constant strings: "+std::to_string(str_res.size())); + } + if (global.size()>=STACK_DEPTH) { + err.load(file[0]); // load main execute file + err.err("code", "too many global variants: "+std::to_string(global.size())); + } + if (code.size()>0xffffff) { + err.load(file[0]); // load main execute file + err.err("code", "bytecode size overflow: "+std::to_string(code.size())); + } + return err; +} + +void codegen::print() { + // func end stack, reserved for code print + std::stack fbstk; + std::stack festk; + + // print const numbers + for(auto& num:num_res) { + std::cout<<" .number "<;\n"; + // avoid two empty lines + if (c.op!=op_newf) { + std::cout<<"\n"; + } + fbstk.pop(); + festk.pop(); + } + + // get function begin index and end index + if (c.op==op_newf) { + std::cout<:\n"; + for(u32 j=i;j +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning (disable:4244) +#pragma warning (disable:4267) +#endif + +class codegen { +private: + u16 fileindex; + error& err; + const std::string* file; + std::stack in_iterloop; + std::unordered_map num_table; + std::unordered_map str_table; + std::vector num_res; + std::vector str_res; + std::vector code; + std::list> continue_ptr; + std::list> break_ptr; + + // symbol table + // global : max STACK_DEPTH-1 values + std::unordered_map global; + // local : max 32768 upvalues 65536 values + // but in fact local scope also has less than STACK_DEPTH value + std::list> local; + + bool check_memory_reachable(call_expr*); + void check_id_exist(identifier*); + + void die(const std::string& info, const span& loc) { + err.err("code", loc, info); + } + + void regist_num(const f64); + void regist_str(const std::string&); + void find_symbol(code_block*); + void add_sym(const std::string&); + i32 local_find(const std::string&); + i32 global_find(const std::string&); + i32 upvalue_find(const std::string&); + + void gen(u8, u32, u32); + + void num_gen(number_literal*); + void str_gen(string_literal*); + void bool_gen(bool_literal*); + void vec_gen(vector_expr*); + void hash_gen(hash_expr*); + void func_gen(function*); + void call_gen(call_expr*); + void call_id(identifier*); + void call_hash_gen(call_hash*); + void call_vec(call_vector*); + void call_func(call_function*); + void mcall(call_expr*); + void mcall_id(identifier*); + void mcall_vec(call_vector*); + void mcall_hash(call_hash*); + void multi_def(definition_expr*); + void single_def(definition_expr*); + void def_gen(definition_expr*); + void multi_assign_gen(multi_assign*); + void cond_gen(condition_expr*); + void loop_gen(expr*); + void load_continue_break(i32, i32); + void while_gen(while_expr*); + void for_gen(for_expr*); + void expr_gen(expr*); + void forindex_gen(forei_expr*); + void foreach_gen(forei_expr*); + void or_gen(binary_operator*); + void and_gen(binary_operator*); + void trino_gen(ternary_operator*); + void calc_gen(expr*); + void block_gen(code_block*); + void ret_gen(return_expr*); + +public: + codegen(error& e): fileindex(0), err(e), file(nullptr) {} + const error& compile(const parse&, const linker&); + void print(); + const std::vector& strs() const {return str_res;} + const std::vector& nums() const {return num_res;} + const std::vector& codes() const {return code;} +}; diff --git a/ast/nasal_new_main.cpp b/ast/nasal_new_main.cpp index 75fbf95..1a2aeaf 100644 --- a/ast/nasal_new_main.cpp +++ b/ast/nasal_new_main.cpp @@ -6,6 +6,7 @@ #include "nasal_new_import.h" #include "ast_visitor.h" #include "ast_dumper.h" +#include "symbol_finder.h" #include "optimizer.h" #include @@ -102,6 +103,11 @@ void execute( opt->do_optimization(parse.tree()); delete opt; + auto finder = new symbol_finder; + for(const auto& symbol : finder->do_find(parse.tree())) { + std::cout << symbol << std::endl; + } + // code generator gets parser's ast and import file list to generate code // gen.compile(parse, ld).chkerr(); // if (cmd&VM_CODE) { diff --git a/ast/nasal_new_opcode.cpp b/ast/nasal_new_opcode.cpp index bbe9ef5..6004308 100644 --- a/ast/nasal_new_opcode.cpp +++ b/ast/nasal_new_opcode.cpp @@ -3,11 +3,11 @@ void codestream::set( const f64* numbuff, const std::string* strbuff, - const std::string* filelist=nullptr + const std::string* filelist ) { - nums=numbuff; - strs=strbuff; - files=filelist; + nums = numbuff; + strs = strbuff; + files = filelist; } void codestream::dump(std::ostream& out) const { diff --git a/ast/nasal_new_opcode.h b/ast/nasal_new_opcode.h index e2428f6..c34d831 100644 --- a/ast/nasal_new_opcode.h +++ b/ast/nasal_new_opcode.h @@ -139,7 +139,7 @@ private: inline static const std::string* files = nullptr; public: codestream(const opcode& c, const u32 i): code(c), index(i) {} - static void set(const f64*, const std::string*, const std::string*); + static void set(const f64*, const std::string*, const std::string* filelist = nullptr); void dump(std::ostream&) const; }; diff --git a/ast/nasal_new_parse.cpp b/ast/nasal_new_parse.cpp index 5660f36..c4398e0 100644 --- a/ast/nasal_new_parse.cpp +++ b/ast/nasal_new_parse.cpp @@ -774,8 +774,7 @@ multi_identifier* parse::multi_id() { auto node = new multi_identifier(toks[ptr].loc); while(!lookahead(tok::eof)) { // only identifier is allowed here - // but we check it at codegen stage - node->add_var(calc()); + node->add_var(id()); if (lookahead(tok::comma)) { match(tok::comma); } else if (lookahead(tok::id)) { // first set of identifier diff --git a/ast/symbol_finder.cpp b/ast/symbol_finder.cpp new file mode 100644 index 0000000..7b2cdf2 --- /dev/null +++ b/ast/symbol_finder.cpp @@ -0,0 +1,29 @@ +#include "symbol_finder.h" + +bool symbol_finder::visit_definition_expr(definition_expr* node) { + if (node->get_variable_name()) { + symbols.push_back(node->get_variable_name()->get_name()); + } else { + for(auto i : node->get_variables()->get_variables()) { + symbols.push_back(i->get_name()); + } + } + node->get_value()->accept(this); + return true; +} + +bool symbol_finder::visit_function(function* node) { + return true; +} + +bool symbol_finder::visit_iter_expr(iter_expr* node) { + if (node->get_name()) { + symbols.push_back(node->get_name()->get_name()); + } +} + +const std::vector& symbol_finder::do_find(code_block* root) { + symbols.clear(); + root->accept(this); + return symbols; +} \ No newline at end of file diff --git a/ast/symbol_finder.h b/ast/symbol_finder.h new file mode 100644 index 0000000..7444644 --- /dev/null +++ b/ast/symbol_finder.h @@ -0,0 +1,19 @@ +#pragma once + +#include "nasal_new_ast.h" +#include "ast_visitor.h" + +#include +#include +#include + +class symbol_finder:public ast_visitor { +private: + std::vector symbols; + +public: + bool visit_definition_expr(definition_expr*) override; + bool visit_function(function*) override; + bool visit_iter_expr(iter_expr*) override; + const std::vector& do_find(code_block*); +}; \ No newline at end of file diff --git a/makefile b/makefile index 89209bb..e8e9bbe 100644 --- a/makefile +++ b/makefile @@ -85,6 +85,7 @@ NASAL_NEW_AST=\ nasal_new_opcode.o\ nasal_new_parse.o\ optimizer.o\ + symbol_finder.o\ ast_visitor.o\ ast_dumper.o\ nasal_new_main.o @@ -119,7 +120,7 @@ nasal_new_builtin.o: ast/nasal_new_builtin.h ast/nasal_new_builtin.cpp $(CXX) -std=$(STD) -c -O3 ast/nasal_new_builtin.cpp -fno-exceptions -fPIC -o nasal_new_builtin.o -I . nasal_new_codegen.o: ast/nasal_new_codegen.h ast/nasal_new_codegen.cpp - $(CXX) -std=$(STD) -c -O3 ast/nasal_new_codegen.cpp -fno-exceptions -fPIC -o nasal_new_codegen.o -I . +# $(CXX) -std=$(STD) -c -O3 ast/nasal_new_codegen.cpp -fno-exceptions -fPIC -o nasal_new_codegen.o -I . nasal_new_opcode.o: ast/nasal_new_opcode.h ast/nasal_new_opcode.cpp $(CXX) -std=$(STD) -c -O3 ast/nasal_new_opcode.cpp -fno-exceptions -fPIC -o nasal_new_opcode.o -I . @@ -130,6 +131,9 @@ nasal_new_parse.o: ast/nasal_new_parse.h ast/nasal_new_parse.cpp ast/nasal_new_a optimizer.o: ast/optimizer.h ast/optimizer.cpp ast/nasal_new_ast.h $(CXX) -std=$(STD) -c -O3 ast/optimizer.cpp -fno-exceptions -fPIC -o optimizer.o -I . +symbol_finder.o: ast/symbol_finder.h ast/symbol_finder.cpp ast/nasal_new_ast.h + $(CXX) -std=$(STD) -c -O3 ast/symbol_finder.cpp -fno-exceptions -fPIC -o symbol_finder.o -I . + ast_visitor.o: ast/nasal_new_ast.h ast/ast_visitor.h ast/ast_visitor.cpp $(CXX) -std=$(STD) -c -O3 ast/ast_visitor.cpp -fno-exceptions -fPIC -o ast_visitor.o -I .