From fa618eb97fbc91e0bd953935f53e7d569af556b7 Mon Sep 17 00:00:00 2001 From: Li Haokun Date: Tue, 3 Aug 2021 18:55:11 +0800 Subject: [PATCH] variables can be used before definition change program to command line change trace back info change print function of nasal_vec and nasal_hash --- README.md | 4 ++ main.cpp | 112 ++++++++++++++++++++++-------------------------- nasal_builtin.h | 71 +++++++++++++++--------------- nasal_codegen.h | 38 +++++++++++++--- nasal_gc.h | 21 ++++++--- nasal_lexer.h | 2 +- nasal_vm.h | 70 ++++++++++++++++-------------- 7 files changed, 178 insertions(+), 140 deletions(-) diff --git a/README.md b/README.md index 0d7272d..92dbe27 100644 --- a/README.md +++ b/README.md @@ -844,5 +844,9 @@ You will get an error of 'undefined symbol', instead of nothing happening in mos This change is __controversial__ among FGPRC's members. So maybe in the future i will use dynamic analysis again to cater to the habits of senior programmers. +(2021/8/3 update) __Now i use scanning ast twice to reload symbols. +So this difference does not exist from this update.__ +But a new difference is that if you call a variable before defining it, you'll get nil instead of 'undefined error'. + In this new interpreter, function doesn't put dynamic arguments into vector 'arg' automatically. So if you use 'arg' without definition, you'll get an error of 'undefined symbol'. diff --git a/main.cpp b/main.cpp index ff61bef..4fdd3b3 100644 --- a/main.cpp +++ b/main.cpp @@ -1,42 +1,30 @@ #include "nasal.h" -void help_interact() -{ - std::cout - <<">> [ ] input a file name to execute. \n" - <<">> [help ] show help. \n" - <<">> [ast ] view abstract syntax tree. \n" - <<">> [code ] view byte code. \n" - <<">> [exec ] execute program on bytecode vm.\n" - <<">> [logo ] print logo of nasal . \n" - <<">> [exit ] quit nasal interpreter. \n"; - return; -} void help_cmd() { std::cout #ifdef _WIN32 <<"use command \'chcp 65001\' if want to use unicode.\n" #endif - <<"nasal [option]|[file]\n" - <<" input 0 argument to use the interactive interpreter.\n" + <<"nasal [option]\n" <<"option:\n" <<" -h, --help | get help.\n" - <<" -v, --version | get version of nasal interpreter.\n" + <<" -v, --version | get version of nasal interpreter.\n\n" + <<"nasal [file]\n" + <<"file:\n" + <<" input file name to execute script file.\n\n" + <<"nasal [option] [file]\n" + <<"option:\n" + <<" --lex | view token info.\n" + <<" --ast | view abstract syntax tree.\n" + <<" --code | view bytecode.\n" + <<" --exec | execute script file.\n" + <<" --time | execute and get the running time.\n" <<"file:\n" <<" input file name to execute script file.\n"; return; } -void info() -{ - std::cout - <<">> thanks to https://github.com/andyross/nasal\n" - <<">> code: https://github.com/ValKmjolnir/Nasal-Interpreter\n" - <<">> code: https://gitee.com/valkmjolnir/Nasal-Interpreter\n" - <<">> info: http://wiki.flightgear.org/Nasal_scripting_language\n" - <<">> input \"help\" to get help .\n"; - return; -} + void logo() { std::cout @@ -45,14 +33,21 @@ void logo() <<" / \\/ / _` / __|/ _` | | \n" <<" / /\\ / (_| \\__ \\ (_| | | \n" <<" \\_\\ \\/ \\__,_|___/\\__,_|_|\n" - <<" nasal interpreter ver 7.0 \n"; + <<"nasal interpreter ver 7.0\n" + <<"thanks to : https://github.com/andyross/nasal\n" + <<"code repo : https://github.com/ValKmjolnir/Nasal-Interpreter\n" + <<"code repo : https://gitee.com/valkmjolnir/Nasal-Interpreter\n" + <<"lang info : http://wiki.flightgear.org/Nasal_scripting_language\n" + <<"input \"nasal -h\" to get help .\n"; return; } + void die(const char* stage,std::string& filename) { - std::cout<<">> ["<: error(s) occurred,stop.\n"; + std::cout<<">> ["<: error(s) occurred,stop.\n"; return; } + void execute(std::string& file,std::string& command) { nasal_lexer lexer; @@ -67,13 +62,18 @@ void execute(std::string& file,std::string& command) die("lexer",file); return; } + if(command=="--lex") + { + lexer.print_token(); + return; + } parse.main_process(lexer.get_token_list()); if(parse.get_error()) { die("parse",file); return; } - if(command=="ast") + if(command=="--ast") { parse.get_root().print_ast(0); return; @@ -91,7 +91,7 @@ void execute(std::string& file,std::string& command) die("codegen",file); return; } - if(command=="code") + if(command=="--code") { codegen.print_byte_code(); return; @@ -101,55 +101,45 @@ void execute(std::string& file,std::string& command) codegen.get_num_table(), import.get_file() ); - vm.run(codegen.get_exec_code()); + if(command=="--exec") + vm.run(codegen.get_exec_code()); + else if(command=="--time") + { + clock_t begin=clock(); + vm.run(codegen.get_exec_code()); + std::cout<<"process exited after "<<((double)(clock()-begin))/CLOCKS_PER_SEC<<"s.\n"; + } vm.clear(); return; } -void interact() -{ -#ifdef _WIN32 - // use chcp 65001 to use unicode io - system("chcp 65001"); -#endif - std::string command,file="null"; - logo(); - info(); - while(1) - { - std::cout<<">> "; - std::cin>>command; - if(command=="help") - help_interact(); - else if(command=="logo") - logo(); - else if(command=="exit") - return; - else if(command=="ast" || command=="code" || command=="exec") - execute(file,command); - else - file=command; - } -} int main(int argc,const char* argv[]) { std::string command,file="null"; - if(argc==1) - interact(); - else if(argc==2 && (!strcmp(argv[1],"-v") || !strcmp(argv[1],"--version"))) + if(argc==2 && (!strcmp(argv[1],"-v") || !strcmp(argv[1],"--version"))) logo(); else if(argc==2 && (!strcmp(argv[1],"-h") || !strcmp(argv[1],"--help"))) help_cmd(); else if(argc==2 && argv[1][0]!='-') { file=argv[1]; - command="exec"; + command="--exec"; + execute(file,command); + } + else if(argc==3 && + (!strcmp(argv[1],"--lex") || + !strcmp(argv[1],"--ast") || + !strcmp(argv[1],"--code") || + !strcmp(argv[1],"--exec") || + !strcmp(argv[1],"--time"))) + { + file=argv[2]; + command=argv[1]; execute(file,command); - return 0; } else { std::cout - <<"invalid command.\n" + <<"invalid argument(s).\n" <<"use nasal -h to get help.\n"; } return 0; diff --git a/nasal_builtin.h b/nasal_builtin.h index e65c2c6..07f1961 100644 --- a/nasal_builtin.h +++ b/nasal_builtin.h @@ -175,7 +175,7 @@ nasal_val* builtin_system(std::vector& local_scope,nasal_gc& gc) builtin_err("system","\"str\" must be string"); return nullptr; } - ret_addr->ptr.num=(double)system(str_addr->ptr.str->data()); + ret_addr->ptr.num=(double)system(str_addr->ptr.str->c_str()); return ret_addr; } @@ -252,37 +252,38 @@ nasal_val* builtin_fout(std::vector& local_scope,nasal_gc& gc) nasal_val* builtin_split(std::vector& local_scope,nasal_gc& gc) { - nasal_val* delimeter_val_addr=local_scope[1]; - nasal_val* string_val_addr=local_scope[2]; - if(delimeter_val_addr->type!=vm_str) + nasal_val* deli_val_addr=local_scope[1]; + nasal_val* str_val_addr=local_scope[2]; + if(deli_val_addr->type!=vm_str) { builtin_err("split","\"delimeter\" must be string"); return nullptr; } - if(string_val_addr->type!=vm_str) + if(str_val_addr->type!=vm_str) { builtin_err("split","\"string\" must be string"); return nullptr; } - std::string delimeter=*delimeter_val_addr->ptr.str; - std::string source=*string_val_addr->ptr.str; - int delimeter_len=delimeter.length(); - int source_len=source.length(); - nasal_val* ret_addr=gc.builtin_alloc(vm_vec); - std::vector& ref_vec=ret_addr->ptr.vec->elems; - std::string tmp=""; + std::string& delimeter=*deli_val_addr->ptr.str; + std::string& source=*str_val_addr->ptr.str; + size_t delimeter_len=delimeter.length(); + size_t source_len=source.length(); + // push it to local scope to avoid being sweeped + local_scope.push_back(gc.gc_alloc(vm_vec)); + + std::vector& vec=local_scope.back()->ptr.vec->elems; if(!delimeter_len) { for(int i=0;iptr.str=source[i]; - ref_vec.push_back(str_addr); + vec.push_back(gc.gc_alloc(vm_str)); + *vec.back()->ptr.str=source[i]; } - return ret_addr; + return local_scope.back(); } - + + std::string tmp=""; for(int i=0;i& local_scope,nasal_gc& gc) { if(tmp.length()) { - nasal_val* str_addr=gc.builtin_alloc(vm_str); - *str_addr->ptr.str=tmp; - ref_vec.push_back(str_addr); + vec.push_back(gc.gc_alloc(vm_str)); + *vec.back()->ptr.str=tmp; tmp=""; } i+=delimeter_len-1; @@ -310,12 +310,11 @@ nasal_val* builtin_split(std::vector& local_scope,nasal_gc& gc) } if(tmp.length()) { - nasal_val* str_addr=gc.builtin_alloc(vm_str); - *str_addr->ptr.str=tmp; - ref_vec.push_back(str_addr); + vec.push_back(gc.gc_alloc(vm_str)); + *vec.back()->ptr.str=tmp; tmp=""; } - return ret_addr; + return local_scope.back(); } nasal_val* builtin_rand(std::vector& local_scope,nasal_gc& gc) { @@ -330,11 +329,11 @@ nasal_val* builtin_rand(std::vector& local_scope,nasal_gc& gc) srand((unsigned int)val_addr->ptr.num); return gc.nil_addr; } - double num=0; - for(int i=0;i<5;++i) - num=(num+rand())*(1.0/(RAND_MAX+1.0)); + nasal_val* ret_addr=gc.gc_alloc(vm_num); - ret_addr->ptr.num=num; + ret_addr->ptr.num=0; + for(int i=0;i<5;++i) + ret_addr->ptr.num=(ret_addr->ptr.num+rand())*(1.0/(RAND_MAX+1.0)); return ret_addr; } nasal_val* builtin_id(std::vector& local_scope,nasal_gc& gc) @@ -342,7 +341,7 @@ nasal_val* builtin_id(std::vector& local_scope,nasal_gc& gc) nasal_val* val_addr=local_scope[1]; nasal_val* ret_addr=gc.gc_alloc(vm_str); char buf[32]; - sprintf(buf,"0x%p",val_addr); + sprintf(buf,"%p",val_addr); *ret_addr->ptr.str=buf; return ret_addr; } @@ -652,15 +651,17 @@ nasal_val* builtin_keys(std::vector& local_scope,nasal_gc& gc) builtin_err("keys","\"hash\" must be hash"); return nullptr; } - nasal_val* ret_addr=gc.builtin_alloc(vm_vec); - std::vector& ref_vec=ret_addr->ptr.vec->elems; + + // push vector into local scope to avoid being sweeped + local_scope.push_back(gc.gc_alloc(vm_vec)); + std::vector& vec=local_scope.back()->ptr.vec->elems; for(auto iter:hash_addr->ptr.hash->elems) { - nasal_val* str_addr=gc.builtin_alloc(vm_str); + nasal_val* str_addr=gc.gc_alloc(vm_str); *str_addr->ptr.str=iter.first; - ref_vec.push_back(str_addr); + vec.push_back(str_addr); } - return ret_addr; + return local_scope.back(); } nasal_val* builtin_import(std::vector& local_scope,nasal_gc& gc) { @@ -799,7 +800,7 @@ nasal_val* builtin_cmp(std::vector& local_scope,nasal_gc& gc) return nullptr; } nasal_val* ret_addr=gc.gc_alloc(vm_num); - ret_addr->ptr.num=strcmp(a_addr->ptr.str->data(),b_addr->ptr.str->data()); + ret_addr->ptr.num=strcmp(a_addr->ptr.str->c_str(),b_addr->ptr.str->c_str()); return ret_addr; } nasal_val* builtin_chr(std::vector& local_scope,nasal_gc& gc) diff --git a/nasal_codegen.h b/nasal_codegen.h index 5411349..13cc5fe 100644 --- a/nasal_codegen.h +++ b/nasal_codegen.h @@ -210,6 +210,7 @@ private: void die(std::string,int); void regist_number(double); void regist_string(std::string&); + void find_symbol(nasal_ast&); void add_sym(std::string&); int local_find(std::string&); int global_find(std::string&); @@ -278,6 +279,32 @@ void nasal_codegen::regist_string(std::string& str) return; } +void nasal_codegen::find_symbol(nasal_ast& node) +{ + // symbol definition checked here + // if find a function, return + if(node.get_type()==ast_func) + return; + // find definition, check + else if(node.get_type()==ast_def) + { + if(node.get_children()[0].get_type()==ast_multi_id) + for(auto& i:node.get_children()[0].get_children()) + add_sym(i.get_str()); + else + add_sym(node.get_children()[0].get_str()); + find_symbol(node.get_children()[1]); + } + // find iterator(foreach, forindex), check + else if(node.get_type()==ast_new_iter) + add_sym(node.get_children()[0].get_str()); + // check children + else + for(auto& i:node.get_children()) + find_symbol(i); + return; +} + void nasal_codegen::add_sym(std::string& name) { if(local.empty()) @@ -419,6 +446,9 @@ void nasal_codegen::func_gen(nasal_ast& ast) gen(op_jmp,0,0); nasal_ast& block=ast.get_children()[1]; + // search symbols first, must use after loading parameters + // or the location of symbols will change and cause fatal error + find_symbol(block); block_gen(block); for(auto& i:local) exec_code[local_label].num+=i.size(); @@ -602,7 +632,6 @@ void nasal_codegen::mcall_hash(nasal_ast& ast) void nasal_codegen::single_def(nasal_ast& ast) { std::string& str=ast.get_children()[0].get_str(); - add_sym(str); calc_gen(ast.get_children()[1]); local.empty()?gen(op_loadg,global_find(str),ast.get_line()):gen(op_loadl,local_find(str),ast.get_line()); return; @@ -618,7 +647,6 @@ void nasal_codegen::multi_def(nasal_ast& ast) { calc_gen(vals[i]); std::string& str=ids[i].get_str(); - add_sym(str); local.empty()?gen(op_loadg,global_find(str),ids[i].get_line()):gen(op_loadl,local_find(str),ids[i].get_line()); } } @@ -629,7 +657,6 @@ void nasal_codegen::multi_def(nasal_ast& ast) { gen(op_callvi,i,ast.get_children()[1].get_line()); std::string& str=ids[i].get_str(); - add_sym(str); local.empty()?gen(op_loadg,global_find(str),ids[i].get_line()):gen(op_loadl,local_find(str),ids[i].get_line()); } gen(op_pop,0,ast.get_line()); @@ -840,7 +867,6 @@ void nasal_codegen::forindex_gen(nasal_ast& ast) if(ast.get_children()[0].get_type()==ast_new_iter) { std::string& str=ast.get_children()[0].get_children()[0].get_str(); - add_sym(str); local.empty()? gen(op_loadg,global_find(str),ast.get_children()[0].get_children()[0].get_line()) :gen(op_loadl,local_find(str),ast.get_children()[0].get_children()[0].get_line()); @@ -869,7 +895,6 @@ void nasal_codegen::foreach_gen(nasal_ast& ast) if(ast.get_children()[0].get_type()==ast_new_iter) { std::string& str=ast.get_children()[0].get_children()[0].get_str(); - add_sym(str); local.empty()? gen(op_loadg,global_find(str),ast.get_children()[0].get_children()[0].get_line()) :gen(op_loadl,local_find(str),ast.get_children()[0].get_children()[0].get_line()); @@ -1148,6 +1173,9 @@ void nasal_codegen::main_progress(nasal_ast& ast) global.clear(); local.clear(); + + // search symbols first + find_symbol(ast); gen(op_intg,0,0); for(auto& tmp:ast.get_children()) { diff --git a/nasal_gc.h b/nasal_gc.h index 11a7617..d4d98b6 100644 --- a/nasal_gc.h +++ b/nasal_gc.h @@ -96,6 +96,11 @@ nasal_val** nasal_vec::get_mem(int index) } void nasal_vec::print() { + if(!elems.size()) + { + std::cout<<"[]"; + return; + } std::cout<<'['; for(auto i:elems) { @@ -108,20 +113,19 @@ void nasal_vec::print() case vm_hash: i->ptr.hash->print(); break; case vm_func: std::cout<<"func(...){...}"; break; } - std::cout<<','; + std::cout<<",]"[i==elems.back()]; } - std::cout<<']'; return; } /*functions of nasal_hash*/ nasal_val* nasal_hash::get_val(std::string& key) { - nasal_val* ret_addr=nullptr; if(elems.count(key)) return elems[key]; else if(elems.count("parents")) { + nasal_val* ret_addr=nullptr; nasal_val* val_addr=elems["parents"]; if(val_addr->type==vm_vec) for(auto i:val_addr->ptr.vec->elems) @@ -136,11 +140,11 @@ nasal_val* nasal_hash::get_val(std::string& key) } nasal_val** nasal_hash::get_mem(std::string& key) { - nasal_val** mem_addr=nullptr; if(elems.count(key)) return &elems[key]; else if(elems.count("parents")) { + nasal_val** mem_addr=nullptr; nasal_val* val_addr=elems["parents"]; if(val_addr->type==vm_vec) for(auto i:val_addr->ptr.vec->elems) @@ -155,6 +159,12 @@ nasal_val** nasal_hash::get_mem(std::string& key) } void nasal_hash::print() { + if(!elems.size()) + { + std::cout<<"{}"; + return; + } + size_t iter=0; std::cout<<'{'; for(auto& i:elems) { @@ -169,9 +179,8 @@ void nasal_hash::print() case vm_hash: tmp->ptr.hash->print(); break; case vm_func: std::cout<<"func(...){...}"; break; } - std::cout<<','; + std::cout<<",}"[(++iter)==elems.size()]; } - std::cout<<'}'; return; } diff --git a/nasal_lexer.h b/nasal_lexer.h index e97ee8c..6eab0ba 100644 --- a/nasal_lexer.h +++ b/nasal_lexer.h @@ -124,7 +124,7 @@ void nasal_lexer::openfile(std::string& filename) if(fin.fail()) { ++error; - std::cout<<">> [lexer] cannot open file \""<> [lexer] cannot open file <"<.\n"; fin.close(); return; } diff --git a/nasal_vm.h b/nasal_vm.h index 325ab1e..b8c0a85 100644 --- a/nasal_vm.h +++ b/nasal_vm.h @@ -18,10 +18,12 @@ private: /* values used for debug */ std::vector bytecode; // bytecode std::vector files; // files - + /* debug functions */ + void bytecodeinfo(uint32_t); void stackinfo(int); void die(std::string); void stackoverflow(); + /* vm calculation functions*/ bool condition(nasal_val*); void opr_intg(); void opr_intl(); @@ -132,6 +134,14 @@ void nasal_vm::clear() imm.clear(); return; } +void nasal_vm::bytecodeinfo(uint32_t p) +{ + printf("\t0x%.8x: %s 0x%.8x",p,code_table[bytecode[p].op].name,bytecode[p].num); + if(bytecode[p].op==op_callb) + printf(":%s",builtin_func[bytecode[p].num].name); + printf(" (%s line %d)\n",files[bytecode[p].fidx].c_str(),bytecode[p].line); + return; +} void nasal_vm::stackinfo(int limit) { printf("vm stack(limit %d):\n",limit); @@ -143,10 +153,10 @@ void nasal_vm::stackinfo(int limit) switch(stack_top[-i]->type) { case vm_nil: printf("\t%p nil\n",stack_top[-i]);break; - case vm_num: printf("\t%p num :%lf\n",stack_top[-i],stack_top[-i]->ptr.num);break; - case vm_str: printf("\t%p str :",stack_top[-i]->ptr.str);raw_string(*stack_top[-i]->ptr.str);putchar('\n');break; + case vm_num: printf("\t%p num:%lf\n",stack_top[-i],stack_top[-i]->ptr.num);break; + case vm_str: printf("\t%p str:",stack_top[-i]->ptr.str);raw_string(*stack_top[-i]->ptr.str);putchar('\n');break; case vm_func: printf("\t%p func\n",stack_top[-i]->ptr.func);break; - case vm_vec: printf("\t%p vec \n",stack_top[-i]->ptr.vec); break; + case vm_vec: printf("\t%p vec\n",stack_top[-i]->ptr.vec);break; case vm_hash: printf("\t%p hash\n",stack_top[-i]->ptr.hash);break; default: printf("\t%p unknown\n",stack_top[-i]);break; } @@ -156,20 +166,12 @@ void nasal_vm::stackinfo(int limit) void nasal_vm::die(std::string str) { printf(">> [vm] error at 0x%.8x: %s\ntrace back:\n",pc,str.c_str()); - // add error pc into ret_stack - ret.push(pc); // trace back will use ret_stack + bytecodeinfo(pc); while(!ret.empty()) { - uint32_t point=ret.top(); + bytecodeinfo(ret.top()); ret.pop(); - printf( - "\t0x%.8x: %s 0x%.8x (%s line %d)\n", - point, - code_table[bytecode[point].op].name, - bytecode[point].num, - files[bytecode[point].fidx].c_str(), - bytecode[point].line); } stackinfo(10); gc.val_stack[STACK_MAX_DEPTH-1]=(nasal_val*)0xffff; @@ -177,17 +179,21 @@ void nasal_vm::die(std::string str) } void nasal_vm::stackoverflow() { - printf(">> [vm] stack overflow\nlast called(limit 10):\n"); - for(int i=0;i<10 && !ret.empty();++i,ret.pop()) + printf(">> [vm] stack overflow\ntrace back:\n"); + for(uint32_t same_cnt=0,point=0,last_point=0;!ret.empty();last_point=point,ret.pop()) { - uint32_t point=ret.top(); - printf( - "\t0x%.8x: %s 0x%.8x (%s line %d)\n", - point, - code_table[bytecode[point].op].name, - bytecode[point].num, - files[bytecode[point].fidx].c_str(), - bytecode[point].line); + point=ret.top(); + if(point!=last_point) + { + if(same_cnt) + { + printf("\t0x%.8x: %d same call(s) ...\n",last_point,same_cnt); + same_cnt=0; + } + bytecodeinfo(point); + } + else + ++same_cnt; } stackinfo(10); return; @@ -709,7 +715,7 @@ inline void nasal_vm::opr_callh() inline void nasal_vm::opr_callfv() { // get parameter list and function value - int args_size=imm[pc]; + uint32_t args_size=imm[pc]; nasal_val** vec=stack_top-args_size+1; nasal_val* func_addr=vec[-1]; if(func_addr->type!=vm_func) @@ -724,8 +730,8 @@ inline void nasal_vm::opr_callfv() auto& ref_default=ref_func.default_para; auto& ref_closure=gc.local.back(); - int offset=ref_func.offset; - int para_size=ref_func.key_table.size(); + uint32_t offset=ref_func.offset; + uint32_t para_size=ref_func.key_table.size(); // load arguments if(args_sizepara_size,for 0 to args_size will cause corruption - int min_size=std::min(para_size,args_size); + uint32_t min_size=std::min(para_size,args_size); for(int i=0;i& exec) { - uint64_t count[op_ret+1]={0}; + //uint64_t count[op_ret+1]={0}; void* opr_table[]= { &&nop, &&intg, &&intl, &&offset, @@ -970,15 +976,15 @@ void nasal_vm::run(std::vector& exec) imm.push_back(i.num); } + // set canary and program counter auto& canary=gc.val_stack[STACK_MAX_DEPTH-1]; - // clock_t begin=clock(); pc=0; + // run goto *code[pc]; nop: if(canary && canary!=(nasal_val*)0xffff) stackoverflow(); - // std::cout<<">> [vm] process exited after "<<((double)(clock()-begin))/CLOCKS_PER_SEC<<"s.\n"; // debug // for(int i=0;i<15;++i) // {