diff --git a/README.md b/README.md index b5acb40..3c724d9 100644 --- a/README.md +++ b/README.md @@ -46,16 +46,18 @@ this interpreter to a useful tool in your own projects(such as a script in your ## How to Compile Better choose the latest update of the interpreter. +Download the source code and build it! +It's quite easy to build this interpreter. MUST USE -O2/-O3 if want to optimize the interpreter! Also remember to use g++ or clang++. -> [cpp compiler] -std=c++11 -O2 main.cpp -o nasal.exe +> [cpp compiler] -std=c++11 -O3 main.cpp -o nasal.exe -fno-exceptions Or use this in linux/macOS/Unix -> [cpp compiler] -std=c++11 -O2 main.cpp -o nasal +> [cpp compiler] -std=c++11 -O3 main.cpp -o nasal -fno-exceptions ## How to Use? @@ -150,11 +152,13 @@ I decide to save the ast interpreter after releasing v4.0. Because it took me a ### Version 5.0(last update 2021/3/7) -I change my mind.AST interpreter leaves me too much things to do. +I change my mind. +AST interpreter leaves me too much things to do. -If i continue saving this interpreter,it will be harder for me to make the bytecode vm become more efficient. +If i continue saving this interpreter, +it will be harder for me to make the bytecode vm become more efficient. -## Byte Code VM +## Byte Code Virtual Machine ### Version 4.0 (last update 2020/12/17) @@ -170,7 +174,7 @@ There's an example of byte code below: for(var i=0;i<4000000;i+=1); ``` -```asm +```MIPS .number 0 .number 4e+006 .number 1 @@ -221,7 +225,7 @@ So the bytecode generator changed a lot. for(var i=0;i<4000000;i+=1); ``` -```asm +```MIPS .number 4e+006 0x00000000: intg 0x00000001 0x00000001: pzero 0x00000000 @@ -269,7 +273,7 @@ var f=func(x,y){return x+y;} f(1024,2048); ``` -```asm +```MIPS .number 1024 .number 2048 .symbol x @@ -323,7 +327,7 @@ codegen will generate byte code by nasal_codegen::call_gen() instead of nasal_co and the last child of the ast will be generated by nasal_codegen::mcall_gen(). So the bytecode is totally different now: -```asm +```MIPS .number 10 .number 2 .symbol _ @@ -394,7 +398,7 @@ is deleted from nasal_vm, and now nasal_vm use nasal_val** mem_addr to store the memory address. This will not cause fatal errors because the memory address is used __immediately__ after getting it. -### version 7.0 (2021/10/8) +### version 7.0 (last update 2021/10/8) 2021/6/26 update: @@ -421,7 +425,7 @@ op_addc,op_subc,op_mulc,op_divc,op_lnkc,op_addeqc,op_subeqc,op_muleqc,op_diveqc, Now the bytecode of test/bigloop.nas seems like this: -```asm +```MIPS .number 4e+006 .number 1 0x00000000: intg 0x00000001 @@ -450,7 +454,7 @@ var (a,b)=(1,2); a=b=0; ``` -```asm +```MIPS .number 2 0x00000000: intg 0x00000002 0x00000001: pone 0x00000000 @@ -475,9 +479,52 @@ New value type is added: vm_obj. This type is reserved for user to define their own value types. Related API will be added in the future. -## Test data +Fully functional closure: +Add new operands that get and set upvalues. +Delete an old operand 'op_offset'. -### version 6.5(i5-8250U windows10 2021/6/19) +2021/10/13 update: + +The format of output information of bytecodes changes to this: + +```MIPS +0x0000017c: jmp 0x181 +0x0000017d: calll 0x1 +0x0000017e: calll 0x1 +0x0000017f: callfv 0x1 +0x00000180: ret +0x00000181: newf 0x185 +0x00000182: intl 0x2 +0x00000183: para 0x29 ("f") +0x00000184: jmp 0x19d +0x00000185: newf 0x189 +0x00000186: intl 0x2 +0x00000187: para 0x1d ("x") +0x00000188: jmp 0x19c +0x00000189: calll 0x1 +0x0000018a: lessc 0x12 (2.000000) +0x0000018b: jf 0x18e +0x0000018c: calll 0x1 +0x0000018d: ret +0x0000018e: upval 0x0[0x1] +0x0000018f: upval 0x0[0x1] +0x00000190: callfv 0x1 +0x00000191: calll 0x1 +0x00000192: subc 0x13 (1.000000) +0x00000193: callfv 0x1 +0x00000194: upval 0x0[0x1] +0x00000195: upval 0x0[0x1] +0x00000196: callfv 0x1 +0x00000197: calll 0x1 +0x00000198: subc 0x12 (2.000000) +0x00000199: callfv 0x1 +0x0000019a: add +0x0000019b: ret +``` + +## Benchmark + +### version 6.5 (i5-8250U windows10 2021/6/19) running time and gc time: @@ -524,7 +571,7 @@ operands calling total times: |quick_sort.nas|16226|5561|4144|3524|2833| |bfs.nas|24707|16297|14606|14269|8672| -### version 7.0(i5-8250U ubuntu-WSL on windows10 2021/6/29) +### version 7.0 (i5-8250U ubuntu-WSL on windows10 2021/6/29) running time: @@ -541,7 +588,7 @@ running time: |quick_sort.nas|0s|great improvement| |bfs.nas|0.0156s|great improvement| -## How to Use Nasal to Program +## Use Nasal to Program ### basic value type @@ -587,27 +634,18 @@ var d={ member2:'str', 'member3':'member\'s name can also be a string constant', "member4":"also this", - function:func() - { + function:func(){ var a=me.member2~me.member3; return a; } }; -var f=func(x,y,z) -{ - return nil; -} -var f=func -{ - return 1024; -} -var f=func(x,y,z,default_para1=1,default_para2=2) -{ +var f=func(x,y,z){return nil;} +var f=func{return 1024;} +var f=func(x,y,z,default_para1=1,default_para2=2){ return x+y+z+default_para1+default_para2; } -var f=func(x,y,z,dynamic_para...) -{ +var f=func(x,y,z,dynamic_para...){ var sum=0; foreach(var i;dynamic_para) sum+=i; @@ -713,20 +751,33 @@ func(x,y){return x+y}(0,1); func(x){return 1/(1+math.exp(-x));}(0.5); ``` +There's an interesting test file 'y-combinator.nas', +try it for fun: +```javascript +var fib=func(f){ + return f(f); +}( + func(f){ + return func(x){ + if(x<2) return x; + return f(f)(x-1)+f(f)(x-2); + } + } +); +``` + ### closure Use closure to OOP. ```javascript -var f=func() -{ +var f=func(){ var a=1; return func(){return a;}; } print(f()()); -var student=func(name,age) -{ +var student=func(name,age){ var val={ name:name, age:age @@ -741,7 +792,7 @@ var student=func(name,age) } ``` -### built-in functions +### native functions Must import lib.nas or has these functions' definitions inside your code. @@ -779,7 +830,7 @@ nasal_ref builtin_print(std::vector& local,nasal_gc& gc) case vm_vec: i.vec()->print(); break; case vm_hash: i.hash()->print(); break; case vm_func: std::cout<<"func(...){...}"; break; - case vm_obj: std::cout<<""; break; + case vm_obj: std::cout<<""; break; } std::cout<&); - void print_op(int); + void print_op(uint32_t); void print_byte_code(); std::vector& get_str_table(){return str_res_table;} std::vector& get_num_table(){return num_res_table;} @@ -479,6 +481,8 @@ void nasal_codegen::func_gen(const nasal_ast& ast) find_symbol(block); block_gen(block); exec_code[local_label].num=local.back().size(); + if(local.back().size()>65536) + die("too many local variants: "+std::to_string(local.back().size())+".",block.get_line()); local.pop_back(); if(!block.get_children().size() || block.get_children().back().get_type()!=ast_ret) @@ -1288,35 +1292,43 @@ void nasal_codegen::main_progress(const nasal_ast& ast,const std::vector=STACK_MAX_DEPTH) die("too many global variants: "+std::to_string(global.size())+".",0); return; } -void nasal_codegen::print_op(int index) +void nasal_codegen::print_op(uint32_t index) { // print opcode index,opcode name,opcode immediate number - printf("0x%.8x: %s 0x%.8x",index,code_table[exec_code[index].op].name,exec_code[index].num); + const opcode& code=exec_code[index]; + printf("0x%.8x: %s ",index,code_table[code.op].name); // print detail info - switch(exec_code[index].op) + switch(code.op) { case op_addc:case op_subc:case op_mulc:case op_divc: case op_addeqc:case op_subeqc:case op_muleqc:case op_diveqc: case op_lessc:case op_leqc:case op_grtc:case op_geqc: - case op_pnum:printf(" (%lf)\n",num_res_table[exec_code[index].num]);break; - case op_callb:printf(" (%s)\n",builtin_func[exec_code[index].num].name);break; - case op_happ: - case op_pstr: + case op_pnum: + printf("0x%x (%lf)\n",code.num,num_res_table[code.num]);break; + case op_callvi:case op_newv:case op_callfv: + case op_intg:case op_intl: + case op_newf:case op_jmp:case op_jt:case op_jf: + printf("0x%x\n",code.num);break; + case op_callb: + printf("0x%x <%s>\n",code.num,builtin_func[code.num].name);break; + case op_callg:case op_mcallg:case op_loadg: + case op_calll:case op_mcalll:case op_loadl: + printf("0x%x\n",code.num);break; + case op_upval:case op_mupval:case op_loadu: + printf("0x%x[0x%x]\n",(code.num>>16)&0xffff,code.num&0xffff);break; + case op_happ:case op_pstr: case op_lnkc:case op_lnkeqc: - case op_callh: - case op_mcallh: - case op_para: - case op_defpara: - case op_dynpara: - printf(" ("); - raw_string(str_res_table[exec_code[index].num]); - printf(")\n"); + case op_callh:case op_mcallh: + case op_para:case op_defpara:case op_dynpara: + printf("0x%x (\"",code.num); + raw_string(str_res_table[code.num]); + printf("\")\n"); break; default:printf("\n");break; } @@ -1325,19 +1337,15 @@ void nasal_codegen::print_op(int index) void nasal_codegen::print_byte_code() { - if(num_res_table.size()) - std::cout<<".number"< bytecode; // bytecode std::vector files; // files /* debug functions */ - void bytecodeinfo(uint32_t); + void bytecodeinfo(const uint32_t); void traceback(); - void stackinfo(int); + void stackinfo(const uint32_t); void die(std::string); void stackoverflow(); /* vm calculation functions*/ bool condition(nasal_ref); + void opr_nop(); void opr_intg(); void opr_intl(); void opr_loadg(); @@ -141,12 +142,13 @@ void nasal_vm::clear() imm.clear(); return; } -void nasal_vm::bytecodeinfo(uint32_t p) +void nasal_vm::bytecodeinfo(const uint32_t p) { - printf("\t0x%.8x: %s 0x%.8x",p,code_table[bytecode[p].op].name,bytecode[p].num); - if(bytecode[p].op==op_callb) - printf(":%s",builtin_func[bytecode[p].num].name); - printf(" (%s line %d)\n",files[bytecode[p].fidx].c_str(),bytecode[p].line); + const opcode& code=bytecode[p]; + printf("\t0x%.8x: %s 0x%x",p,code_table[code.op].name,code.num); + if(code.op==op_callb) + printf(" <%s>",builtin_func[code.num].name); + printf(" (%s line %d)\n",files[code.fidx].c_str(),code.line); return; } void nasal_vm::traceback() @@ -172,41 +174,42 @@ void nasal_vm::traceback() printf("\t0x%.8x: %d same call(s) ...\n",last_point,same_cnt); return; } -void nasal_vm::stackinfo(int limit) +void nasal_vm::stackinfo(const uint32_t limit) { printf("vm stack(limit %d):\n",limit); uint32_t same_cnt=0; nasal_ref last_ptr={vm_none,0xffffffff}; - for(int i=0;i=gc.val_stack;++i) + for(uint32_t i=0;i=gc.val_stack;++i,--stack_top) { - if(stack_top[-i]==last_ptr) + if(stack_top[0]==last_ptr) { ++same_cnt; continue; } if(same_cnt) { - printf("\t%p ... | %d same value(s)\n",last_ptr.value.gcobj,same_cnt); + printf("\t... | %d same value(s)\n",same_cnt); same_cnt=0; } - last_ptr=stack_top[-i]; - printf("\t%p ",stack_top[-i].value.gcobj); - switch(stack_top[-i].type) + last_ptr=stack_top[0]; + const nasal_val* ptr=stack_top[0].value.gcobj; + putchar('\t'); + switch(stack_top[0].type) { - case vm_none: printf("undefined");break; - case vm_nil: printf("nil | gc.nil");break; - case vm_num: printf("num | %lf",stack_top[-i].value.num);break; - case vm_str: printf("str | ");raw_string(*stack_top[-i].value.gcobj->ptr.str);break; - case vm_func: printf("func | func(%lu para){..}",stack_top[-i].value.gcobj->ptr.func->key_table.size());break; - case vm_vec: printf("vec | [%lu val]",stack_top[-i].value.gcobj->ptr.vec->elems.size());break; - case vm_hash: printf("hash | {%lu member}",stack_top[-i].value.gcobj->ptr.hash->elems.size());break; - case vm_obj: printf("user data");break; + case vm_none: printf("null |");break; + case vm_nil: printf("nil |");break; + case vm_num: printf("num | %lf",stack_top[0].num());break; + case vm_str: printf("str | <%p> ",ptr);raw_string(*stack_top[0].str());break; + case vm_func: printf("func | <%p> func{entry=0x%x}",ptr,stack_top[0].func()->entry);break; + case vm_vec: printf("vec | <%p> [%lu val]",ptr,stack_top[0].vec()->elems.size());break; + case vm_hash: printf("hash | <%p> {%lu member}",ptr,stack_top[0].hash()->elems.size());break; + case vm_obj: printf("obj | <%p>",ptr);break; default: printf("unknown");break; } putchar('\n'); } if(same_cnt) - printf("\t%p ... | %d same value(s)\n",last_ptr.value.gcobj,same_cnt); + printf("\t... | %d same value(s)\n",same_cnt); return; } void nasal_vm::die(std::string str) @@ -231,7 +234,7 @@ inline bool nasal_vm::condition(nasal_ref val) return val.value.num; else if(val.type==vm_str) { - std::string& str=*val.str(); + const std::string& str=*val.str(); double num=str2num(str.c_str()); if(std::isnan(num)) return str.empty(); @@ -239,6 +242,7 @@ inline bool nasal_vm::condition(nasal_ref val) } return false; } +inline void nasal_vm::opr_nop(){} inline void nasal_vm::opr_intg() { // global values store on stack @@ -264,7 +268,7 @@ inline void nasal_vm::opr_loadl() } inline void nasal_vm::opr_loadu() { - func_stk.top()->upvalue[(imm[pc]&0xffff0000)>>16].vec()->elems[imm[pc]&0xffff]=(stack_top--)[0]; + func_stk.top()->upvalue[(imm[pc]>>16)&0xffff].vec()->elems[imm[pc]&0xffff]=(stack_top--)[0]; return; } inline void nasal_vm::opr_pnum() @@ -488,7 +492,8 @@ inline void nasal_vm::opr_leq(){op_cmp(<=);} inline void nasal_vm::opr_grt(){op_cmp(>);} inline void nasal_vm::opr_geq(){op_cmp(>=);} -#define op_cmp_const(type) stack_top[0]=(stack_top[0].to_number() type num_table[imm[pc]])?gc.one:gc.zero; +#define op_cmp_const(type)\ + stack_top[0]=(stack_top[0].to_number() type num_table[imm[pc]])?gc.one:gc.zero; inline void nasal_vm::opr_lessc(){op_cmp_const(<);} inline void nasal_vm::opr_leqc(){op_cmp_const(<=);} @@ -563,7 +568,7 @@ inline void nasal_vm::opr_calll() } inline void nasal_vm::opr_upval() { - (++stack_top)[0]=func_stk.top()->upvalue[(imm[pc]&0xffff0000)>>16].vec()->elems[imm[pc]&0xffff]; + (++stack_top)[0]=func_stk.top()->upvalue[(imm[pc]>>16)&0xffff].vec()->elems[imm[pc]&0xffff]; return; } inline void nasal_vm::opr_callv() @@ -775,7 +780,7 @@ inline void nasal_vm::opr_mcalll() } inline void nasal_vm::opr_mupval() { - mem_addr=&func_stk.top()->upvalue[(imm[pc]&0xffff0000)>>16].vec()->elems[imm[pc]&0xffff]; + mem_addr=&func_stk.top()->upvalue[(imm[pc]>>16)&0xffff].vec()->elems[imm[pc]&0xffff]; (++stack_top)[0]=mem_addr[0]; return; } @@ -836,7 +841,7 @@ inline void nasal_vm::opr_ret() } void nasal_vm::run(const std::vector& exec,const bool op_cnt) { - uint64_t count[op_ret+1]={0}; + uint64_t count[op_exit+1]={0}; const void* opr_table[]= { &&nop, &&intg, &&intl, &&loadg, @@ -858,7 +863,7 @@ void nasal_vm::run(const std::vector& exec,const bool op_cnt) &&callh, &&callfv, &&callfh, &&callb, &&slcbegin,&&slcend, &&slc, &&slc2, &&mcallg, &&mcalll, &&mupval, &&mcallv, - &&mcallh, &&ret + &&mcallh, &&ret, &&vmexit }; bytecode=exec; @@ -875,7 +880,7 @@ void nasal_vm::run(const std::vector& exec,const bool op_cnt) // run goto *code[pc]; -nop: +vmexit: if(canary.value.gcobj) stackoverflow(); if(op_cnt) @@ -896,10 +901,11 @@ nop: } return; // may cause stackoverflow -#define exec_operand(op,num) {op();++count[num];if(!canary.value.gcobj)goto *code[++pc];goto nop;} +#define exec_operand(op,num) {op();++count[num];if(!canary.value.gcobj)goto *code[++pc];goto vmexit;} // do not cause stackoverflow #define exec_opnodie(op,num) {op();++count[num];goto *code[++pc];} +nop: exec_opnodie(opr_nop ,op_nop ); // do nothing intg: exec_opnodie(opr_intg ,op_intg ); // stack+=imm[pc] (detected at codegen) intl: exec_opnodie(opr_intl ,op_intl ); // stack-=0 loadg: exec_opnodie(opr_loadg ,op_loadg ); // stack-=1 diff --git a/stl/result.nas b/stl/result.nas index e7b4159..c172beb 100644 --- a/stl/result.nas +++ b/stl/result.nas @@ -13,7 +13,7 @@ var ResultTrait={ }, unwrap:func(){ if(me.flag) - die("error: "~me.err); + die(me.err); return me.ok; } }; diff --git a/test/exception.nas b/test/exception.nas index 5e0ecc6..a7ea841 100644 --- a/test/exception.nas +++ b/test/exception.nas @@ -13,7 +13,7 @@ var ResultTrait={ }, unwrap:func(){ if(me.flag) - die("error: "~me.err); + die(me.err); return me.ok; } };