From 0b2fe61e6e6e7314ea44af06d1fb84790324c581 Mon Sep 17 00:00:00 2001 From: ValKmjolnir Date: Tue, 29 Jun 2021 17:18:05 +0800 Subject: [PATCH] add instruction & changes in codegen add some instructions that execute const values. the first symbol called in assignment will use op_load instead of op_meq,op_pop to assign. --- README.md | 80 ++++++++++++--- nasal_codegen.h | 145 ++++++++++++++++++++++++--- nasal_parse.h | 2 + nasal_vm.h | 254 ++++++++++++++++++++++++++++++++++-------------- 4 files changed, 375 insertions(+), 106 deletions(-) diff --git a/README.md b/README.md index 6ab0840..056c7ad 100644 --- a/README.md +++ b/README.md @@ -32,13 +32,15 @@ You could add your own built-in functions to change this interpreter to a useful Better choose the latest update of the interpreter. -MUST USE -O2/-O3 if want to optimize the interpreter! pragma gcc optimize(2) seems useless when using g++ +MUST USE -O2/-O3 if want to optimize the interpreter! -> g++ -std=c++11 -O2 main.cpp -o nasal.exe +Also remember to use g++ and clang++. + +> g++|clang++ -std=c++11 -O2 main.cpp -o nasal.exe Or use this in linux/macOS/Unix -> g++ -std=c++11 -O2 main.cpp -o nasal +> g++|clang++ -std=c++11 -O2 main.cpp -o nasal ## How to Use? @@ -117,7 +119,7 @@ If i continue saving this interpreter,it will be harder for me to make the bytec ## Byte Code Interpreter -### Version 4.0(last update 2020/12/17) +### Version 4.0 (last update 2020/12/17) I have just finished the first version of byte-code-interpreter. @@ -150,7 +152,7 @@ for(var i=0;i<4000000;i+=1); 0x0000000b: nop 0x00000000 ``` -### Version 5.0(last update 2021/3/7) +### Version 5.0 (last update 2021/3/7) I decide to optimize bytecode vm in this version. @@ -158,7 +160,7 @@ Because it takes more than 1.5s to count i from 0 to 4000000-1.This is not effic 2021/1/23 update: Now it can count from 0 to 4000000-1 in 1.5s. 
-### Version 6.0(last update 2021/6/1) +### Version 6.0 (last update 2021/6/1) Use loadg loadl callg calll mcallg mcalll to avoid branches. @@ -199,7 +201,7 @@ for(var i=0;i<4000000;i+=1); 0x0000000c: nop 0x00000000 ``` -### Version 6.5(last update 2021/6/24) +### Version 6.5 (last update 2021/6/24) 2021/5/31 update: Now gc can collect garbage correctly without re-collecting,which will cause fatal error. @@ -329,16 +331,62 @@ As you could see from the bytecode above,mcall/mcallv/mcallh operands' using fre And because of the new structure of mcall, addr_stack, a stack used to store the memory address, is deleted from nasal_vm, and now nasal_vm use nasal_val** mem_addr to store the memory address. This will not cause fatal errors because the memory address is used __immediately__ after getting it. -### version 7.0(latest) +### version 7.0 (latest) 2021/6/26 update: Instruction dispatch is changed from call-threading to computed-goto(with inline function).After changing the way of instruction dispatch,there is a great improvement in nasal_vm.Now vm can run test/bigloop and test/pi in 0.2s!And vm runs test/fib in 0.8s on linux.You could see the time use data below,in Test data section. -This version uses gcc extension "labels as values", which is also supported by clang.(But i don't know if MSVC supports this) +This version uses g++ extension "labels as values", which is also supported by clang++.(But i don't know if MSVC supports this) There is also a change in nasal_gc: std::vector global is deleted,now the global values are all stored on stack(from val_stack+0 to val_stack+intg-1). +2021/6/29 update: + +Add some instructions that execute const values:op_addc,op_subc,op_mulc,op_divc,op_lnkc,op_addeqc,op_subeqc,op_muleqc,op_diveqc,op_lnkeqc. 
+ +Now the bytecode of test/bigloop.nas looks like this: + +```asm +.number 4e+006 +.number 1 +0x00000000: intg 0x00000001 +0x00000001: pzero 0x00000000 +0x00000002: loadg 0x00000000 +0x00000003: callg 0x00000000 +0x00000004: pnum 0x00000000 (4000000) +0x00000005: less 0x00000000 +0x00000006: jf 0x0000000b +0x00000007: mcallg 0x00000000 +0x00000008: addeqc 0x00000001 (1) +0x00000009: pop 0x00000000 +0x0000000a: jmp 0x00000003 +0x0000000b: nop 0x00000000 +``` + +And this test file runs in 0.1s after this update. Most of the calculations are accelerated. + +Also, assignment bytecode has changed a lot. Now the first identifier that is called in an assignment will use op_load to assign, instead of op_meq,op_pop. + +```javascript +var (a,b)=(1,2); +a=b=0; +``` + +```asm +.number 2 +0x00000000: intg 0x00000002 +0x00000001: pone 0x00000000 +0x00000002: loadg 0x00000000 +0x00000003: pnum 0x00000000 (2) +0x00000004: loadg 0x00000001 +0x00000005: pzero 0x00000000 +0x00000006: mcallg 0x00000001 +0x00000007: meq 0x00000000 (b=0 use meq,pop->a) +0x00000008: loadg 0x00000000 (a=b use loadg) +0x00000009: nop 0x00000000 +``` + ## Test data ### version 6.5(i5-8250U windows10 2021/6/19) @@ -388,20 +436,20 @@ operands calling total times: |quick_sort.nas|16226|5561|4144|3524|2833| |bfs.nas|24707|16297|14606|14269|8672| -### version 7.0(i5-8250U ubuntu-WSL on windows10 2021/6/26) +### version 7.0(i5-8250U ubuntu-WSL on windows10 2021/6/29) running time: |file|total time|info| |:----|:----|:----| -|pi.nas|0.17s|great improvement| +|pi.nas|0.15625s|great improvement| |fib.nas|0.75s|great improvement| -|bp.nas|0.32s(5467 epoch)|good improvement| -|bigloop.nas|0.11s|great improvement| -|mandelbrot.nas|0.04s|great improvment| -|life.nas|8.80s(windows) 1.34(ubuntu WSL)|little improvement| +|bp.nas|0.4218s(7162 epoch)|good improvement| +|bigloop.nas|0.09375s|great improvement| +|mandelbrot.nas|0.0312s|great improvement| +|life.nas|8.80s(windows) 1.25(ubuntu WSL)|little improvement| 
|ascii-art.nas|0.015s|little improvement| -|calc.nas|0.0625s|little improvement| +|calc.nas|0.0468s|little improvement| |quick_sort.nas|0s|great improvement| |bfs.nas|0.0156s|great improvement| diff --git a/nasal_codegen.h b/nasal_codegen.h index 4d82725..8337aa8 100644 --- a/nasal_codegen.h +++ b/nasal_codegen.h @@ -28,11 +28,21 @@ enum op_code op_mul, // * op_div, // / op_lnk, // ~ + op_addc, // + const + op_subc, // - const + op_mulc, // * const + op_divc, // / const + op_lnkc, // ~ const op_addeq, // += op_subeq, // -= op_muleq, // *= op_diveq, // /= op_lnkeq, // ~= + op_addeqc, // += const + op_subeqc, // -= const + op_muleqc, // *= const + op_diveqc, // /= const + op_lnkeqc, // ~= const op_meq, // = op_eq, // == op_neq, // != @@ -98,11 +108,21 @@ struct {op_mul, "mult "}, {op_div, "div "}, {op_lnk, "link "}, + {op_addc, "addc "}, + {op_subc, "subc "}, + {op_mulc, "multc "}, + {op_divc, "divc "}, + {op_lnkc, "lnkc "}, {op_addeq, "addeq "}, {op_subeq, "subeq "}, {op_muleq, "muleq "}, {op_diveq, "diveq "}, {op_lnkeq, "lnkeq "}, + {op_addeqc, "addeqc"}, + {op_subeqc, "subeqc"}, + {op_muleqc, "muleqc"}, + {op_diveqc, "diveqc"}, + {op_lnkeqc, "lnkeqc"}, {op_meq, "meq "}, {op_eq, "eq "}, {op_neq, "neq "}, @@ -141,8 +161,8 @@ struct struct opcode { uint8_t op; - uint32_t num; - opcode(uint8_t _op=op_nop,uint32_t _num=0) + int32_t num; + opcode(uint8_t _op=op_nop,int32_t _num=0) { op=_op; num=_num; @@ -178,7 +198,7 @@ private: void add_sym(std::string&); int local_find(std::string&); int global_find(std::string&); - void gen(unsigned char,unsigned int); + void gen(uint8_t,int32_t); void num_gen(nasal_ast&); void str_gen(nasal_ast&); void vec_gen(nasal_ast&); @@ -283,7 +303,7 @@ int nasal_codegen::global_find(std::string& name) return -1; } -void nasal_codegen::gen(uint8_t op,uint32_t num) +void nasal_codegen::gen(uint8_t op,int32_t num) { exec_code.push_back({op,num}); return; @@ -454,6 +474,7 @@ void nasal_codegen::call_hash(nasal_ast& ast) void 
nasal_codegen::call_vec(nasal_ast& ast) { + // maybe this place can use callv-const if ast's first child is ast_num if(ast.get_children().size()==1 && ast.get_children()[0].get_type()!=ast_subvec) { calc_gen(ast.get_children()[0]); @@ -620,8 +641,17 @@ void nasal_codegen::multi_assign_gen(nasal_ast& ast) for(int i=0;i ret; // ptr stack stores address for function to return std::stack counter; // iterator stack for forindex/foreach std::vector str_table;// symbols used in process - std::vector imm; // immediate number + std::vector imm; // immediate number nasal_val** mem_addr; // used for mem_call nasal_gc gc; // garbage collector @@ -41,11 +41,21 @@ private: void opr_mul(); void opr_div(); void opr_lnk(); + void opr_addc(); + void opr_subc(); + void opr_mulc(); + void opr_divc(); + void opr_lnkc(); void opr_addeq(); void opr_subeq(); void opr_muleq(); void opr_diveq(); void opr_lnkeq(); + void opr_addeqc(); + void opr_subeqc(); + void opr_muleqc(); + void opr_diveqc(); + void opr_lnkeqc(); void opr_meq(); void opr_eq(); void opr_neq(); @@ -298,6 +308,41 @@ inline void nasal_vm::opr_lnk() (--stack_top)[0]=new_val; return; } +inline void nasal_vm::opr_addc() +{ + nasal_val* new_val=gc.gc_alloc(vm_num); + new_val->ptr.num=stack_top[0]->to_number()+gc.num_addrs[imm[pc]]->ptr.num; + stack_top[0]=new_val; + return; +} +inline void nasal_vm::opr_subc() +{ + nasal_val* new_val=gc.gc_alloc(vm_num); + new_val->ptr.num=stack_top[0]->to_number()-gc.num_addrs[imm[pc]]->ptr.num; + stack_top[0]=new_val; + return; +} +inline void nasal_vm::opr_mulc() +{ + nasal_val* new_val=gc.gc_alloc(vm_num); + new_val->ptr.num=stack_top[0]->to_number()*gc.num_addrs[imm[pc]]->ptr.num; + stack_top[0]=new_val; + return; +} +inline void nasal_vm::opr_divc() +{ + nasal_val* new_val=gc.gc_alloc(vm_num); + new_val->ptr.num=stack_top[0]->to_number()/gc.num_addrs[imm[pc]]->ptr.num; + stack_top[0]=new_val; + return; +} +inline void nasal_vm::opr_lnkc() +{ + nasal_val* new_val=gc.gc_alloc(vm_str); + 
*new_val->ptr.str=stack_top[0]->to_string()+str_table[imm[pc]]; + stack_top[0]=new_val; + return; +} inline void nasal_vm::opr_addeq() { nasal_val* new_val=gc.gc_alloc(vm_num); @@ -333,6 +378,41 @@ inline void nasal_vm::opr_lnkeq() (--stack_top)[0]=mem_addr[0]=new_val; return; } +inline void nasal_vm::opr_addeqc() +{ + nasal_val* new_val=gc.gc_alloc(vm_num); + new_val->ptr.num=mem_addr[0]->to_number()+gc.num_addrs[imm[pc]]->ptr.num; + stack_top[0]=mem_addr[0]=new_val; + return; +} +inline void nasal_vm::opr_subeqc() +{ + nasal_val* new_val=gc.gc_alloc(vm_num); + new_val->ptr.num=mem_addr[0]->to_number()-gc.num_addrs[imm[pc]]->ptr.num; + stack_top[0]=mem_addr[0]=new_val; + return; +} +inline void nasal_vm::opr_muleqc() +{ + nasal_val* new_val=gc.gc_alloc(vm_num); + new_val->ptr.num=mem_addr[0]->to_number()*gc.num_addrs[imm[pc]]->ptr.num; + stack_top[0]=mem_addr[0]=new_val; + return; +} +inline void nasal_vm::opr_diveqc() +{ + nasal_val* new_val=gc.gc_alloc(vm_num); + new_val->ptr.num=mem_addr[0]->to_number()/gc.num_addrs[imm[pc]]->ptr.num; + stack_top[0]=mem_addr[0]=new_val; + return; +} +inline void nasal_vm::opr_lnkeqc() +{ + nasal_val* new_val=gc.gc_alloc(vm_str); + *new_val->ptr.str=mem_addr[0]->to_string()+str_table[imm[pc]]; + stack_top[0]=mem_addr[0]=new_val; + return; +} inline void nasal_vm::opr_meq() { mem_addr[0]=(--stack_top)[0]; @@ -510,7 +590,7 @@ inline void nasal_vm::opr_callvi() nasal_val* val=stack_top[0]; if(val->type!=vm_vec) { - die("callvi: multi-definition/multi-assignment must use a vector"); + die("callvi: must use a vector"); return; } // cannot use operator[],because this may cause overflow @@ -761,6 +841,7 @@ inline void nasal_vm::opr_ret() } void nasal_vm::run(std::vector& exec) { + int count[72]={0}; void* opr_table[]= { &&nop, &&intg, &&intl, &&offset, @@ -769,16 +850,18 @@ void nasal_vm::run(std::vector& exec) &&newh, &&newf, &&happ, &¶, &&defpara, &&dynpara, &&unot, &&usub, &&add, &&sub, &&mul, &&div, - &&lnk, &&addeq, &&subeq, 
&&muleq, - &&diveq, &&lnkeq, &&meq, &&eq, - &&neq, &&less, &&leq, &&grt, - &&geq, &&pop, &&jmp, &&jt, - &&jf, &&counter, &&cntpop, &&findex, - &&feach, &&callg, &&calll, &&callv, - &&callvi, &&callh, &&callfv, &&callfh, - &&callb, &&slcbegin, &&slcend, &&slc, - &&slc2, &&mcallg, &&mcalll, &&mcallv, - &&mcallh, &&ret + &&lnk, &&addc, &&subc, &&mulc, + &&divc, &&lnkc, &&addeq, &&subeq, + &&muleq, &&diveq, &&lnkeq, &&addeqc, + &&subeqc, &&muleqc, &&diveqc, &&lnkeqc, + &&meq, &&eq, &&neq, &&less, + &&leq, &&grt, &&geq, &&pop, + &&jmp, &&jt, &&jf, &&counter, + &&cntpop, &&findex, &&feach, &&callg, + &&calll, &&callv, &&callvi, &&callh, + &&callfv, &&callfh, &&callb, &&slcbegin, + &&slcend, &&slc, &&slc2, &&mcallg, + &&mcalll, &&mcallv, &&mcallh, &&ret }; std::vector code; for(auto& i:exec) @@ -795,68 +878,91 @@ nop: if(gc.val_stack[STACK_MAX_DEPTH-1]&&gc.val_stack[STACK_MAX_DEPTH-1]!=(nasal_val*)0xffff) std::cout<<">> [vm] stack overflow.\n"; std::cout<<">> [vm] process exited after "<<((double)(clock()-begin))/CLOCKS_PER_SEC<<"s.\n"; + // debug + // for(int i=0;i<15;++i) + // { + // int maxnum=0,index=0; + // for(int j=0;j<62;++j) + // if(count[j]>maxnum) + // { + // index=j; + // maxnum=count[j]; + // } + // std::cout<