change output format of information of bytecodes

This commit is contained in:
ValKmjolnir 2021-10-13 22:59:15 +08:00
parent 5d13261516
commit 818685c48d
6 changed files with 172 additions and 109 deletions

129
README.md
View File

@ -46,16 +46,18 @@ this interpreter to a useful tool in your own projects(such as a script in your
## How to Compile
Better choose the latest update of the interpreter.
Download the source code and build it!
It's quite easy to build this interpreter.
You MUST use -O2/-O3 if you want to optimize the interpreter!
Also remember to use g++ or clang++.
> [cpp compiler] -std=c++11 -O2 main.cpp -o nasal.exe
> [cpp compiler] -std=c++11 -O3 main.cpp -o nasal.exe -fno-exceptions
Or use this in linux/macOS/Unix
> [cpp compiler] -std=c++11 -O2 main.cpp -o nasal
> [cpp compiler] -std=c++11 -O3 main.cpp -o nasal -fno-exceptions
## How to Use?
@ -150,11 +152,13 @@ I decide to save the ast interpreter after releasing v4.0. Because it took me a
### Version 5.0(last update 2021/3/7)
I change my mind.AST interpreter leaves me too much things to do.
I change my mind.
The AST interpreter leaves me too many things to do.
If i continue saving this interpreter,it will be harder for me to make the bytecode vm become more efficient.
If I continue keeping this interpreter,
it will be harder for me to make the bytecode vm more efficient.
## Byte Code VM
## Byte Code Virtual Machine
### Version 4.0 (last update 2020/12/17)
@ -170,7 +174,7 @@ There's an example of byte code below:
for(var i=0;i<4000000;i+=1);
```
```asm
```MIPS
.number 0
.number 4e+006
.number 1
@ -221,7 +225,7 @@ So the bytecode generator changed a lot.
for(var i=0;i<4000000;i+=1);
```
```asm
```MIPS
.number 4e+006
0x00000000: intg 0x00000001
0x00000001: pzero 0x00000000
@ -269,7 +273,7 @@ var f=func(x,y){return x+y;}
f(1024,2048);
```
```asm
```MIPS
.number 1024
.number 2048
.symbol x
@ -323,7 +327,7 @@ codegen will generate byte code by nasal_codegen::call_gen() instead of nasal_co
and the last child of the ast will be generated by nasal_codegen::mcall_gen().
So the bytecode is totally different now:
```asm
```MIPS
.number 10
.number 2
.symbol _
@ -394,7 +398,7 @@ is deleted from nasal_vm,
and now nasal_vm use nasal_val** mem_addr to store the memory address.
This will not cause fatal errors because the memory address is used __immediately__ after getting it.
### version 7.0 (2021/10/8)
### version 7.0 (last update 2021/10/8)
2021/6/26 update:
@ -421,7 +425,7 @@ op_addc,op_subc,op_mulc,op_divc,op_lnkc,op_addeqc,op_subeqc,op_muleqc,op_diveqc,
Now the bytecode of test/bigloop.nas seems like this:
```asm
```MIPS
.number 4e+006
.number 1
0x00000000: intg 0x00000001
@ -450,7 +454,7 @@ var (a,b)=(1,2);
a=b=0;
```
```asm
```MIPS
.number 2
0x00000000: intg 0x00000002
0x00000001: pone 0x00000000
@ -475,9 +479,52 @@ New value type is added: vm_obj.
This type is reserved for user to define their own value types.
Related API will be added in the future.
## Test data
Fully functional closure:
Add new operands that get and set upvalues.
Delete an old operand 'op_offset'.
### version 6.5(i5-8250U windows10 2021/6/19)
2021/10/13 update:
The output format of bytecode information has changed to this:
```MIPS
0x0000017c: jmp 0x181
0x0000017d: calll 0x1
0x0000017e: calll 0x1
0x0000017f: callfv 0x1
0x00000180: ret
0x00000181: newf 0x185
0x00000182: intl 0x2
0x00000183: para 0x29 ("f")
0x00000184: jmp 0x19d
0x00000185: newf 0x189
0x00000186: intl 0x2
0x00000187: para 0x1d ("x")
0x00000188: jmp 0x19c
0x00000189: calll 0x1
0x0000018a: lessc 0x12 (2.000000)
0x0000018b: jf 0x18e
0x0000018c: calll 0x1
0x0000018d: ret
0x0000018e: upval 0x0[0x1]
0x0000018f: upval 0x0[0x1]
0x00000190: callfv 0x1
0x00000191: calll 0x1
0x00000192: subc 0x13 (1.000000)
0x00000193: callfv 0x1
0x00000194: upval 0x0[0x1]
0x00000195: upval 0x0[0x1]
0x00000196: callfv 0x1
0x00000197: calll 0x1
0x00000198: subc 0x12 (2.000000)
0x00000199: callfv 0x1
0x0000019a: add
0x0000019b: ret
```
## Benchmark
### version 6.5 (i5-8250U windows10 2021/6/19)
running time and gc time:
@ -524,7 +571,7 @@ operands calling total times:
|quick_sort.nas|16226|5561|4144|3524|2833|
|bfs.nas|24707|16297|14606|14269|8672|
### version 7.0(i5-8250U ubuntu-WSL on windows10 2021/6/29)
### version 7.0 (i5-8250U ubuntu-WSL on windows10 2021/6/29)
running time:
@ -541,7 +588,7 @@ running time:
|quick_sort.nas|0s|great improvement|
|bfs.nas|0.0156s|great improvement|
## How to Use Nasal to Program
## Use Nasal to Program
### basic value type
@ -587,27 +634,18 @@ var d={
member2:'str',
'member3':'member\'s name can also be a string constant',
"member4":"also this",
function:func()
{
function:func(){
var a=me.member2~me.member3;
return a;
}
};
var f=func(x,y,z)
{
return nil;
}
var f=func
{
return 1024;
}
var f=func(x,y,z,default_para1=1,default_para2=2)
{
var f=func(x,y,z){return nil;}
var f=func{return 1024;}
var f=func(x,y,z,default_para1=1,default_para2=2){
return x+y+z+default_para1+default_para2;
}
var f=func(x,y,z,dynamic_para...)
{
var f=func(x,y,z,dynamic_para...){
var sum=0;
foreach(var i;dynamic_para)
sum+=i;
@ -713,20 +751,33 @@ func(x,y){return x+y}(0,1);
func(x){return 1/(1+math.exp(-x));}(0.5);
```
There's an interesting test file 'y-combinator.nas',
try it for fun:
```javascript
var fib=func(f){
return f(f);
}(
func(f){
return func(x){
if(x<2) return x;
return f(f)(x-1)+f(f)(x-2);
}
}
);
```
### closure
Use closures to implement OOP.
```javascript
var f=func()
{
var f=func(){
var a=1;
return func(){return a;};
}
print(f()());
var student=func(name,age)
{
var student=func(name,age){
var val={
name:name,
age:age
@ -741,7 +792,7 @@ var student=func(name,age)
}
```
### built-in functions
### native functions
You must import lib.nas or have these functions' definitions inside your code.
@ -779,7 +830,7 @@ nasal_ref builtin_print(std::vector<nasal_ref>& local,nasal_gc& gc)
case vm_vec: i.vec()->print(); break;
case vm_hash: i.hash()->print(); break;
case vm_func: std::cout<<"func(...){...}"; break;
case vm_obj: std::cout<<"<obj>"; break;
case vm_obj: std::cout<<"<object>"; break;
}
std::cout<<std::flush;
// if a nasal value is not in use,use gc::del_reference to delete it
@ -806,8 +857,7 @@ struct FUNC_TABLE
At last, wrap the '__builtin_print' in a nasal file:
```javascript
var print=func(elems...)
{
var print=func(elems...){
return __builtin_print(elems);
};
```
@ -815,8 +865,7 @@ var print=func(elems...)
In fact, the arguments that '__builtin_print' uses are not necessary, so writing it like this is also right:
```javascript
var print=func(elems...)
{
var print=func(elems...){
return __builtin_print;
};
```

View File

@ -138,7 +138,7 @@ int main(int argc,const char* argv[])
std::cout
<<"invalid argument(s).\n"
<<"use nasal -h to get help.\n";
exit(1);
std::exit(1);
}
return 0;
}

View File

@ -3,11 +3,11 @@
enum op_code
{
op_nop, // do nothing and end the vm main loop
op_nop, // do nothing
op_intg, // global scope size
op_intl, // local scope size
op_loadg, // load global symbol value
op_loadl, // load local symbol value
op_loadg, // load global value
op_loadl, // load local value
op_loadu, // load upvalue
op_pnum, // push constant number to the stack
op_pone, // push 1 to the stack
@ -62,9 +62,9 @@ enum op_code
op_cntpop, // pop counter
op_findex, // index counter on the top of forindex_stack plus 1
op_feach, // index counter on the top of forindex_stack plus 1 and get the value in vector
op_callg, // call value in global scope
op_calll, // call value in local scope
op_upval, // call upvalue in closure
op_callg, // get value in global scope
op_calll, // get value in local scope
op_upval, // get upvalue in closure
op_callv, // call vec[index]
op_callvi, // call vec[immediate] (used in multi-assign/multi-define)
op_callh, // call hash.label
@ -80,7 +80,8 @@ enum op_code
op_mupval, // get memory space of value in closure
op_mcallv, // get memory space of vec[index]
op_mcallh, // get memory space of hash.label
op_ret // return
op_ret, // return
op_exit // stop the virtual machine
};
struct
@ -113,7 +114,7 @@ struct
{op_sub, "sub "},
{op_mul, "mult "},
{op_div, "div "},
{op_lnk, "link "},
{op_lnk, "lnk "},
{op_addc, "addc "},
{op_subc, "subc "},
{op_mulc, "multc "},
@ -167,6 +168,7 @@ struct
{op_mcallv, "mcallv"},
{op_mcallh, "mcallh"},
{op_ret, "ret "},
{op_exit, "exit "},
{-1, nullptr },
};
@ -255,7 +257,7 @@ private:
public:
uint32_t get_error(){return error;}
void main_progress(const nasal_ast&,const std::vector<std::string>&);
void print_op(int);
void print_op(uint32_t);
void print_byte_code();
std::vector<std::string>& get_str_table(){return str_res_table;}
std::vector<double>& get_num_table(){return num_res_table;}
@ -479,6 +481,8 @@ void nasal_codegen::func_gen(const nasal_ast& ast)
find_symbol(block);
block_gen(block);
exec_code[local_label].num=local.back().size();
if(local.back().size()>65536)
die("too many local variants: "+std::to_string(local.back().size())+".",block.get_line());
local.pop_back();
if(!block.get_children().size() || block.get_children().back().get_type()!=ast_ret)
@ -1288,35 +1292,43 @@ void nasal_codegen::main_progress(const nasal_ast& ast,const std::vector<std::st
case ast_trino:calc_gen(tmp);gen(op_pop,0,tmp.get_line());break;
}
}
gen(op_nop,0,0);
gen(op_exit,0,0);
if(global.size()>=STACK_MAX_DEPTH)
die("too many global variants: "+std::to_string(global.size())+".",0);
return;
}
void nasal_codegen::print_op(int index)
void nasal_codegen::print_op(uint32_t index)
{
// print opcode index,opcode name,opcode immediate number
printf("0x%.8x: %s 0x%.8x",index,code_table[exec_code[index].op].name,exec_code[index].num);
const opcode& code=exec_code[index];
printf("0x%.8x: %s ",index,code_table[code.op].name);
// print detail info
switch(exec_code[index].op)
switch(code.op)
{
case op_addc:case op_subc:case op_mulc:case op_divc:
case op_addeqc:case op_subeqc:case op_muleqc:case op_diveqc:
case op_lessc:case op_leqc:case op_grtc:case op_geqc:
case op_pnum:printf(" (%lf)\n",num_res_table[exec_code[index].num]);break;
case op_callb:printf(" (%s)\n",builtin_func[exec_code[index].num].name);break;
case op_happ:
case op_pstr:
case op_pnum:
printf("0x%x (%lf)\n",code.num,num_res_table[code.num]);break;
case op_callvi:case op_newv:case op_callfv:
case op_intg:case op_intl:
case op_newf:case op_jmp:case op_jt:case op_jf:
printf("0x%x\n",code.num);break;
case op_callb:
printf("0x%x <%s>\n",code.num,builtin_func[code.num].name);break;
case op_callg:case op_mcallg:case op_loadg:
case op_calll:case op_mcalll:case op_loadl:
printf("0x%x\n",code.num);break;
case op_upval:case op_mupval:case op_loadu:
printf("0x%x[0x%x]\n",(code.num>>16)&0xffff,code.num&0xffff);break;
case op_happ:case op_pstr:
case op_lnkc:case op_lnkeqc:
case op_callh:
case op_mcallh:
case op_para:
case op_defpara:
case op_dynpara:
printf(" (");
raw_string(str_res_table[exec_code[index].num]);
printf(")\n");
case op_callh:case op_mcallh:
case op_para:case op_defpara:case op_dynpara:
printf("0x%x (\"",code.num);
raw_string(str_res_table[code.num]);
printf("\")\n");
break;
default:printf("\n");break;
}
@ -1325,19 +1337,15 @@ void nasal_codegen::print_op(int index)
void nasal_codegen::print_byte_code()
{
if(num_res_table.size())
std::cout<<".number"<<std::endl;
for(auto& num:num_res_table)
std::cout<<'\t'<<num<<'\n';
if(str_res_table.size())
std::cout<<".symbol"<<std::endl;
for(auto num:num_res_table)
std::cout<<".number "<<num<<'\n';
for(auto& str:str_res_table)
{
std::cout<<'\t';
std::cout<<".symbol \"";
raw_string(str);
std::cout<<std::endl;
std::cout<<"\"\n";
}
for(int i=0;i<exec_code.size();++i)
for(uint32_t i=0;i<exec_code.size();++i)
print_op(i);
return;
}

View File

@ -21,13 +21,14 @@ private:
std::vector<opcode> bytecode; // bytecode
std::vector<std::string> files; // files
/* debug functions */
void bytecodeinfo(uint32_t);
void bytecodeinfo(const uint32_t);
void traceback();
void stackinfo(int);
void stackinfo(const uint32_t);
void die(std::string);
void stackoverflow();
/* vm calculation functions*/
bool condition(nasal_ref);
void opr_nop();
void opr_intg();
void opr_intl();
void opr_loadg();
@ -141,12 +142,13 @@ void nasal_vm::clear()
imm.clear();
return;
}
void nasal_vm::bytecodeinfo(uint32_t p)
void nasal_vm::bytecodeinfo(const uint32_t p)
{
printf("\t0x%.8x: %s 0x%.8x",p,code_table[bytecode[p].op].name,bytecode[p].num);
if(bytecode[p].op==op_callb)
printf(":%s",builtin_func[bytecode[p].num].name);
printf(" (%s line %d)\n",files[bytecode[p].fidx].c_str(),bytecode[p].line);
const opcode& code=bytecode[p];
printf("\t0x%.8x: %s 0x%x",p,code_table[code.op].name,code.num);
if(code.op==op_callb)
printf(" <%s>",builtin_func[code.num].name);
printf(" (%s line %d)\n",files[code.fidx].c_str(),code.line);
return;
}
void nasal_vm::traceback()
@ -172,41 +174,42 @@ void nasal_vm::traceback()
printf("\t0x%.8x: %d same call(s) ...\n",last_point,same_cnt);
return;
}
void nasal_vm::stackinfo(int limit)
void nasal_vm::stackinfo(const uint32_t limit)
{
printf("vm stack(limit %d):\n",limit);
uint32_t same_cnt=0;
nasal_ref last_ptr={vm_none,0xffffffff};
for(int i=0;i<limit && stack_top-i>=gc.val_stack;++i)
for(uint32_t i=0;i<limit && stack_top>=gc.val_stack;++i,--stack_top)
{
if(stack_top[-i]==last_ptr)
if(stack_top[0]==last_ptr)
{
++same_cnt;
continue;
}
if(same_cnt)
{
printf("\t%p ... | %d same value(s)\n",last_ptr.value.gcobj,same_cnt);
printf("\t... | %d same value(s)\n",same_cnt);
same_cnt=0;
}
last_ptr=stack_top[-i];
printf("\t%p ",stack_top[-i].value.gcobj);
switch(stack_top[-i].type)
last_ptr=stack_top[0];
const nasal_val* ptr=stack_top[0].value.gcobj;
putchar('\t');
switch(stack_top[0].type)
{
case vm_none: printf("undefined");break;
case vm_nil: printf("nil | gc.nil");break;
case vm_num: printf("num | %lf",stack_top[-i].value.num);break;
case vm_str: printf("str | ");raw_string(*stack_top[-i].value.gcobj->ptr.str);break;
case vm_func: printf("func | func(%lu para){..}",stack_top[-i].value.gcobj->ptr.func->key_table.size());break;
case vm_vec: printf("vec | [%lu val]",stack_top[-i].value.gcobj->ptr.vec->elems.size());break;
case vm_hash: printf("hash | {%lu member}",stack_top[-i].value.gcobj->ptr.hash->elems.size());break;
case vm_obj: printf("user data");break;
case vm_none: printf("null |");break;
case vm_nil: printf("nil |");break;
case vm_num: printf("num | %lf",stack_top[0].num());break;
case vm_str: printf("str | <%p> ",ptr);raw_string(*stack_top[0].str());break;
case vm_func: printf("func | <%p> func{entry=0x%x}",ptr,stack_top[0].func()->entry);break;
case vm_vec: printf("vec | <%p> [%lu val]",ptr,stack_top[0].vec()->elems.size());break;
case vm_hash: printf("hash | <%p> {%lu member}",ptr,stack_top[0].hash()->elems.size());break;
case vm_obj: printf("obj | <%p>",ptr);break;
default: printf("unknown");break;
}
putchar('\n');
}
if(same_cnt)
printf("\t%p ... | %d same value(s)\n",last_ptr.value.gcobj,same_cnt);
printf("\t... | %d same value(s)\n",same_cnt);
return;
}
void nasal_vm::die(std::string str)
@ -231,7 +234,7 @@ inline bool nasal_vm::condition(nasal_ref val)
return val.value.num;
else if(val.type==vm_str)
{
std::string& str=*val.str();
const std::string& str=*val.str();
double num=str2num(str.c_str());
if(std::isnan(num))
return str.empty();
@ -239,6 +242,7 @@ inline bool nasal_vm::condition(nasal_ref val)
}
return false;
}
inline void nasal_vm::opr_nop(){}
inline void nasal_vm::opr_intg()
{
// global values store on stack
@ -264,7 +268,7 @@ inline void nasal_vm::opr_loadl()
}
inline void nasal_vm::opr_loadu()
{
func_stk.top()->upvalue[(imm[pc]&0xffff0000)>>16].vec()->elems[imm[pc]&0xffff]=(stack_top--)[0];
func_stk.top()->upvalue[(imm[pc]>>16)&0xffff].vec()->elems[imm[pc]&0xffff]=(stack_top--)[0];
return;
}
inline void nasal_vm::opr_pnum()
@ -488,7 +492,8 @@ inline void nasal_vm::opr_leq(){op_cmp(<=);}
inline void nasal_vm::opr_grt(){op_cmp(>);}
inline void nasal_vm::opr_geq(){op_cmp(>=);}
#define op_cmp_const(type) stack_top[0]=(stack_top[0].to_number() type num_table[imm[pc]])?gc.one:gc.zero;
#define op_cmp_const(type)\
stack_top[0]=(stack_top[0].to_number() type num_table[imm[pc]])?gc.one:gc.zero;
inline void nasal_vm::opr_lessc(){op_cmp_const(<);}
inline void nasal_vm::opr_leqc(){op_cmp_const(<=);}
@ -563,7 +568,7 @@ inline void nasal_vm::opr_calll()
}
inline void nasal_vm::opr_upval()
{
(++stack_top)[0]=func_stk.top()->upvalue[(imm[pc]&0xffff0000)>>16].vec()->elems[imm[pc]&0xffff];
(++stack_top)[0]=func_stk.top()->upvalue[(imm[pc]>>16)&0xffff].vec()->elems[imm[pc]&0xffff];
return;
}
inline void nasal_vm::opr_callv()
@ -775,7 +780,7 @@ inline void nasal_vm::opr_mcalll()
}
inline void nasal_vm::opr_mupval()
{
mem_addr=&func_stk.top()->upvalue[(imm[pc]&0xffff0000)>>16].vec()->elems[imm[pc]&0xffff];
mem_addr=&func_stk.top()->upvalue[(imm[pc]>>16)&0xffff].vec()->elems[imm[pc]&0xffff];
(++stack_top)[0]=mem_addr[0];
return;
}
@ -836,7 +841,7 @@ inline void nasal_vm::opr_ret()
}
void nasal_vm::run(const std::vector<opcode>& exec,const bool op_cnt)
{
uint64_t count[op_ret+1]={0};
uint64_t count[op_exit+1]={0};
const void* opr_table[]=
{
&&nop, &&intg, &&intl, &&loadg,
@ -858,7 +863,7 @@ void nasal_vm::run(const std::vector<opcode>& exec,const bool op_cnt)
&&callh, &&callfv, &&callfh, &&callb,
&&slcbegin,&&slcend, &&slc, &&slc2,
&&mcallg, &&mcalll, &&mupval, &&mcallv,
&&mcallh, &&ret
&&mcallh, &&ret, &&vmexit
};
bytecode=exec;
@ -875,7 +880,7 @@ void nasal_vm::run(const std::vector<opcode>& exec,const bool op_cnt)
// run
goto *code[pc];
nop:
vmexit:
if(canary.value.gcobj)
stackoverflow();
if(op_cnt)
@ -896,10 +901,11 @@ nop:
}
return;
// may cause stackoverflow
#define exec_operand(op,num) {op();++count[num];if(!canary.value.gcobj)goto *code[++pc];goto nop;}
#define exec_operand(op,num) {op();++count[num];if(!canary.value.gcobj)goto *code[++pc];goto vmexit;}
// do not cause stackoverflow
#define exec_opnodie(op,num) {op();++count[num];goto *code[++pc];}
nop: exec_opnodie(opr_nop ,op_nop ); // do nothing
intg: exec_opnodie(opr_intg ,op_intg ); // stack+=imm[pc] (detected at codegen)
intl: exec_opnodie(opr_intl ,op_intl ); // stack-=0
loadg: exec_opnodie(opr_loadg ,op_loadg ); // stack-=1

View File

@ -13,7 +13,7 @@ var ResultTrait={
},
unwrap:func(){
if(me.flag)
die("error: "~me.err);
die(me.err);
return me.ok;
}
};

View File

@ -13,7 +13,7 @@ var ResultTrait={
},
unwrap:func(){
if(me.flag)
die("error: "~me.err);
die(me.err);
return me.ok;
}
};