change code structure

This commit is contained in:
ValKmjolnir 2021-11-02 22:44:42 +08:00
parent f8e2918561
commit cd08b2d1bb
9 changed files with 172 additions and 134 deletions

View File

@ -63,7 +63,6 @@ void err()
void execute(const std::string& file,const uint32_t cmd)
{
// 33kb space on stack
nasal_lexer lexer;
nasal_parse parse;
nasal_import linker;

View File

@ -13,6 +13,7 @@ test:nasal
./nasal test/class.nas
# ./nasal test/exception.nas
./nasal -t test/fib.nas
./nasal test/filesystem.nas
./nasal test/hexdump.nas
./nasal test/json.nas
./nasal test/leetcode1319.nas

View File

@ -4,32 +4,63 @@
// Node types of the abstract syntax tree.
// NOTE(review): the ast_name[] table that follows appears to list one display
// name per enumerator in this same order - keep the two in sync when editing.
enum ast_node
{
ast_null=0,
ast_root,
ast_block,
ast_file, // ast_file is only used to store which file the subtree is on,codegen will generate nothing
ast_nil,ast_num,ast_str,ast_id,ast_func,ast_hash,ast_vec,
ast_hashmember,
ast_call,ast_callh,ast_callv,ast_callf,
ast_subvec,
ast_args,ast_default,ast_dynamic,
ast_and,ast_or,
ast_equal,
ast_addeq,ast_subeq,
ast_multeq,ast_diveq,
ast_lnkeq,
ast_cmpeq,ast_neq,
ast_less,ast_leq,
ast_grt,ast_geq,
ast_add,ast_sub,
ast_mult,ast_div,
ast_link,
ast_neg,ast_not,
ast_trino,
ast_for,ast_forindex,ast_foreach,ast_while,ast_new_iter,
ast_conditional,ast_if,ast_elsif,ast_else,
ast_multi_id,ast_multi_scalar,
ast_def,ast_multi_assign,
ast_continue,ast_break,ast_ret
ast_root, // mark the root node of ast
ast_block, // expression block
ast_file, // used to store which file the sub-tree is on
ast_nil, // nil keyword
ast_num, // number, basic value type
ast_str, // string, basic value type
ast_id, // identifier
ast_func, // func keyword
ast_hash, // hash, basic value type
ast_vec, // vector, basic value type
ast_hashmember,// elements in hashmap
ast_call, // mark a sub-tree of calling an identifier
ast_callh, // id.name
ast_callv, // id[index]
ast_callf, // id()
ast_subvec, // id[index:index]
ast_args, // mark a sub-tree of function parameters
ast_default, // default parameter
ast_dynamic, // dynamic parameter
ast_and, // and keyword
ast_or, // or keyword
ast_equal, // =
ast_addeq, // +=
ast_subeq, // -=
ast_multeq, // *=
ast_diveq, // /=
ast_lnkeq, // ~=
ast_cmpeq, // ==
ast_neq, // !=
ast_less, // <
ast_leq, // <=
ast_grt, // >
ast_geq, // >=
ast_add, // +
ast_sub, // -
ast_mult, // *
ast_div, // /
ast_link, // ~
ast_neg, // - (unary minus)
ast_not, // ! (unary not)
ast_trino, // ?:
ast_for, // for keyword
ast_forindex,// forindex keyword
ast_foreach, // foreach keyword
ast_while, // while keyword
ast_new_iter,// iterator, used in forindex/foreach
ast_conditional,// mark a sub-tree of conditional expression
ast_if, // if keyword
ast_elsif, // elsif keyword
ast_else, // else keyword
ast_multi_id,// multi identifiers sub-tree
ast_multi_scalar,// multi value sub-tree
ast_def, // definition
ast_multi_assign,// multi assignment sub-tree
ast_continue,// continue keyword
ast_break, // break keyword
ast_ret // return keyword
};
const char* ast_name[]=
@ -38,29 +69,61 @@ const char* ast_name[]=
"root",
"block",
"file",
"nil","num","str","id","func","hash","vec",
"nil",
"num",
"str",
"id",
"func",
"hash",
"vec",
"hashmember",
"call","callh","callv","callf",
"call",
"callh",
"callv",
"callf",
"subvec",
"args","default","dynamic",
"and","or",
"args",
"default",
"dynamic",
"and",
"or",
"=",
"+=","-=",
"*=","/=",
"+=",
"-=",
"*=",
"/=",
"~=",
"==","!=",
"<","<=",
">",">=",
"+","-",
"*","/",
"==",
"!=",
"<",
"<=",
">",
">=",
"+",
"-",
"*",
"/",
"~",
"unary-","unary!",
"unary-",
"unary!",
"trino",
"for","forindex","foreach","while","iter",
"conditional","if","elsif","else",
"multi_id","multi_scalar",
"def","multi_assign",
"continue","break","return"
"for",
"forindex",
"foreach",
"while",
"iter",
"conditional",
"if",
"elsif",
"else",
"multi_id",
"multi_scalar",
"def",
"multi_assign",
"continue",
"break",
"return",
nullptr
};
class nasal_ast

View File

@ -197,11 +197,11 @@ private:
uint16_t fileindex;
uint32_t in_forindex;
uint32_t in_foreach;
const std::string* file;
std::unordered_map<double,int> num_table;
std::unordered_map<std::string,int> str_table;
std::vector<double> num_res;
std::vector<std::string> str_res;
std::vector<std::string> file;
std::vector<opcode> code;
std::list<std::vector<int>> continue_ptr;
std::list<std::vector<int>> break_ptr;
@ -1146,20 +1146,9 @@ void nasal_codegen::ret_gen(const nasal_ast& ast)
void nasal_codegen::compile(const nasal_parse& parse,const nasal_import& import)
{
error=0;
in_foreach=0;
in_forindex=0;
error=in_foreach=in_forindex=0;
fileindex=0;
num_table.clear();
str_table.clear();
num_res.clear();
str_res.clear();
file=import.get_file();
code.clear();
global.clear();
local.clear();
file=import.get_file().data();
// search symbols first
find_symbol(parse.ast());
@ -1263,7 +1252,7 @@ void nasal_codegen::print_op(uint32_t index)
void nasal_codegen::print()
{
for(auto num:num_res)
for(auto& num:num_res)
std::cout<<".number "<<num<<'\n';
for(auto& str:str_res)
std::cout<<".symbol \""<<rawstr(str)<<"\"\n";

View File

@ -5,8 +5,6 @@ class nasal_import
{
private:
uint32_t error;
nasal_lexer lex;
nasal_parse par;
std::vector<std::string> files;
void die(const std::string&,const char*);
bool check_import(const nasal_ast&);
@ -67,6 +65,8 @@ void nasal_import::linker(nasal_ast& root,nasal_ast&& add_root)
nasal_ast nasal_import::file_import(nasal_ast& node)
{
nasal_lexer lex;
nasal_parse par;
// get filename and set node to ast_null
std::string filename=node[1][0].str();
node.clear();

View File

@ -101,10 +101,10 @@ private:
uint32_t error;
uint32_t line;
uint32_t ptr;
size_t size;
std::string code;
std::string res;
std::vector<token> tokens;
uint32_t get_type(const std::string&);
void die(const char*);
void open(const std::string&);
@ -120,7 +120,6 @@ public:
void nasal_lexer::open(const std::string& file)
{
error=0;
std::ifstream fin(file,std::ios::binary);
if(fin.fail())
{
@ -152,7 +151,7 @@ void nasal_lexer::die(const char* info)
std::string nasal_lexer::id_gen()
{
std::string str="";
while(ptr<size && (ID(res[ptr])||DIGIT(res[ptr])))
while(ptr<res.size() && (ID(res[ptr])||DIGIT(res[ptr])))
str+=res[ptr++];
code+=str;
return str;
@ -161,11 +160,11 @@ std::string nasal_lexer::id_gen()
std::string nasal_lexer::num_gen()
{
// generate hex number
if(ptr+1<size && res[ptr]=='0' && res[ptr+1]=='x')
if(ptr+1<res.size() && res[ptr]=='0' && res[ptr+1]=='x')
{
std::string str="0x";
ptr+=2;
while(ptr<size && HEX(res[ptr]))
while(ptr<res.size() && HEX(res[ptr]))
str+=res[ptr++];
code+=str;
if(str.length()<3)// "0x"
@ -173,11 +172,11 @@ std::string nasal_lexer::num_gen()
return str;
}
// generate oct number
else if(ptr+1<size && res[ptr]=='0' && res[ptr+1]=='o')
else if(ptr+1<res.size() && res[ptr]=='0' && res[ptr+1]=='o')
{
std::string str="0o";
ptr+=2;
while(ptr<size && OCT(res[ptr]))
while(ptr<res.size() && OCT(res[ptr]))
str+=res[ptr++];
code+=str;
if(str.length()<3)// "0o"
@ -187,12 +186,12 @@ std::string nasal_lexer::num_gen()
// generate dec number
// dec number -> [0~9][0~9]*(.[0~9]*)(e|E(+|-)0|[1~9][0~9]*)
std::string str="";
while(ptr<size && DIGIT(res[ptr]))
while(ptr<res.size() && DIGIT(res[ptr]))
str+=res[ptr++];
if(ptr<size && res[ptr]=='.')
if(ptr<res.size() && res[ptr]=='.')
{
str+=res[ptr++];
while(ptr<size && DIGIT(res[ptr]))
while(ptr<res.size() && DIGIT(res[ptr]))
str+=res[ptr++];
// "xxxx." is not a correct number
if(str.back()=='.')
@ -202,12 +201,12 @@ std::string nasal_lexer::num_gen()
return "0";
}
}
if(ptr<size && (res[ptr]=='e' || res[ptr]=='E'))
if(ptr<res.size() && (res[ptr]=='e' || res[ptr]=='E'))
{
str+=res[ptr++];
if(ptr<size && (res[ptr]=='-' || res[ptr]=='+'))
if(ptr<res.size() && (res[ptr]=='-' || res[ptr]=='+'))
str+=res[ptr++];
while(ptr<size && DIGIT(res[ptr]))
while(ptr<res.size() && DIGIT(res[ptr]))
str+=res[ptr++];
// "xxxe(-|+)" is not a correct number
if(str.back()=='e' || str.back()=='E' || str.back()=='-' || str.back()=='+')
@ -226,7 +225,7 @@ std::string nasal_lexer::str_gen()
std::string str="";
char begin=res[ptr];
code+=begin;
while(++ptr<size && res[ptr]!=begin)
while(++ptr<res.size() && res[ptr]!=begin)
{
code+=res[ptr];
if(res[ptr]=='\n')
@ -234,7 +233,7 @@ std::string nasal_lexer::str_gen()
code="";
++line;
}
if(res[ptr]=='\\' && ptr+1<size)
if(res[ptr]=='\\' && ptr+1<res.size())
{
code+=res[++ptr];
switch(res[ptr])
@ -258,7 +257,7 @@ std::string nasal_lexer::str_gen()
str+=res[ptr];
}
// check if this string ends with a " or '
if(ptr++>=size)
if(ptr++>=res.size())
{
die("get EOF when generating string.");
return str;
@ -272,16 +271,13 @@ std::string nasal_lexer::str_gen()
void nasal_lexer::scan(const std::string& file)
{
open(file);
tokens.clear();
line=1;
ptr=0;
code="";
size=res.size();
error=ptr=0;
std::string str;
while(ptr<size)
while(ptr<res.size())
{
while(ptr<size && (res[ptr]==' ' || res[ptr]=='\n' || res[ptr]=='\t' || res[ptr]=='\r' || res[ptr]<0))
while(ptr<res.size() && (res[ptr]==' ' || res[ptr]=='\n' || res[ptr]=='\t' || res[ptr]=='\r' || res[ptr]<0))
{
// these characters will be ignored, and '\n' will cause ++line
code+=res[ptr];
@ -291,7 +287,7 @@ void nasal_lexer::scan(const std::string& file)
code="";
}
}
if(ptr>=size) break;
if(ptr>=res.size()) break;
if(ID(res[ptr]))
{
str=id_gen();
@ -315,7 +311,7 @@ void nasal_lexer::scan(const std::string& file)
else if(res[ptr]=='.')
{
str=".";
if(ptr+2<size && res[ptr+1]=='.' && res[ptr+2]=='.')
if(ptr+2<res.size() && res[ptr+1]=='.' && res[ptr+2]=='.')
str+="..";
ptr+=str.length();
code+=str;
@ -325,13 +321,13 @@ void nasal_lexer::scan(const std::string& file)
{
// get calculation operator
str=res[ptr++];
if(ptr<size && res[ptr]=='=')
if(ptr<res.size() && res[ptr]=='=')
str+=res[ptr++];
code+=str;
tokens.push_back({line,get_type(str),str});
}
else if(NOTE(res[ptr]))// avoid note, after this process ptr will point to a '\n', so next loop line counter+1
while(++ptr<size && res[ptr]!='\n');
while(++ptr<res.size() && res[ptr]!='\n');
else
{
code+=res[ptr++];
@ -339,7 +335,7 @@ void nasal_lexer::scan(const std::string& file)
}
}
tokens.push_back({line,tok_eof,"eof"});
res.clear();
code=res="";
}
void nasal_lexer::print()

View File

@ -43,11 +43,10 @@ class nasal_parse
private:
uint32_t ptr;
uint32_t error;
uint32_t in_func; // count when generating function block
uint32_t in_loop; // count when generating loop block
uint32_t in_func; // count function block
uint32_t in_loop; // count loop block
const token* tokens;// ref from nasal_lexer
nasal_ast root;
std::vector<token> tokens;
std::vector<token> error_token;
void die(uint32_t,const std::string&);
void match(uint32_t type,const char* info=nullptr);
@ -84,8 +83,8 @@ private:
nasal_ast callf();
nasal_ast subvec();
nasal_ast definition();
nasal_ast var_incurve_def();
nasal_ast var_outcurve_def();
nasal_ast incurve_def();
nasal_ast outcurve_def();
nasal_ast multi_id();
nasal_ast multi_scalar(bool);
nasal_ast multi_assgin();
@ -107,38 +106,19 @@ public:
};
void nasal_parse::compile(const nasal_lexer& lexer)
{
tokens=lexer.get_tokens();
tokens=lexer.get_tokens().data();
ptr=in_func=in_loop=error=0;
error_token.clear();
root={1,ast_root};
while(tokens[ptr].type!=tok_eof)
{
uint32_t err_tok_size=error_token.size();
root.add(expr());
if(tokens[ptr].type==tok_semi)
match(tok_semi);
// if detect error token, avoid checking semicolon
else if(error_token.size()>err_tok_size)
continue;
// the last expression can be recognized without semi
else if(need_semi_check(root.child().back()) && tokens[ptr].type!=tok_eof)
die(error_line,"expected \";\"");
}
if(!error_token.size())
return;
++error;
std::cout<<"[parse] line";
uint32_t err_line=0;
for(auto& tok:error_token)
if(err_line!=tok.line)
{
std::cout<<' '<<tok.line;
err_line=tok.line;
}
std::cout
<<" have fatal syntax errors."
<<"check \'(\',\'[\',\'{\',\')\',\']\',\'}\' match or not.\n";
}
void nasal_parse::die(uint32_t line,const std::string& info)
{
@ -476,7 +456,10 @@ nasal_ast nasal_parse::expr()
case tok_break: return break_expr(); break;
case tok_ret: return ret_expr(); break;
case tok_semi: break;
default:error_token.push_back(tokens[ptr]);++ptr;break;
default:
die(error_line,"incorrect token <"+tokens[ptr].str+">");
++ptr;
break;
}
return {tokens[ptr].line,ast_null};
}
@ -493,13 +476,9 @@ nasal_ast nasal_parse::exprs()
match(tok_lbrace);
while(tokens[ptr].type!=tok_rbrace && tokens[ptr].type!=tok_eof)
{
uint32_t err_tok_size=error_token.size();
node.add(expr());
if(tokens[ptr].type==tok_semi)
match(tok_semi);
// if detect error token, avoid checking semicolon
else if(error_token.size()>err_tok_size)
continue;
// the last expression can be recognized without semi
else if(need_semi_check(node.child().back()) && tokens[ptr].type!=tok_rbrace)
die(error_line,"expected \";\"");
@ -766,12 +745,12 @@ nasal_ast nasal_parse::definition()
switch(tokens[ptr].type)
{
case tok_id: node.add(id());match(tok_id);break;
case tok_lcurve: node.add(var_outcurve_def());break;
case tok_lcurve: node.add(outcurve_def());break;
default: die(error_line,"expected identifier");break;
}
}
else if(tokens[ptr].type==tok_lcurve)
node.add(var_incurve_def());
node.add(incurve_def());
match(tok_eq);
if(tokens[ptr].type==tok_lcurve)
node.add(check_multi_scalar()?multi_scalar(false):calc());
@ -784,7 +763,7 @@ nasal_ast nasal_parse::definition()
die(node[0].line(),"too much or lack values in multi-definition");
return node;
}
nasal_ast nasal_parse::var_incurve_def()
nasal_ast nasal_parse::incurve_def()
{
match(tok_lcurve);
match(tok_var);
@ -792,7 +771,7 @@ nasal_ast nasal_parse::var_incurve_def()
match(tok_rcurve);
return node;
}
nasal_ast nasal_parse::var_outcurve_def()
nasal_ast nasal_parse::outcurve_def()
{
match(tok_lcurve);
nasal_ast node=multi_id();

View File

@ -7,18 +7,18 @@ private:
/* values of nasal_vm */
uint32_t pc; // program counter
uint32_t offset; // used to load default parameters to a new function
const double* num_table;// const numbers, ref from nasal_codegen
const std::string* str_table;// const symbols, ref from nasal_codegen
std::stack<uint32_t> ret; // stack to store return pc
std::stack<nasal_func*> func_stk; // stack to store function, used to get upvalues
std::stack<int> counter; // iterator stack for forindex/foreach
const double* num_table;// const numbers
std::vector<std::string> str_table;// const symbols
std::vector<uint32_t> imm; // immediate number
nasal_ref* mem_addr; // used for mem_call
/* garbage collector */
nasal_gc gc;
/* values used for debug */
std::vector<opcode> bytecode;
std::vector<std::string> files;
const opcode* bytecode; // ref from nasal_codegen
const std::string* files; // ref from nasal_import
void init(
const std::vector<std::string>&,
@ -130,8 +130,8 @@ void nasal_vm::init(
{
gc.init(strs);
num_table=nums.data();
str_table=strs;
files=filenames;
str_table=strs.data();
files=filenames.data();
}
void nasal_vm::clear()
{
@ -140,7 +140,6 @@ void nasal_vm::clear()
ret.pop();
while(!counter.empty())
counter.pop();
str_table.clear();
imm.clear();
}
void nasal_vm::valinfo(nasal_ref& val)
@ -804,7 +803,7 @@ inline void nasal_vm::opr_mcallh()
if(hash.type!=vm_hash)
die("mcallh: must call a hash");
nasal_hash& ref=*hash.hash();
std::string& str=str_table[imm[pc]];
const std::string& str=str_table[imm[pc]];
mem_addr=ref.get_mem(str);
if(!mem_addr) // create a new key
{
@ -856,9 +855,9 @@ void nasal_vm::run(
&&mcallg, &&mcalll, &&mupval, &&mcallv,
&&mcallh, &&ret, &&vmexit
};
bytecode=gen.get_code();
bytecode=gen.get_code().data();
std::vector<const void*> code;
for(auto& i:bytecode)
for(auto& i:gen.get_code())
{
code.push_back(opr_table[i.op]);
imm.push_back(i.num);

12
test/filesystem.nas Normal file
View File

@ -0,0 +1,12 @@
# Smoke test for the file and directory library calls.
# NOTE(review): io.* and unix.* are presumably made available by lib.nas - confirm.
# NOTE(review): all paths are relative, so this presumably has to be run from the
# directory that contains test/ (the Makefile invokes it as ./nasal test/filesystem.nas).
import("lib.nas");
# Open this script itself and echo it line by line; the loop ends once io.readln yields nil.
var fd=io.open("test/filesystem.nas");
while((var line=io.readln(fd))!=nil)
println(line);
io.close(fd);
# Print whatever io.stat reports for the same file.
println(io.stat("test/filesystem.nas"));
# Print every entry of the test directory; the loop ends once unix.readdir yields nil.
var dd=unix.opendir("test");
while((var name=unix.readdir(dd))!=nil)
println(name);
unix.closedir(dd);