This commit is contained in:
ValKmjolnir 2021-10-15 22:21:57 +08:00
parent 577546763f
commit e41f728589
4 changed files with 1645 additions and 1646 deletions

282
main.cpp
View File

@ -1,144 +1,140 @@
#include "nasal.h" #include "nasal.h"
// bit flags that select what execute() does; combine them with '|'
#define VM_LEXINFO   1  // print the token list produced by the lexer
#define VM_ASTINFO   2  // print the abstract syntax tree
#define VM_CODEINFO  4  // print the generated bytecode
#define VM_EXECTIME  8  // run the vm and report the elapsed time
#define VM_OPCALLNUM 16 // forwarded to nasal_vm::run as its second argument (--opcnt)
#define VM_EXEC      32 // run the vm
// Print the command line usage text to stdout.
// On Windows an extra hint about the console code page is printed first.
void help_cmd()
{
    std::cout
#ifdef _WIN32
    <<"use command \'chcp 65001\' if want to use unicode.\n"
#endif
    <<"nasal <option>\n"
    <<"option:\n"
    <<" -h, --help | get help.\n"
    <<" -v, --version | get version of nasal interpreter.\n\n"
    <<"nasal <file>\n"
    <<"file:\n"
    <<" input file name to execute script file.\n\n"
    <<"nasal [options] <file>\n"
    <<"option:\n"
    <<" -l, --lex | view token info.\n"
    <<" -a, --ast | view abstract syntax tree.\n"
    <<" -c, --code | view bytecode.\n"
    <<" -t, --time | execute and get the running time.\n"
    <<" -o, --opcnt | count operands while running.\n"
    <<"file:\n"
    <<" input file name to execute script file.\n";
}
// Print the ascii-art banner, the interpreter version and the project links to stdout.
void logo()
{
    std::cout
    <<" __ _ \n"
    <<" /\\ \\ \\__ _ ___ __ _| | \n"
    <<" / \\/ / _` / __|/ _` | | \n"
    <<" / /\\ / (_| \\__ \\ (_| | | \n"
    <<" \\_\\ \\/ \\__,_|___/\\__,_|_|\n"
    <<"nasal interpreter ver 8.0\n"
    <<"thanks to : https://github.com/andyross/nasal\n"
    <<"code repo : https://github.com/ValKmjolnir/Nasal-Interpreter\n"
    <<"code repo : https://gitee.com/valkmjolnir/Nasal-Interpreter\n"
    <<"lang info : http://wiki.flightgear.org/Nasal_scripting_language\n"
    <<"input \"nasal -h\" to get help .\n";
}
// Report that a compilation stage failed for a file, then terminate with exit code 1.
//   stage:    name of the failing stage; printed between square brackets
//   filename: file that was being processed
// Never returns.
[[noreturn]] void die(const char* stage,const std::string& filename)
{
    std::cout<<"["<<stage<<"] in <"<<filename<<">: error(s) occurred,stop.\n";
    std::exit(1);
}
// Report invalid command line arguments, then terminate with exit code 1.
// Never returns.
[[noreturn]] void cmderr()
{
    std::cout
    <<"invalid argument(s).\n"
    <<"use nasal -h to get help.\n";
    std::exit(1);
}
void execute(const std::string& file,const uint16_t cmd) void execute(const std::string& file,const uint16_t cmd)
{ {
nasal_lexer lexer; nasal_lexer lexer;
nasal_parse parse; nasal_parse parse;
nasal_import import; nasal_import import;
nasal_codegen codegen; nasal_codegen codegen;
nasal_vm vm; nasal_vm vm;
lexer.open(file); lexer.open(file);
lexer.scan(); lexer.scan();
if(lexer.err()) if(lexer.err())
die("lexer",file); die("lexer",file);
if(cmd&VM_LEXINFO) if(cmd&VM_LEXINFO)
lexer.print(); lexer.print();
parse.compile(lexer.get_tokens()); parse.compile(lexer.get_tokens());
if(parse.err()) if(parse.err())
die("parse",file); die("parse",file);
if(cmd&VM_ASTINFO) if(cmd&VM_ASTINFO)
parse.get_root().print(0); parse.ast().print(0);
// first used file is itself // first used file is itself
import.link(parse.get_root(),file); import.link(parse.ast(),file);
if(import.err()) if(import.err())
die("import",file); die("import",file);
codegen.compile(import.get_root(),import.get_file()); codegen.compile(import.ast(),import.get_file());
if(codegen.err()) if(codegen.err())
die("code",file); die("code",file);
if(cmd&VM_CODEINFO) if(cmd&VM_CODEINFO)
codegen.print(); codegen.print();
vm.init( vm.init(
codegen.get_strs(), codegen.get_strs(),
codegen.get_nums(), codegen.get_nums(),
import.get_file() import.get_file()
); );
if(cmd&VM_EXECTIME) if(cmd&VM_EXECTIME)
{ {
clock_t t=clock(); clock_t t=clock();
vm.run(codegen.get_code(),cmd&VM_OPCALLNUM); vm.run(codegen.get_code(),cmd&VM_OPCALLNUM);
std::cout<<"process exited after "<<((double)(clock()-t))/CLOCKS_PER_SEC<<"s.\n"; std::cout<<"process exited after "<<((double)(clock()-t))/CLOCKS_PER_SEC<<"s.\n";
} }
else if(cmd&VM_EXEC) else if(cmd&VM_EXEC)
vm.run(codegen.get_code(),cmd&VM_OPCALLNUM); vm.run(codegen.get_code(),cmd&VM_OPCALLNUM);
vm.clear(); vm.clear();
return; return;
} }
int main(int argc,const char* argv[]) int main(int argc,const char* argv[])
{ {
std::string filename; if(argc==2 && (!strcmp(argv[1],"-v") || !strcmp(argv[1],"--version")))
uint16_t cmd=0; logo();
if(argc==2 && (!strcmp(argv[1],"-v") || !strcmp(argv[1],"--version"))) else if(argc==2 && (!strcmp(argv[1],"-h") || !strcmp(argv[1],"--help")))
logo(); help_cmd();
else if(argc==2 && (!strcmp(argv[1],"-h") || !strcmp(argv[1],"--help"))) else if(argc==2 && argv[1][0]!='-')
help_cmd(); execute(argv[1],VM_EXEC);
else if(argc==2 && argv[1][0]!='-') else if(argc>=3)
cmd|=VM_EXEC; {
else if(argc>=3) uint16_t cmd=0;
{ for(int i=1;i<argc-1;++i)
for(int i=1;i<argc-1;++i) {
{ std::string s(argv[i]);
std::string s(argv[i]); if(s=="--lex" || s=="-l")
if(s=="--lex" || s=="-l") cmd|=VM_LEXINFO;
cmd|=VM_LEXINFO; else if(s=="--ast" || s=="-a")
else if(s=="--ast" || s=="-a") cmd|=VM_ASTINFO;
cmd|=VM_ASTINFO; else if(s=="--code" || s=="-c")
else if(s=="--code" || s=="-c") cmd|=VM_CODEINFO;
cmd|=VM_CODEINFO; else if(s=="--opcnt" || s=="-o")
else if(s=="--opcnt" || s=="-o") cmd|=VM_OPCALLNUM|VM_EXEC;
cmd|=VM_OPCALLNUM|VM_EXEC; else if(s=="--time" || s=="-t")
else if(s=="--time" || s=="-t") cmd|=VM_EXECTIME;
cmd|=VM_EXECTIME; else
else cmderr();
cmderr(); }
} execute(argv[argc-1],cmd);
} }
else else
cmderr(); cmderr();
if(argv[argc-1][0]=='-') return 0;
cmderr();
if(cmd)
execute(argv[argc-1],cmd);
return 0;
} }

View File

@ -1,129 +1,129 @@
#ifndef __NASAL_IMPORT_H__ #ifndef __NASAL_IMPORT_H__
#define __NASAL_IMPORT_H__ #define __NASAL_IMPORT_H__
class nasal_import class nasal_import
{ {
private: private:
uint32_t error; uint32_t error;
nasal_lexer import_lex; nasal_lexer import_lex;
nasal_parse import_par; nasal_parse import_par;
nasal_ast import_ast; nasal_ast import_ast;
std::vector<std::string> filename_table; std::vector<std::string> filename_table;
void die(const std::string&,const char*); void die(const std::string&,const char*);
bool check_import(const nasal_ast&); bool check_import(const nasal_ast&);
bool check_exist(const std::string&); bool check_exist(const std::string&);
void linker(nasal_ast&,nasal_ast&&); void linker(nasal_ast&,nasal_ast&&);
nasal_ast file_import(nasal_ast&); nasal_ast file_import(nasal_ast&);
nasal_ast load(nasal_ast&,uint16_t); nasal_ast load(nasal_ast&,uint16_t);
public: public:
uint32_t err(){return error;} uint32_t err(){return error;}
void link(nasal_ast&,const std::string&); void link(nasal_ast&,const std::string&);
const nasal_ast& get_root(){return import_ast;} const nasal_ast& ast(){return import_ast;}
const std::vector<std::string>& get_file(){return filename_table;} const std::vector<std::string>& get_file(){return filename_table;}
}; };
void nasal_import::die(const std::string& filename,const char* error_stage) void nasal_import::die(const std::string& file,const char* stage)
{ {
++error; ++error;
std::cout<<"[import] in <\""<<filename<<"\">: error(s) occurred in "<<error_stage<<".\n"; std::cout<<"[import] in <\""<<file<<"\">: error(s) occurred in "<<stage<<".\n";
} }
bool nasal_import::check_import(const nasal_ast& node) bool nasal_import::check_import(const nasal_ast& node)
{ {
/* /*
only this kind of node can be recognized as 'import': only this kind of node can be recognized as 'import':
call call
id:import id:import
call_func call_func
string:'filename' string:'filename'
*/ */
if(node.get_type()!=ast_call) if(node.get_type()!=ast_call)
return false; return false;
const std::vector<nasal_ast>& ref_vec=node.get_children(); const std::vector<nasal_ast>& ref_vec=node.get_children();
if(ref_vec.size()!=2) if(ref_vec.size()!=2)
return false; return false;
if(ref_vec[0].get_str()!="import") if(ref_vec[0].get_str()!="import")
return false; return false;
if(ref_vec[1].get_type()!=ast_callf) if(ref_vec[1].get_type()!=ast_callf)
return false; return false;
if(ref_vec[1].get_children().size()!=1 || ref_vec[1].get_children()[0].get_type()!=ast_str) if(ref_vec[1].get_children().size()!=1 || ref_vec[1].get_children()[0].get_type()!=ast_str)
return false; return false;
return true; return true;
} }
bool nasal_import::check_exist(const std::string& file) bool nasal_import::check_exist(const std::string& file)
{ {
// avoid importing the same file // avoid importing the same file
for(auto& fname:filename_table) for(auto& fname:filename_table)
if(file==fname) if(file==fname)
return true; return true;
filename_table.push_back(file); filename_table.push_back(file);
return false; return false;
} }
// Move every child of add_root to the back of root's children, keeping order.
// add_root's children are moved from and must not be used afterwards.
void nasal_import::linker(nasal_ast& root,nasal_ast&& add_root)
{
    for(auto& child:add_root.get_children())
        root.add_child(std::move(child));
}
// Load the file named by an import node.
//   node: a node accepted by check_import(); it is cleared by this call
// Returns the tree of the imported file with its own imports already linked in,
// or a root node without children if the file was imported before or failed to
// lex/parse (failures are recorded through die()).
nasal_ast nasal_import::file_import(nasal_ast& node)
{
    // initializing
    nasal_ast tmp(0,ast_root);
    // get filename and set node to ast_null
    // (the name has to be copied out before the node is cleared)
    const std::string filename=node.get_children()[1].get_children()[0].get_str();
    node.clear();
    // avoid infinite loading loop
    if(check_exist(filename))
        return tmp;
    // start importing...
    import_lex.open(filename);
    import_lex.scan();
    if(import_lex.err())
    {
        die(filename,"lexer");
        return tmp;
    }
    import_par.compile(import_lex.get_tokens());
    if(import_par.err())
    {
        die(filename,"parser");
        return tmp;
    }
    tmp=std::move(import_par.ast());
    // check if tmp has 'import'
    // check_exist() has just appended this file, so its index is the last one
    return load(tmp,filename_table.size()-1);
}
// Build the linked tree for one file.
//   root:      parsed tree of the file; its children are moved out
//   fileindex: index of the file in filename_table
// Returns a new root holding, in this order: the content of every imported
// file, an ast_file marker carrying fileindex, and the children of root.
nasal_ast nasal_import::load(nasal_ast& root,uint16_t fileindex)
{
    nasal_ast new_root(0,ast_root);
    // imported files go first
    for(auto& child:root.get_children())
        if(check_import(child))
            linker(new_root,file_import(child));
    // add root to the back of new_root
    nasal_ast file_head(0,ast_file);
    file_head.set_num(fileindex);
    new_root.add_child(std::move(file_head));
    linker(new_root,std::move(root));
    return new_root;
}
// Link the main file with everything it imports; the result is stored in import_ast.
//   root: parsed tree of the main file; its children are moved out
//   self: name of the main file
// Resets the error counter and the file table first.
void nasal_import::link(nasal_ast& root,const std::string& self)
{
    // initializing
    error=0;
    filename_table.clear();
    filename_table.push_back(self);
    import_ast.clear();
    // scan root and import files,then generate a new ast and return to import_ast
    // the main file's index is 0
    import_ast=load(root,0);
}
#endif #endif

View File

@ -1,362 +1,365 @@
#ifndef __NASAL_LEXER_H__ #ifndef __NASAL_LEXER_H__
#define __NASAL_LEXER_H__ #define __NASAL_LEXER_H__
// character classification used by the lexer
// the argument is parenthesized everywhere so that an expression can be passed safely;
// it is evaluated more than once, so it must not have side effects
#define IS_IDENTIFIER(c)      (((c)=='_')||('a'<=(c)&&(c)<='z')||('A'<=(c)&&(c)<='Z'))
#define IS_HEX_NUMBER(c)      (('0'<=(c)&&(c)<='9')||('a'<=(c)&&(c)<='f')||('A'<=(c)&&(c)<='F'))
#define IS_OCT_NUMEBR(c)      ('0'<=(c)&&(c)<='7')
#define IS_DIGIT(c)           ('0'<=(c)&&(c)<='9')
#define IS_STRING(c)          ((c)=='\''||(c)=='\"'||(c)=='`')
// single operators have only one character
#define IS_SINGLE_OPERATOR(c) ((c)=='('||(c)==')'||(c)=='['||(c)==']'||(c)=='{'||(c)=='}'||(c)==','||(c)==';'||(c)=='|'||(c)==':'||\
                               (c)=='?'||(c)=='`'||(c)=='&'||(c)=='@'||(c)=='%'||(c)=='$'||(c)=='^'||(c)=='\\')
// calculation operators may have two chars, for example: += -= *= /= ~= != == >= <=
#define IS_CALC_OPERATOR(c)   ((c)=='='||(c)=='+'||(c)=='-'||(c)=='*'||(c)=='!'||(c)=='/'||(c)=='<'||(c)=='>'||(c)=='~')
// '#' starts a note (comment)
#define IS_NOTE(c)            ((c)=='#')
// kinds of token produced by the lexer
// tok_null must stay 0: the lexer uses "type is 0" to mean "not found in token_table"
enum token_type
{
    tok_null=0,
    tok_num,tok_str,tok_id,
    tok_for,tok_forindex,tok_foreach,tok_while,
    tok_var,tok_func,tok_break,tok_continue,
    tok_ret,tok_if,tok_elsif,tok_else,tok_nil,
    tok_lcurve,tok_rcurve,
    tok_lbracket,tok_rbracket,
    tok_lbrace,tok_rbrace,
    tok_semi,tok_and,tok_or,tok_comma,tok_dot,tok_ellipsis,tok_quesmark,
    tok_colon,tok_add,tok_sub,tok_mult,tok_div,tok_link,tok_not,
    tok_eq,
    tok_addeq,tok_subeq,tok_multeq,tok_diveq,tok_lnkeq,
    tok_cmpeq,tok_neq,tok_less,tok_leq,tok_grt,tok_geq,
    tok_eof
};
struct struct
{ {
const char* str; const char* str;
const int tok_type; const int tok_type;
}token_table[]= }token_table[]=
{ {
{"for" ,tok_for }, {"for" ,tok_for },
{"forindex",tok_forindex }, {"forindex",tok_forindex },
{"foreach" ,tok_foreach }, {"foreach" ,tok_foreach },
{"while" ,tok_while }, {"while" ,tok_while },
{"var" ,tok_var }, {"var" ,tok_var },
{"func" ,tok_func }, {"func" ,tok_func },
{"break" ,tok_break }, {"break" ,tok_break },
{"continue",tok_continue }, {"continue",tok_continue },
{"return" ,tok_ret }, {"return" ,tok_ret },
{"if" ,tok_if }, {"if" ,tok_if },
{"elsif" ,tok_elsif }, {"elsif" ,tok_elsif },
{"else" ,tok_else }, {"else" ,tok_else },
{"nil" ,tok_nil }, {"nil" ,tok_nil },
{"(" ,tok_lcurve }, {"(" ,tok_lcurve },
{")" ,tok_rcurve }, {")" ,tok_rcurve },
{"[" ,tok_lbracket }, {"[" ,tok_lbracket },
{"]" ,tok_rbracket }, {"]" ,tok_rbracket },
{"{" ,tok_lbrace }, {"{" ,tok_lbrace },
{"}" ,tok_rbrace }, {"}" ,tok_rbrace },
{";" ,tok_semi }, {";" ,tok_semi },
{"and" ,tok_and }, {"and" ,tok_and },
{"or" ,tok_or }, {"or" ,tok_or },
{"," ,tok_comma }, {"," ,tok_comma },
{"." ,tok_dot }, {"." ,tok_dot },
{"..." ,tok_ellipsis }, {"..." ,tok_ellipsis },
{"?" ,tok_quesmark }, {"?" ,tok_quesmark },
{":" ,tok_colon }, {":" ,tok_colon },
{"+" ,tok_add }, {"+" ,tok_add },
{"-" ,tok_sub }, {"-" ,tok_sub },
{"*" ,tok_mult }, {"*" ,tok_mult },
{"/" ,tok_div }, {"/" ,tok_div },
{"~" ,tok_link }, {"~" ,tok_link },
{"!" ,tok_not }, {"!" ,tok_not },
{"=" ,tok_eq }, {"=" ,tok_eq },
{"+=" ,tok_addeq }, {"+=" ,tok_addeq },
{"-=" ,tok_subeq }, {"-=" ,tok_subeq },
{"*=" ,tok_multeq }, {"*=" ,tok_multeq },
{"/=" ,tok_diveq }, {"/=" ,tok_diveq },
{"~=" ,tok_lnkeq }, {"~=" ,tok_lnkeq },
{"==" ,tok_cmpeq }, {"==" ,tok_cmpeq },
{"!=" ,tok_neq }, {"!=" ,tok_neq },
{"<" ,tok_less }, {"<" ,tok_less },
{"<=" ,tok_leq }, {"<=" ,tok_leq },
{">" ,tok_grt }, {">" ,tok_grt },
{">=" ,tok_geq }, {">=" ,tok_geq },
{nullptr ,-1 } {nullptr ,-1 }
}; };
struct token struct token
{ {
uint32_t line; uint32_t line;
uint32_t type; uint32_t type;
std::string str; std::string str;
token(uint32_t l=0,uint32_t t=tok_null,std::string s=""){line=l;type=t;str=s;} token(uint32_t l=0,uint32_t t=tok_null,std::string s=""){line=l;type=t;str=s;}
}; };
class nasal_lexer class nasal_lexer
{ {
private: private:
uint32_t error; uint32_t error;
uint32_t line; uint32_t line;
uint32_t ptr; uint32_t ptr;
size_t res_size; size_t res_size;
std::string line_code; std::string line_code;
std::string res; std::string res;
std::vector<token> token_list; std::vector<token> token_list;
uint32_t get_type(const std::string&); uint32_t get_type(const std::string&);
void die(const char*); void die(const char*);
std::string id_gen(); std::string id_gen();
std::string num_gen(); std::string num_gen();
std::string str_gen(); std::string str_gen();
public: public:
void open(const std::string&); void open(const std::string&);
void scan(); void scan();
void print(); void print();
uint32_t err(){return error;} uint32_t err(){return error;}
const std::vector<token>& get_tokens(){return token_list;} const std::vector<token>& get_tokens(){return token_list;}
}; };
void nasal_lexer::open(const std::string& filename) void nasal_lexer::open(const std::string& filename)
{ {
error=0; error=0;
res.clear(); res.clear();
std::ifstream fin(filename,std::ios::binary); std::ifstream fin(filename,std::ios::binary);
if(fin.fail()) if(fin.fail())
{ {
++error; ++error;
std::cout<<"[lexer] cannot open file <"<<filename<<">.\n"; std::cout<<"[lexer] cannot open file <"<<filename<<">.\n";
return; return;
} }
while(!fin.eof()) std::stringstream ss;
{ ss<<fin.rdbuf();
char c=fin.get(); res=ss.str();
if(fin.eof()) // while(!fin.eof())
break; // {
res+=c; // char c=fin.get();
} // if(fin.eof())
return; // break;
} // res+=c;
// }
uint32_t nasal_lexer::get_type(const std::string& tk_str) return;
{ }
for(int i=0;token_table[i].str;++i)
if(tk_str==token_table[i].str) uint32_t nasal_lexer::get_type(const std::string& tk_str)
return token_table[i].tok_type; {
return tok_null; for(int i=0;token_table[i].str;++i)
} if(tk_str==token_table[i].str)
return token_table[i].tok_type;
void nasal_lexer::die(const char* error_info) return tok_null;
{ }
++error;
std::cout<<"[lexer] line "<<line<<" column "<<line_code.length()<<": \n"<<line_code<<"\n"; void nasal_lexer::die(const char* error_info)
for(auto i:line_code) {
std::cout<<(i=='\t'?'\t':' '); ++error;
std::cout<<"^"<<error_info<<'\n'; std::cout<<"[lexer] line "<<line<<" column "<<line_code.length()<<": \n"<<line_code<<"\n";
return; for(auto i:line_code)
} std::cout<<(i=='\t'?'\t':' ');
std::cout<<"^"<<error_info<<'\n';
std::string nasal_lexer::id_gen() return;
{ }
std::string token_str="";
while(ptr<res_size && (IS_IDENTIFIER(res[ptr])||IS_DIGIT(res[ptr]))) std::string nasal_lexer::id_gen()
token_str+=res[ptr++]; {
line_code+=token_str; std::string token_str="";
return token_str; while(ptr<res_size && (IS_IDENTIFIER(res[ptr])||IS_DIGIT(res[ptr])))
// after running this process, ptr will point to the next token's beginning character token_str+=res[ptr++];
} line_code+=token_str;
return token_str;
std::string nasal_lexer::num_gen() // after running this process, ptr will point to the next token's beginning character
{ }
// generate hex number
if(ptr+1<res_size && res[ptr]=='0' && res[ptr+1]=='x') std::string nasal_lexer::num_gen()
{ {
std::string token_str="0x"; // generate hex number
ptr+=2; if(ptr+1<res_size && res[ptr]=='0' && res[ptr+1]=='x')
while(ptr<res_size && IS_HEX_NUMBER(res[ptr])) {
token_str+=res[ptr++]; std::string token_str="0x";
line_code+=token_str; ptr+=2;
if(token_str.length()<3)// "0x" while(ptr<res_size && IS_HEX_NUMBER(res[ptr]))
die("incorrect number."); token_str+=res[ptr++];
return token_str; line_code+=token_str;
} if(token_str.length()<3)// "0x"
// generate oct number die("incorrect number.");
else if(ptr+1<res_size && res[ptr]=='0' && res[ptr+1]=='o') return token_str;
{ }
std::string token_str="0o"; // generate oct number
ptr+=2; else if(ptr+1<res_size && res[ptr]=='0' && res[ptr+1]=='o')
while(ptr<res_size && IS_OCT_NUMEBR(res[ptr])) {
token_str+=res[ptr++]; std::string token_str="0o";
line_code+=token_str; ptr+=2;
if(token_str.length()<3)// "0o" while(ptr<res_size && IS_OCT_NUMEBR(res[ptr]))
die("incorrect number."); token_str+=res[ptr++];
return token_str; line_code+=token_str;
} if(token_str.length()<3)// "0o"
// generate dec number die("incorrect number.");
// dec number -> [0~9][0~9]*(.[0~9]*)(e|E(+|-)0|[1~9][0~9]*) return token_str;
std::string token_str=""; }
while(ptr<res_size && IS_DIGIT(res[ptr])) // generate dec number
token_str+=res[ptr++]; // dec number -> [0~9][0~9]*(.[0~9]*)(e|E(+|-)0|[1~9][0~9]*)
if(ptr<res_size && res[ptr]=='.') std::string token_str="";
{ while(ptr<res_size && IS_DIGIT(res[ptr]))
token_str+=res[ptr++]; token_str+=res[ptr++];
while(ptr<res_size && IS_DIGIT(res[ptr])) if(ptr<res_size && res[ptr]=='.')
token_str+=res[ptr++]; {
// "xxxx." is not a correct number token_str+=res[ptr++];
if(token_str.back()=='.') while(ptr<res_size && IS_DIGIT(res[ptr]))
{ token_str+=res[ptr++];
line_code+=token_str; // "xxxx." is not a correct number
die("incorrect number."); if(token_str.back()=='.')
return "0"; {
} line_code+=token_str;
} die("incorrect number.");
if(ptr<res_size && (res[ptr]=='e' || res[ptr]=='E')) return "0";
{ }
token_str+=res[ptr++]; }
if(ptr<res_size && (res[ptr]=='-' || res[ptr]=='+')) if(ptr<res_size && (res[ptr]=='e' || res[ptr]=='E'))
token_str+=res[ptr++]; {
while(ptr<res_size && IS_DIGIT(res[ptr])) token_str+=res[ptr++];
token_str+=res[ptr++]; if(ptr<res_size && (res[ptr]=='-' || res[ptr]=='+'))
// "xxxe(-|+)" is not a correct number token_str+=res[ptr++];
if(token_str.back()=='e' || token_str.back()=='E' || token_str.back()=='-' || token_str.back()=='+') while(ptr<res_size && IS_DIGIT(res[ptr]))
{ token_str+=res[ptr++];
line_code+=token_str; // "xxxe(-|+)" is not a correct number
die("incorrect number."); if(token_str.back()=='e' || token_str.back()=='E' || token_str.back()=='-' || token_str.back()=='+')
return "0"; {
} line_code+=token_str;
} die("incorrect number.");
line_code+=token_str; return "0";
return token_str; }
} }
line_code+=token_str;
std::string nasal_lexer::str_gen() return token_str;
{ }
std::string token_str="";
char str_begin=res[ptr]; std::string nasal_lexer::str_gen()
line_code+=str_begin; {
while(++ptr<res_size && res[ptr]!=str_begin) std::string token_str="";
{ char str_begin=res[ptr];
line_code+=res[ptr]; line_code+=str_begin;
if(res[ptr]=='\n') while(++ptr<res_size && res[ptr]!=str_begin)
{ {
line_code=""; line_code+=res[ptr];
++line; if(res[ptr]=='\n')
} {
if(res[ptr]=='\\' && ptr+1<res_size) line_code="";
{ ++line;
line_code+=res[++ptr]; }
switch(res[ptr]) if(res[ptr]=='\\' && ptr+1<res_size)
{ {
case 'a': token_str.push_back('\a');break; line_code+=res[++ptr];
case 'b': token_str.push_back('\b');break; switch(res[ptr])
case 'f': token_str.push_back('\f');break; {
case 'n': token_str.push_back('\n');break; case 'a': token_str.push_back('\a');break;
case 'r': token_str.push_back('\r');break; case 'b': token_str.push_back('\b');break;
case 't': token_str.push_back('\t');break; case 'f': token_str.push_back('\f');break;
case 'v': token_str.push_back('\v');break; case 'n': token_str.push_back('\n');break;
case '?': token_str.push_back('\?');break; case 'r': token_str.push_back('\r');break;
case '0': token_str.push_back('\0');break; case 't': token_str.push_back('\t');break;
case '\\':token_str.push_back('\\');break; case 'v': token_str.push_back('\v');break;
case '\'':token_str.push_back('\'');break; case '?': token_str.push_back('\?');break;
case '\"':token_str.push_back('\"');break; case '0': token_str.push_back('\0');break;
default: token_str.push_back(res[ptr]);break; case '\\':token_str.push_back('\\');break;
} case '\'':token_str.push_back('\'');break;
continue; case '\"':token_str.push_back('\"');break;
} default: token_str.push_back(res[ptr]);break;
token_str+=res[ptr]; }
} continue;
// check if this string ends with a " or ' }
if(ptr++>=res_size) token_str+=res[ptr];
die("get EOF when generating string."); }
if(str_begin=='`' && token_str.length()!=1) // check if this string ends with a " or '
die("\'`\' is used for string that includes one character."); if(ptr++>=res_size)
return token_str; die("get EOF when generating string.");
} if(str_begin=='`' && token_str.length()!=1)
die("\'`\' is used for string that includes one character.");
void nasal_lexer::scan() return token_str;
{ }
token_list.clear();
line=1; void nasal_lexer::scan()
ptr=0; {
line_code=""; token_list.clear();
res_size=res.size(); line=1;
ptr=0;
std::string token_str; line_code="";
while(ptr<res_size) res_size=res.size();
{
while(ptr<res_size && (res[ptr]==' ' || res[ptr]=='\n' || res[ptr]=='\t' || res[ptr]=='\r' || res[ptr]<0)) std::string token_str;
{ while(ptr<res_size)
// these characters will be ignored, and '\n' will cause ++line {
line_code+=res[ptr]; while(ptr<res_size && (res[ptr]==' ' || res[ptr]=='\n' || res[ptr]=='\t' || res[ptr]=='\r' || res[ptr]<0))
if(res[ptr++]=='\n') {
{ // these characters will be ignored, and '\n' will cause ++line
++line; line_code+=res[ptr];
line_code=""; if(res[ptr++]=='\n')
} {
} ++line;
if(ptr>=res_size) break; line_code="";
if(IS_IDENTIFIER(res[ptr])) }
{ }
token_str=id_gen(); if(ptr>=res_size) break;
token_list.push_back({line,get_type(token_str),token_str}); if(IS_IDENTIFIER(res[ptr]))
if(!token_list.back().type) {
token_list.back().type=tok_id; token_str=id_gen();
} token_list.push_back({line,get_type(token_str),token_str});
else if(IS_DIGIT(res[ptr])) if(!token_list.back().type)
token_list.push_back({line,tok_num,num_gen()}); token_list.back().type=tok_id;
else if(IS_STRING(res[ptr])) }
token_list.push_back({line,tok_str,str_gen()}); else if(IS_DIGIT(res[ptr]))
else if(IS_SINGLE_OPERATOR(res[ptr])) token_list.push_back({line,tok_num,num_gen()});
{ else if(IS_STRING(res[ptr]))
token_str=res[ptr]; token_list.push_back({line,tok_str,str_gen()});
line_code+=res[ptr]; else if(IS_SINGLE_OPERATOR(res[ptr]))
uint32_t type=get_type(token_str); {
if(!type) token_str=res[ptr];
die("incorrect operator."); line_code+=res[ptr];
token_list.push_back({line,type,token_str}); uint32_t type=get_type(token_str);
++ptr; if(!type)
} die("incorrect operator.");
else if(res[ptr]=='.') token_list.push_back({line,type,token_str});
{ ++ptr;
if(ptr+2<res_size && res[ptr+1]=='.' && res[ptr+2]=='.') }
{ else if(res[ptr]=='.')
token_str="..."; {
ptr+=3; if(ptr+2<res_size && res[ptr+1]=='.' && res[ptr+2]=='.')
} {
else token_str="...";
{ ptr+=3;
token_str="."; }
++ptr; else
} {
line_code+=token_str; token_str=".";
token_list.push_back({line,get_type(token_str),token_str}); ++ptr;
} }
else if(IS_CALC_OPERATOR(res[ptr])) line_code+=token_str;
{ token_list.push_back({line,get_type(token_str),token_str});
// get calculation operator }
token_str=res[ptr++]; else if(IS_CALC_OPERATOR(res[ptr]))
if(ptr<res_size && res[ptr]=='=') {
token_str+=res[ptr++]; // get calculation operator
line_code+=token_str; token_str=res[ptr++];
token_list.push_back({line,get_type(token_str),token_str}); if(ptr<res_size && res[ptr]=='=')
} token_str+=res[ptr++];
else if(IS_NOTE(res[ptr]))// avoid note, after this process ptr will point to a '\n', so next loop line counter+1 line_code+=token_str;
while(++ptr<res_size && res[ptr]!='\n'); token_list.push_back({line,get_type(token_str),token_str});
else }
{ else if(IS_NOTE(res[ptr]))// avoid note, after this process ptr will point to a '\n', so next loop line counter+1
line_code+=res[ptr++]; while(++ptr<res_size && res[ptr]!='\n');
die("unknown character."); else
} {
} line_code+=res[ptr++];
token_list.push_back({line,tok_eof,""}); die("unknown character.");
res.clear(); }
return; }
} token_list.push_back({line,tok_eof,""});
res.clear();
void nasal_lexer::print() return;
{ }
for(auto& tok:token_list)
std::cout<<"("<<tok.line<<" | "<<tok.str<<")\n"; void nasal_lexer::print()
return; {
} for(auto& tok:token_list)
std::cout<<"("<<tok.line<<" | "<<tok.str<<")\n";
return;
}
#endif #endif

File diff suppressed because it is too large Load Diff