From b8728dd725a8821e7eea99ba188651a413321846 Mon Sep 17 00:00:00 2001 From: Valk Richard Li <48872266+ValKmjolnir@users.noreply.github.com> Date: Tue, 7 Apr 2020 01:25:56 -0700 Subject: [PATCH] update --- version2.0/lib/system.nas | 9 + version2.0/nasal_lexer.h | 923 ++++++++++++++++++------------------- version2.0/nasal_runtime.h | 4 +- 3 files changed, 463 insertions(+), 473 deletions(-) create mode 100644 version2.0/lib/system.nas diff --git a/version2.0/lib/system.nas b/version2.0/lib/system.nas new file mode 100644 index 0000000..84d7b16 --- /dev/null +++ b/version2.0/lib/system.nas @@ -0,0 +1,9 @@ +var system= +{ + # print the type of thing on the screen + type:func(thing) + { + nasal_call_inline_scalar_type(thing); + return; + } +}; \ No newline at end of file diff --git a/version2.0/nasal_lexer.h b/version2.0/nasal_lexer.h index 6eed1c2..5a90f28 100644 --- a/version2.0/nasal_lexer.h +++ b/version2.0/nasal_lexer.h @@ -24,17 +24,20 @@ 2.71828 (float) 0xdeadbeef (hex) or 0xDEADBEEF (hex) 0o170001 (oct) + 1e-1234 (dec) or 10E2 (dec) __token_operator: - ! + - * / ~ - = += -= *= /= ~= - == != > >= < <= + ! + - * / ~ + = += -= *= /= ~= + == != > >= < <= ('and' 'or' are operators too but they are recognized as operator in generate_detail_token()) () [] {} ; , . : ? others: __unknown_operator */ /* filenames of lib files */ -const std::string lib_filename[10]= +#ifndef LIB_FILE_NUM +#define LIB_FILE_NUM 11 +const std::string lib_filename[LIB_FILE_NUM]= { "lib/base.nas", "lib/bits.nas", @@ -43,12 +46,16 @@ const std::string lib_filename[10]= "lib/readline.nas", "lib/regex.nas", "lib/sqlite.nas", + "lib/system.nas", "lib/thread.nas", "lib/unix.nas", "lib/utf8.nas" }; +#endif /* reserve words */ -std::string reserve_word[15]= +#ifndef RESERVE_WORD_NUM +#define RESERVE_WORD_NUM 15 +std::string reserve_word[RESERVE_WORD_NUM]= { "for","foreach","forindex","while", "var","func","break","continue","return", @@ -57,106 +64,32 @@ std::string reserve_word[15]= /* check if an identifier is a reserve word */ int is_reserve_word(std::string str) { - for(int i=0;i<15;++i) + for(int i=0;i resource; - public: - /* + private: + std::vector source_code; + public: + /* delete_all_source: clear all the source codes in std::list resource input_file : input source codes by filenames load_lib_file : input lib source codes - get_source : get the std::list resource + get_source : get the std::vector source_code print_resource : print source codes */ - resource_file() - { - resource.clear(); - return; - } - ~resource_file() - { - resource.clear(); - return; - } - void delete_all_source() - { - resource.clear(); - return; - } - void input_file(std::string filename) - { - char c=0; - std::ifstream fin(filename,std::ios::binary); - if(fin.fail()) - { - std::cout<<">> [Resource] cannot open file \'"<> [Resource] fatal error: lack \'"<& get_source() - { - return resource; - } - void print_resource() - { - int line=1; - std::cout<::iterator i=resource.begin();i!=resource.end();++i) - { - if(32<=*i) - std::cout<<*i; - else - std::cout<<" "; - if(*i=='\n') - { - ++line; - std::cout<& get_source(); + void print_resource(); }; + /* struct token: mainly used in nasal_lexer and nasal_parse*/ struct token { @@ -174,393 +107,441 @@ struct token class nasal_lexer { - private: - std::list token_list; - std::list detail_token_list; - int error; - // change utf8 codes into '?' - // this function will be deleted if there is a way to print utf8 codes out correctly - std::string utf8_clear(std::string tmp) - { - /* - 0xxx xxxx 0x0 1 byte - 110x xxxx 0xc0 2 byte - 1110 xxxx 0xe0 3 byte - 1111 0xxx 0xf0 4 byte - 1111 10xx 0xf8 5 byte - 1111 110x 0xfc 6 byte - bytes after it is: - 10xx xxxx 0x80 - - so utf-8 format is: - 0xxxxxxx - 110xxxxx 10xxxxxx - 1110xxxx 10xxxxxx 10xxxxxx - 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - */ - unsigned char utf8head[6]={0x0,0xc0,0xe0,0xf0,0xf8,0xfc}; - std::string ret=""; - for(int i=0;i=0) - ret+=tmp[i]; - else - { - int utf8byte=0; - for(int j=5;j>=0;--j) - if((tmp[i] & utf8head[j])==utf8head[j]) - { - utf8byte=j; - break; - } - for(int j=0;j token_list; + std::list detail_token_list; + int error; + std::string identifier_gen(std::vector&,int&,int&); + std::string number_gen (std::vector&,int&,int&); + std::string string_gen (std::vector&,int&,int&); public: /* + identifier_gen : scan the source codes and generate identifiers + number_gen : scan the source codes and generate numbers + string_gen : scan the source codes and generate strings print_token_list : print generated token list scanner : scan the source codes and generate tokens generate_detail_token: recognize and change token types to detailed types that can be processed by nasal_parse get_error : get the number of errors that occurred when generating tokens get_detail_token : output the detailed tokens,must be used after generate_detail_token() */ - nasal_lexer() - { - token_list.clear(); - detail_token_list.clear(); - error=0; - return; - } - ~nasal_lexer() - { - token_list.clear(); - detail_token_list.clear(); - return; - } - void delete_all_tokens() - { - token_list.clear(); - detail_token_list.clear(); - error=0; - return; - } - void print_token_list() - { - for(std::list::iterator i=token_list.begin();i!=token_list.end();++i) - { - std::cout<<"line "<line<<" ( "; - print_lexer_token(i->type); - std::cout<<" | "<str<<" )"<& res) - { - token_list.clear(); - detail_token_list.clear(); - error=0; - - int line=1; - std::string token_str; - std::list::iterator ptr=res.begin(); - while(ptr!=res.end()) - { - while(ptr!=res.end() && (*ptr==' ' || *ptr=='\n' || *ptr=='\t' || *ptr=='\r' || *ptr<0 || *ptr>127)) - { - if(*ptr=='\n') - ++line; - ++ptr; - } - if(ptr==res.end()) - break; - - if(*ptr=='_' || ('a'<=*ptr && *ptr<='z') || ('A'<=*ptr && *ptr<='Z')) - { - // get identifier or reserve word - token_str=""; - while(*ptr=='_' || ('a'<=*ptr && *ptr<='z') || ('A'<=*ptr && *ptr<='Z') || ('0'<=*ptr && *ptr<='9')) - { - token_str+=*ptr; - ++ptr; - if(ptr==res.end()) - break; - } - // check dynamic identifier "..." - if(*ptr=='.') - { - ++ptr; - if(ptr!=res.end() && *ptr=='.') - { - ++ptr; - if(ptr!=res.end() && *ptr=='.') - { - token_str+="..."; - ++ptr; - } - else - { - --ptr; - --ptr; - } - } - else - --ptr; - } - token new_token; - new_token.line=line; - new_token.type=is_reserve_word(token_str); - new_token.str=token_str; - token_list.push_back(new_token); - if(ptr==res.end()) - break; - } - else if('0'<=*ptr && *ptr<='9') - { - token_str=""; - while(('0'<=*ptr && *ptr<='9') || ('a'<=*ptr && *ptr<='f') || ('A'<=*ptr && *ptr<='F') || *ptr=='.' || *ptr=='x' || *ptr=='o') - { - token_str+=*ptr; - ++ptr; - if(ptr==res.end()) - break; - } - if(!check_numerable_string(token_str)) - { - ++error; - std::cout<<">> [Lexer] line "<> [Lexer] line "<' || *ptr=='~') - { - // get calculation operator - token_str=""; - token_str+=*ptr; - ++ptr; - if(ptr!=res.end() && *ptr=='=') - { - token_str+=*ptr; - ++ptr; - } - token new_token; - new_token.line=line; - new_token.type=__token_operator; - new_token.str=token_str; - token_list.push_back(new_token); - if(ptr==res.end()) - break; - } - else if(*ptr=='#') - { - // avoid note - while(ptr!=res.end() && *ptr!='\n') - ++ptr; - if(ptr==res.end()) - break; - } - else - { - ++error; - std::cout<<">> [Lexer] line "<> [Lexer] complete scanning. "<::iterator i=token_list.begin();i!=token_list.end();++i) - { - if(i->type==__token_number) - { - detail_token.line=i->line; - detail_token.str =i->str; - detail_token.type=__number; - detail_token_list.push_back(detail_token); - } - else if(i->type==__token_string) - { - detail_token.line=i->line; - detail_token.str =i->str; - detail_token.type=__string; - detail_token_list.push_back(detail_token); - } - else if(i->type==__token_reserve_word) - { - detail_token.line=i->line; - detail_token.str =""; - if(i->str=="for") detail_token.type=__for; - else if(i->str=="foreach") detail_token.type=__foreach; - else if(i->str=="forindex") detail_token.type=__forindex; - else if(i->str=="while") detail_token.type=__while; - else if(i->str=="var") detail_token.type=__var; - else if(i->str=="func") detail_token.type=__func; - else if(i->str=="break") detail_token.type=__break; - else if(i->str=="continue") detail_token.type=__continue; - else if(i->str=="return") detail_token.type=__return; - else if(i->str=="if") detail_token.type=__if; - else if(i->str=="else") detail_token.type=__else; - else if(i->str=="elsif") detail_token.type=__elsif; - else if(i->str=="nil") detail_token.type=__nil; - else if(i->str=="and") detail_token.type=__and_operator; - else if(i->str=="or") detail_token.type=__or_operator; - detail_token_list.push_back(detail_token); - } - else if(i->type==__token_identifier) - { - detail_token.line=i->line; - detail_token.str =i->str; - if(i->str.length()<=3) - detail_token.type=__id; - else - { - std::string tempstr=i->str; - int strback=tempstr.length()-1; - if(tempstr.length()>3 &&tempstr[strback]=='.' && tempstr[strback-1]=='.' && tempstr[strback-2]=='.') - { - detail_token.str=""; - for(int j=0;jtype==__token_operator) - { - detail_token.line=i->line; - detail_token.str =""; - if(i->str=="+") detail_token.type=__add_operator; - else if(i->str=="-") detail_token.type=__sub_operator; - else if(i->str=="*") detail_token.type=__mul_operator; - else if(i->str=="/") detail_token.type=__div_operator; - else if(i->str=="~") detail_token.type=__link_operator; - else if(i->str=="+=") detail_token.type=__add_equal; - else if(i->str=="-=") detail_token.type=__sub_equal; - else if(i->str=="*=") detail_token.type=__mul_equal; - else if(i->str=="/=") detail_token.type=__div_equal; - else if(i->str=="~=") detail_token.type=__link_equal; - else if(i->str=="=") detail_token.type=__equal; - else if(i->str=="==") detail_token.type=__cmp_equal; - else if(i->str=="!=") detail_token.type=__cmp_not_equal; - else if(i->str=="<") detail_token.type=__cmp_less; - else if(i->str=="<=") detail_token.type=__cmp_less_or_equal; - else if(i->str==">") detail_token.type=__cmp_more; - else if(i->str==">=") detail_token.type=__cmp_more_or_equal; - else if(i->str==";") detail_token.type=__semi; - else if(i->str==".") detail_token.type=__dot; - else if(i->str==":") detail_token.type=__colon; - else if(i->str==",") detail_token.type=__comma; - else if(i->str=="?") detail_token.type=__ques_mark; - else if(i->str=="!") detail_token.type=__nor_operator; - else if(i->str=="[") detail_token.type=__left_bracket; - else if(i->str=="]") detail_token.type=__right_bracket; - else if(i->str=="(") detail_token.type=__left_curve; - else if(i->str==")") detail_token.type=__right_curve; - else if(i->str=="{") detail_token.type=__left_brace; - else if(i->str=="}") detail_token.type=__right_brace; - else - { - ++error; - std::cout<<">> [Lexer] line "<str<<"\'."<> [Lexer] complete generating. "<& get_detail_token_list() - { - return detail_token_list; - } + nasal_lexer(); + ~nasal_lexer(); + void delete_all_tokens(); + void print_token_list(); + void scanner(std::vector&); + void generate_detail_token(); + int get_error(); + std::list& get_detail_token_list(); }; + +void resource_file::delete_all_source() +{ + std::vector tmp; + source_code.clear(); + source_code.swap(tmp); + return; +} +void resource_file::input_file(std::string filename) +{ + char c=0; + std::ifstream fin(filename,std::ios::binary); + if(fin.fail()) + { + std::cout<<">> [Resource] cannot open file \'"<> [Resource] fatal error: lack \'"<& resource_file::get_source() +{ + return source_code; +} +void resource_file::print_resource() +{ + int line=1; + std::cout<& res,int& ptr,int& line) +{ + std::string token_str=""; + while(res[ptr]=='_' || ('a'<=res[ptr] && res[ptr]<='z') || ('A'<=res[ptr] && res[ptr]<='Z') || ('0'<=res[ptr] && res[ptr]<='9')) + { + token_str+=res[ptr]; + ++ptr; + if(ptr>=res.size()) + break; + } + // check dynamic identifier "..." + if(res[ptr]=='.') + { + ++ptr; + if(ptr& res,int& ptr,int& line) +{ + std::string token_str=""; + while(('0'<=res[ptr] && res[ptr]<='9') || + ('a'<=res[ptr] && res[ptr]<='f') || + ('A'<=res[ptr] && res[ptr]<='F') || + res[ptr]=='.' || res[ptr]=='x' || res[ptr]=='o' || + res[ptr]=='e' || res[ptr]=='E' || res[ptr]=='-') + { + token_str+=res[ptr]; + ++ptr; + if(ptr>=res.size()) + break; + } + if(!check_numerable_string(token_str)) + { + ++error; + std::cout<<">> [Lexer] line "<& res,int& ptr,int& line) +{ + std::string token_str=""; + char str_begin=res[ptr]; + ++ptr; + if(ptr>=res.size()) + return token_str; + while(ptr=res.size()) + break; + } + // check if this string ends with a " or ' + if(ptr>=res.size()) + { + ++error; + std::cout<<">> [Lexer] line "<::iterator i=token_list.begin();i!=token_list.end();++i) + { + std::cout<<"line "<line<<" ( "; + print_lexer_token(i->type); + std::cout<<" | "<str<<" )"<& res) +{ + token_list.clear(); + detail_token_list.clear(); + error=0; + + int line=1; + std::string token_str; + int ptr=0; + while(ptr=res.size()) + break; + if(res[ptr]=='_' || ('a'<=res[ptr] && res[ptr]<='z') || ('A'<=res[ptr] && res[ptr]<='Z')) + { + token_str=identifier_gen(res,ptr,line); + token new_token; + new_token.line=line; + new_token.type=is_reserve_word(token_str); + new_token.str=token_str; + token_list.push_back(new_token); + } + else if('0'<=res[ptr] && res[ptr]<='9') + { + token_str=number_gen(res,ptr,line); + token new_token; + new_token.line=line; + new_token.type=__token_number; + new_token.str=token_str; + token_list.push_back(new_token); + } + else if(res[ptr]=='\'' || res[ptr]=='\"') + { + token_str=string_gen(res,ptr,line); + token new_token; + new_token.line=line; + new_token.type=__token_string; + new_token.str=token_str; + token_list.push_back(new_token); + } + else if(res[ptr]=='(' || res[ptr]==')' || res[ptr]=='[' || res[ptr]==']' || res[ptr]=='{' || + res[ptr]=='}' || res[ptr]==',' || res[ptr]==';' || res[ptr]=='|' || res[ptr]==':' || + res[ptr]=='?' || res[ptr]=='.' || res[ptr]=='`' || res[ptr]=='&' || res[ptr]=='@' || + res[ptr]=='%' || res[ptr]=='$' || res[ptr]=='^' || res[ptr]=='\\') + { + token_str=""; + token_str+=res[ptr]; + token new_token; + new_token.line=line; + new_token.type=__token_operator; + new_token.str=token_str; + token_list.push_back(new_token); + ++ptr; + } + else if(res[ptr]=='=' || res[ptr]=='+' || res[ptr]=='-' || res[ptr]=='*' || res[ptr]=='!' || + res[ptr]=='/' || res[ptr]=='<' || res[ptr]=='>' || res[ptr]=='~') + { + // get calculation operator + token_str=""; + token_str+=res[ptr]; + ++ptr; + if(ptr> [Lexer] line "<> [Lexer] complete scanning. "<::iterator i=token_list.begin();i!=token_list.end();++i) + { + if(i->type==__token_number) + { + detail_token.line=i->line; + detail_token.str =i->str; + detail_token.type=__number; + detail_token_list.push_back(detail_token); + } + else if(i->type==__token_string) + { + detail_token.line=i->line; + detail_token.str =i->str; + detail_token.type=__string; + detail_token_list.push_back(detail_token); + } + else if(i->type==__token_reserve_word) + { + detail_token.line=i->line; + detail_token.str =""; + if (i->str=="for") detail_token.type=__for; + else if(i->str=="foreach") detail_token.type=__foreach; + else if(i->str=="forindex") detail_token.type=__forindex; + else if(i->str=="while") detail_token.type=__while; + else if(i->str=="var") detail_token.type=__var; + else if(i->str=="func") detail_token.type=__func; + else if(i->str=="break") detail_token.type=__break; + else if(i->str=="continue") detail_token.type=__continue; + else if(i->str=="return") detail_token.type=__return; + else if(i->str=="if") detail_token.type=__if; + else if(i->str=="else") detail_token.type=__else; + else if(i->str=="elsif") detail_token.type=__elsif; + else if(i->str=="nil") detail_token.type=__nil; + else if(i->str=="and") detail_token.type=__and_operator; + else if(i->str=="or") detail_token.type=__or_operator; + detail_token_list.push_back(detail_token); + } + else if(i->type==__token_identifier) + { + detail_token.line=i->line; + detail_token.str =i->str; + if(i->str.length()<=3) + detail_token.type=__id; + else + { + std::string tempstr=i->str; + int strback=tempstr.length()-1; + if(tempstr.length()>3 &&tempstr[strback]=='.' && tempstr[strback-1]=='.' && tempstr[strback-2]=='.') + { + detail_token.str=""; + for(int j=0;jtype==__token_operator) + { + detail_token.line=i->line; + detail_token.str =""; + if (i->str=="+") detail_token.type=__add_operator; + else if(i->str=="-") detail_token.type=__sub_operator; + else if(i->str=="*") detail_token.type=__mul_operator; + else if(i->str=="/") detail_token.type=__div_operator; + else if(i->str=="~") detail_token.type=__link_operator; + else if(i->str=="+=") detail_token.type=__add_equal; + else if(i->str=="-=") detail_token.type=__sub_equal; + else if(i->str=="*=") detail_token.type=__mul_equal; + else if(i->str=="/=") detail_token.type=__div_equal; + else if(i->str=="~=") detail_token.type=__link_equal; + else if(i->str=="=") detail_token.type=__equal; + else if(i->str=="==") detail_token.type=__cmp_equal; + else if(i->str=="!=") detail_token.type=__cmp_not_equal; + else if(i->str=="<") detail_token.type=__cmp_less; + else if(i->str=="<=") detail_token.type=__cmp_less_or_equal; + else if(i->str==">") detail_token.type=__cmp_more; + else if(i->str==">=") detail_token.type=__cmp_more_or_equal; + else if(i->str==";") detail_token.type=__semi; + else if(i->str==".") detail_token.type=__dot; + else if(i->str==":") detail_token.type=__colon; + else if(i->str==",") detail_token.type=__comma; + else if(i->str=="?") detail_token.type=__ques_mark; + else if(i->str=="!") detail_token.type=__nor_operator; + else if(i->str=="[") detail_token.type=__left_bracket; + else if(i->str=="]") detail_token.type=__right_bracket; + else if(i->str=="(") detail_token.type=__left_curve; + else if(i->str==")") detail_token.type=__right_curve; + else if(i->str=="{") detail_token.type=__left_brace; + else if(i->str=="}") detail_token.type=__right_brace; + else + { + ++error; + std::cout<<">> [Lexer] line "<str<<"\'."<> [Lexer] complete generating. "<& nasal_lexer::get_detail_token_list() +{ + return detail_token_list; +} #endif diff --git a/version2.0/nasal_runtime.h b/version2.0/nasal_runtime.h index 7d46490..291861d 100644 --- a/version2.0/nasal_runtime.h +++ b/version2.0/nasal_runtime.h @@ -3,7 +3,6 @@ #define nas_lib_func_num 32 std::string inline_func_name[nas_lib_func_num]= { - "nasal_call_inline_scalar_type", //base.nas "nasal_call_inline_push_back", "nasal_call_inline_push_null", @@ -39,7 +38,8 @@ std::string inline_func_name[nas_lib_func_num]= "nasal_call_inline_cpp_math_ln", "nasal_call_inline_cpp_math_sqrt", "nasal_call_inline_cpp_atan2", - // + //system.nas + "nasal_call_inline_scalar_type" }; class nasal_runtime