diff --git a/version2.0/nasal.h b/version2.0/nasal.h index 44ce56f..0588935 100644 --- a/version2.0/nasal.h +++ b/version2.0/nasal.h @@ -4,9 +4,10 @@ #include #include #include +#include #include #include -// if thread is used, don't forget to add -std=c++11 or higher standard before executing +/* if thread is used, don't forget to add -std=c++11 or higher standard before executing */ // #include #include #include @@ -14,71 +15,84 @@ #include /* - nasal_misc.h: - including some functions that change number to string or change string to number - including a function that check if a string is a numerable string - including a function that print the hex format number of an integer +nasal_misc.h: +including some functions that change number to string or change string to number +including a function that check if a string is a numerable string +including a function that print the hex format number of an integer */ #include "nasal_misc.h" + /* - nasal_enum.h - including enums of: lexer token type,parse generated type,scalar type - lexer token type is used by nasal_lexer - parse generated type is used both by nasal_parse and abstract_syntax_tree - parse generated type is also used when lexer is generating detailed tokens which are used in nasal_parse - scalar type is used in nasal_runtime and nasal_gc +nasal_enum.h +including enums of: lexer token type,parse generated type,scalar type +lexer token type is used by nasal_lexer +parse generated type is used both by nasal_parse and abstract_syntax_tree +parse generated type is also used when lexer is generating detailed tokens which are used in nasal_parse +scalar type is used in nasal_runtime and nasal_gc */ #include "nasal_enum.h" + /* - nasal_ast.h - including a class named abstract_syntax_tree - this class is frequently used in nasal_parse nasal_runtime +nasal_ast.h +including a class named abstract_syntax_tree +this class is frequently used in nasal_parse nasal_runtime */ #include "nasal_ast.h" + /* - nasal_lexer.h - including a class named resource_file - including a class named nasal_lexer - including a string[] named lib_filename, by this way resource_file can load lib files - including a string[] named reserve_word, it is used in lexer,when generating an identifier,nasal_lexer will check if it is a reserve word - including a struct named token, this struct is often used in nasal_lexer and nasal_parse - including a function named is_reserve_word, checking if an identifier is a reserve word +nasal_lexer.h +including a class named resource_file +including a class named nasal_lexer +including a string[] named lib_filename, by this way resource_file can load lib files +including a string[] named reserve_word, it is used in lexer,when generating an identifier,nasal_lexer will check if it is a reserve word +including a struct named token, this struct is often used in nasal_lexer and nasal_parse +including a function named is_reserve_word, checking if an identifier is a reserve word */ #include "nasal_lexer.h" + /* - nasal_parse.h - including a class named nasal_parse - nasal_parse uses tokens generated by lexer and generats them into abstract syntax tree - this class has a special enum named parse_error_type - if parse errors occur,this enum will be into use +nasal_parse.h +including a class named nasal_parse +nasal_parse uses tokens generated by lexer and generats them into abstract syntax tree +this class has a special enum named parse_error_type +if parse errors occur,this enum will be into use */ #include "nasal_parse.h" + /* - nasal_gc.h(garbage collector and memory manager of nasal_runtime) - including basic classed named: nasal_number, nasal_string, nasal_vector, nasal_hash, nasal_function - including important class named gc_manager - including struct named gc_unit, it is the smallest memory unit.used in gc_manager - nasal_gc is an object of class gc_manager,and nasal_runtime uses this object as it's memory manager +nasal_gc.h(garbage collector and memory manager of nasal_runtime) +including basic classed named: nasal_number, nasal_string, nasal_vector, nasal_hash, nasal_function +including important class named gc_manager +including struct named gc_unit, it is the smallest memory unit.used in gc_manager +nasal_gc is an object of class gc_manager,and nasal_runtime uses this object as it's memory manager */ #include "nasal_gc.h" -/* - nasal_runtime.h - including a class named nasal_runtime - including a string[] named inline_func_name - function that mentioned in inline_func_name is special functions that were written by cpp,so they can be ca;;ed directly - if you want to add new built-in functions: - add it's name into inline_func_name - change the number of nas_lib_func_num - write it's function in nasal_runtime::inline_function - and don't forget to warp it up with a function that written by nasal - - for example: print(dyn...) - var print=func(dyn...) - { - nasal_call_inline_c_std_puts(dyn); - return nil; - } +/* +nasal_builtinfunc.h +including built-in functions of nasal lib +all functions in this .cpp will be used in nasal_runtime::builtin_function() +*/ +#include "nasal_builtinfunc.h" + +/* +nasal_runtime.h +including a class named nasal_runtime +including a string[] named inline_func_name +function that mentioned in inline_func_name is special functions that were written by cpp,so they can be ca;;ed directly + +if you want to add new built-in functions: + add it's name into inline_func_name + change the number of nas_lib_func_num + write it's function in nasal_runtime::inline_function + and don't forget to warp it up with a function that written by nasal + +for example: print(dyn...) +var print=func(dyn...) +{ + nasal_call_inline_c_std_puts(dyn); + return nil; +} */ #include "nasal_runtime.h" diff --git a/version2.0/nasal_enum.h b/version2.0/nasal_enum.h index ca26832..f5a5843 100644 --- a/version2.0/nasal_enum.h +++ b/version2.0/nasal_enum.h @@ -2,7 +2,38 @@ #define __NASAL_ENUM_H__ // lexer token type is only used in nasal_lexer -// each scanned token will be recognized as one of these below +/* +__token_reserve_word: + for,foreach,forindex,while : loop head + var,func : definition + break,continue : in loop + return : in function + if,else,elsif : conditional expr + and,or : calculation + nil : special type +__token_identifier: + must begin with '_' or 'a'~'z' or 'A'~'Z' + can include '_' or 'a'~'z' or 'A'~'Z' or '0'~'9' +__token_string: + example: + "string" + 'string' + if a string does not end with " or ' then lexer will throw an error +__token_number: + example: + 2147483647 (integer) + 2.71828 (float) + 0xdeadbeef (hex) or 0xDEADBEEF (hex) + 0o170001 (oct) + 1e-1234 (dec) or 10E2 (dec) +__token_operator: + ! + - * / ~ + = += -= *= /= ~= + == != > >= < <= + ('and' 'or' are operators too but they are recognized as operator in generate_detail_token()) + () [] {} ; , . : ? + others: __unknown_operator +*/ enum lexer_token_type { __token_reserve_word=1, @@ -28,41 +59,40 @@ void print_lexer_token(int type) enum parse_gen_type { /* - stack end is an important flag for parse token stack to - check if it's stack is at end - if stack is empty,the parser will get a wrong memory space and cause SIGSEGV + stack end is an important flag for parse token stack to + check if it's stack is at end/empty + if stack is empty,the parser will get a wrong memory space and cause SIGSEGV */ __stack_end=1, - // operators == != < <= > >= + __cmp_equal, __cmp_not_equal, __cmp_less,__cmp_less_or_equal, - __cmp_more,__cmp_more_or_equal, - // operators and or ! + - * / ~ + __cmp_more,__cmp_more_or_equal, // operators == != < <= > >= + __and_operator, __or_operator,__nor_operator, __add_operator,__sub_operator, - __mul_operator,__div_operator,__link_operator, - // operators = += -= *= /= ~= + __mul_operator,__div_operator,__link_operator, // operators and or ! + - * / ~ + __equal, __add_equal,__sub_equal, - __mul_equal,__div_equal,__link_equal, - // operators {} [] () ; , : . ? + __mul_equal,__div_equal,__link_equal, // operators = += -= *= /= ~= + __left_brace,__right_brace, // {} __left_bracket,__right_bracket, // [] __left_curve,__right_curve, // () __semi,__comma,__colon,__dot,__ques_mark, // ; , : . ? __unknown_operator, - // reserve words + __var, __func,__return,__nil, __if,__elsif,__else, __continue,__break, - __for,__forindex,__foreach,__while, + __for,__forindex,__foreach,__while, // reserve words - // basic scalar type: number string - __number,__string, - // basic identifier type: identifier dynamic_identifier - __id,__dynamic_id, + __number,__string, // basic scalar type: number string + + __id,__dynamic_id, // basic identifier type: identifier dynamic_identifier // abstract_syntax_tree type below // abstract_syntax_tree also uses the types above, such as operators @@ -141,7 +171,7 @@ void print_parse_token(int type) case __number: context="num"; break; case __string: context="str"; break; - default: context="undefined"; break; + default: context="undefined"; break; } std::cout< >= < <= - ('and' 'or' are operators too but they are recognized as operator in generate_detail_token()) - () [] {} ; , . : ? - others: __unknown_operator -*/ +#define IS_IDENTIFIER_HEAD(c) (c=='_')||('a'<=c && c<='z')||('A'<=c&&c<='Z') +#define IS_IDENTIFIER_BODY(c) (c=='_')||('a'<=c && c<='z')||('A'<=c&&c<='Z')||('0'<=c&&c<='9') +#define IS_NUMBER_HEAD(c) ('0'<=c&&c<='9') +#define IS_NUMBER_BODY(c) ('0'<=c&&c<='9')||('a'<=c&&c<='f')||('A'<=c&&c<='F')||(c=='e'||c=='E'||c=='.'||c=='x'||c=='o') +#define IS_STRING_HEAD(c) (c=='\''||c=='\"') +// single operators have only one character +#define IS_SINGLE_OPRATOR(c) (c=='('||c==')'||c=='['||c==']'||c=='{'||c=='}'||c==','||c==';'||c=='|'||c==':'||\ + c=='?'||c=='.'||c=='`'||c=='&'||c=='@'||c=='%'||c=='$'||c=='^'||c=='\\') +// calculation operators may have two chars, for example: += -= *= /= ~= != == >= <= +#define IS_CALC_OPERATOR(c) (c=='='||c=='+'||c=='-'||c=='*'||c=='!'||c=='/'||c=='<'||c=='>'||c=='~') +#define IS_NOTE_HEAD(c) (c=='#') /* filenames of lib files */ #ifndef LIB_FILE_NUM @@ -52,6 +31,7 @@ const std::string lib_filename[LIB_FILE_NUM]= "lib/utf8.nas" }; #endif + /* reserve words */ #ifndef RESERVE_WORD_NUM #define RESERVE_WORD_NUM 15 @@ -73,21 +53,21 @@ int is_reserve_word(std::string str) class resource_file { - private: - std::vector source_code; - public: - /* - delete_all_source: clear all the source codes in std::list resource - input_file : input source codes by filenames - load_lib_file : input lib source codes - get_source : get the std::vector source_code - print_resource : print source codes - */ - void delete_all_source(); - void input_file(std::string); - void load_lib_file(); - std::vector& get_source(); - void print_resource(); +private: + std::vector source_code; +public: + /* + delete_all_source: clear all the source codes in std::list resource + input_file : input source codes by filenames + load_lib_file : input lib source codes + get_source : get the std::vector source_code + print_resource : print source codes + */ + void delete_all_source(); + void input_file(std::string); + void load_lib_file(); + std::vector& get_source(); + void print_resource(); }; /* struct token: mainly used in nasal_lexer and nasal_parse*/ @@ -100,39 +80,39 @@ struct token { line=tmp.line; type=tmp.type; - str=tmp.str; + str =tmp.str; return *this; } }; class nasal_lexer { - private: - std::list token_list; - std::list detail_token_list; - int error; - std::string identifier_gen(std::vector&,int&,int&); - std::string number_gen (std::vector&,int&,int&); - std::string string_gen (std::vector&,int&,int&); - public: - /* - identifier_gen : scan the source codes and generate identifiers - number_gen : scan the source codes and generate numbers - string_gen : scan the source codes and generate strings - print_token_list : print generated token list - scanner : scan the source codes and generate tokens - generate_detail_token: recognize and change token types to detailed types that can be processed by nasal_parse - get_error : get the number of errors that occurred when generating tokens - get_detail_token : output the detailed tokens,must be used after generate_detail_token() - */ - nasal_lexer(); - ~nasal_lexer(); - void delete_all_tokens(); - void print_token_list(); - void scanner(std::vector&); - void generate_detail_token(); - int get_error(); - std::list& get_detail_token_list(); +private: + std::list token_list; + std::list detail_token_list; + int error; + std::string identifier_gen(std::vector&,int&,int&); + std::string number_gen (std::vector&,int&,int&); + std::string string_gen (std::vector&,int&,int&); +public: + /* + identifier_gen : scan the source codes and generate identifiers + number_gen : scan the source codes and generate numbers + string_gen : scan the source codes and generate strings + print_token_list : print generated token list + scanner : scan the source codes and generate tokens + generate_detail_token: recognize and change token types to detailed types that can be processed by nasal_parse + get_error : get the number of errors that occurred when generating tokens + get_detail_token : output the detailed tokens,must be used after generate_detail_token() + */ + nasal_lexer(); + ~nasal_lexer(); + void delete_all_tokens(); + void print_token_list(); + void scanner(std::vector&); + void generate_detail_token(); + int get_error(); + std::list& get_detail_token_list(); }; @@ -141,6 +121,7 @@ void resource_file::delete_all_source() std::vector tmp; source_code.clear(); source_code.swap(tmp); + // use tmp's destructor to delete the memory space that source_code used before return; } void resource_file::input_file(std::string filename) @@ -156,9 +137,7 @@ void resource_file::input_file(std::string filename) while(!fin.eof()) { c=fin.get(); - if(fin.eof()) - break; - //source_code.push_back(c<0? '?':c); + if(fin.eof()) break; source_code.push_back(c); } fin.close(); @@ -178,9 +157,8 @@ void resource_file::load_lib_file() while(!fin.eof()) { c=fin.get(); - if(fin.eof()) - break; - source_code.push_back(c<0? '?':c); + if(fin.eof()) break; + source_code.push_back(c); } } fin.close(); @@ -193,26 +171,25 @@ std::vector& resource_file::get_source() } void resource_file::print_resource() { + int size=source_code.size(); int line=1; std::cout<=0) - break; + if(source_code[i]>=0) break; tmp.push_back(source_code[i]); } std::cout<& res,int& ptr,int& line) { std::string token_str=""; - while(res[ptr]=='_' || ('a'<=res[ptr] && res[ptr]<='z') || ('A'<=res[ptr] && res[ptr]<='Z') || ('0'<=res[ptr] && res[ptr]<='9')) + while(IS_IDENTIFIER_BODY(res[ptr])) { token_str+=res[ptr]; ++ptr; - if(ptr>=res.size()) - break; + if(ptr>=res.size()) break; } // check dynamic identifier "..." - if(res[ptr]=='.') + if(ptr+2& res,int& ptr,int& line) { - bool scientific_notation=false; + bool scientific_notation=false;// numbers like 1e8 are scientific_notation std::string token_str=""; - while(('0'<=res[ptr] && res[ptr]<='9') || - ('a'<=res[ptr] && res[ptr]<='f') || - ('A'<=res[ptr] && res[ptr]<='F') || - res[ptr]=='.' || res[ptr]=='x' || res[ptr]=='o' || - res[ptr]=='e' || res[ptr]=='E') + while(IS_NUMBER_BODY(res[ptr])) { token_str+=res[ptr]; if(res[ptr]=='e' || res[ptr]=='E') @@ -299,14 +261,12 @@ std::string nasal_lexer::string_gen(std::vector& res,int& ptr,int& line) std::string token_str=""; char str_begin=res[ptr]; ++ptr; - if(ptr>=res.size()) - return token_str; + if(ptr>=res.size()) return token_str; while(ptr& res,int& ptr,int& line) } } ++ptr; - if(ptr>=res.size()) - break; + if(ptr>=res.size()) break; } // check if this string ends with a " or ' if(ptr>=res.size()) @@ -377,13 +336,12 @@ void nasal_lexer::scanner(std::vector& res) { while(ptr=res.size()) - break; - if(res[ptr]=='_' || ('a'<=res[ptr] && res[ptr]<='z') || ('A'<=res[ptr] && res[ptr]<='Z')) + if(ptr>=res.size()) break; + if(IS_IDENTIFIER_HEAD(res[ptr])) { token_str=identifier_gen(res,ptr,line); token new_token; @@ -392,7 +350,7 @@ void nasal_lexer::scanner(std::vector& res) new_token.str=token_str; token_list.push_back(new_token); } - else if('0'<=res[ptr] && res[ptr]<='9') + else if(IS_NUMBER_HEAD(res[ptr])) { token_str=number_gen(res,ptr,line); token new_token; @@ -401,7 +359,7 @@ void nasal_lexer::scanner(std::vector& res) new_token.str=token_str; token_list.push_back(new_token); } - else if(res[ptr]=='\'' || res[ptr]=='\"') + else if(IS_STRING_HEAD(res[ptr])) { token_str=string_gen(res,ptr,line); token new_token; @@ -410,10 +368,7 @@ void nasal_lexer::scanner(std::vector& res) new_token.str=token_str; token_list.push_back(new_token); } - else if(res[ptr]=='(' || res[ptr]==')' || res[ptr]=='[' || res[ptr]==']' || res[ptr]=='{' || - res[ptr]=='}' || res[ptr]==',' || res[ptr]==';' || res[ptr]=='|' || res[ptr]==':' || - res[ptr]=='?' || res[ptr]=='.' || res[ptr]=='`' || res[ptr]=='&' || res[ptr]=='@' || - res[ptr]=='%' || res[ptr]=='$' || res[ptr]=='^' || res[ptr]=='\\') + else if(IS_SINGLE_OPRATOR(res[ptr])) { token_str=""; token_str+=res[ptr]; @@ -424,8 +379,7 @@ void nasal_lexer::scanner(std::vector& res) token_list.push_back(new_token); ++ptr; } - else if(res[ptr]=='=' || res[ptr]=='+' || res[ptr]=='-' || res[ptr]=='*' || res[ptr]=='!' || - res[ptr]=='/' || res[ptr]=='<' || res[ptr]=='>' || res[ptr]=='~') + else if(IS_CALC_OPERATOR(res[ptr])) { // get calculation operator token_str=""; @@ -442,7 +396,7 @@ void nasal_lexer::scanner(std::vector& res) new_token.str=token_str; token_list.push_back(new_token); } - else if(res[ptr]=='#') + else if(IS_NOTE_HEAD(res[ptr])) { // avoid note while(ptrline; detail_token.str =""; - if (i->str=="for") detail_token.type=__for; - else if(i->str=="foreach") detail_token.type=__foreach; + if (i->str=="for" ) detail_token.type=__for; + else if(i->str=="foreach" ) detail_token.type=__foreach; else if(i->str=="forindex") detail_token.type=__forindex; - else if(i->str=="while") detail_token.type=__while; - else if(i->str=="var") detail_token.type=__var; - else if(i->str=="func") detail_token.type=__func; - else if(i->str=="break") detail_token.type=__break; + else if(i->str=="while" ) detail_token.type=__while; + else if(i->str=="var" ) detail_token.type=__var; + else if(i->str=="func" ) detail_token.type=__func; + else if(i->str=="break" ) detail_token.type=__break; else if(i->str=="continue") detail_token.type=__continue; - else if(i->str=="return") detail_token.type=__return; - else if(i->str=="if") detail_token.type=__if; - else if(i->str=="else") detail_token.type=__else; - else if(i->str=="elsif") detail_token.type=__elsif; - else if(i->str=="nil") detail_token.type=__nil; - else if(i->str=="and") detail_token.type=__and_operator; - else if(i->str=="or") detail_token.type=__or_operator; + else if(i->str=="return" ) detail_token.type=__return; + else if(i->str=="if" ) detail_token.type=__if; + else if(i->str=="else" ) detail_token.type=__else; + else if(i->str=="elsif" ) detail_token.type=__elsif; + else if(i->str=="nil" ) detail_token.type=__nil; + else if(i->str=="and" ) detail_token.type=__and_operator; + else if(i->str=="or" ) detail_token.type=__or_operator; detail_token_list.push_back(detail_token); } else if(i->type==__token_identifier) @@ -527,35 +481,35 @@ void nasal_lexer::generate_detail_token() { detail_token.line=i->line; detail_token.str =""; - if (i->str=="+") detail_token.type=__add_operator; - else if(i->str=="-") detail_token.type=__sub_operator; - else if(i->str=="*") detail_token.type=__mul_operator; - else if(i->str=="/") detail_token.type=__div_operator; - else if(i->str=="~") detail_token.type=__link_operator; + if (i->str=="+" ) detail_token.type=__add_operator; + else if(i->str=="-" ) detail_token.type=__sub_operator; + else if(i->str=="*" ) detail_token.type=__mul_operator; + else if(i->str=="/" ) detail_token.type=__div_operator; + else if(i->str=="~" ) detail_token.type=__link_operator; else if(i->str=="+=") detail_token.type=__add_equal; else if(i->str=="-=") detail_token.type=__sub_equal; else if(i->str=="*=") detail_token.type=__mul_equal; else if(i->str=="/=") detail_token.type=__div_equal; else if(i->str=="~=") detail_token.type=__link_equal; - else if(i->str=="=") detail_token.type=__equal; + else if(i->str=="=" ) detail_token.type=__equal; else if(i->str=="==") detail_token.type=__cmp_equal; else if(i->str=="!=") detail_token.type=__cmp_not_equal; - else if(i->str=="<") detail_token.type=__cmp_less; + else if(i->str=="<" ) detail_token.type=__cmp_less; else if(i->str=="<=") detail_token.type=__cmp_less_or_equal; - else if(i->str==">") detail_token.type=__cmp_more; + else if(i->str==">" ) detail_token.type=__cmp_more; else if(i->str==">=") detail_token.type=__cmp_more_or_equal; - else if(i->str==";") detail_token.type=__semi; - else if(i->str==".") detail_token.type=__dot; - else if(i->str==":") detail_token.type=__colon; - else if(i->str==",") detail_token.type=__comma; - else if(i->str=="?") detail_token.type=__ques_mark; - else if(i->str=="!") detail_token.type=__nor_operator; - else if(i->str=="[") detail_token.type=__left_bracket; - else if(i->str=="]") detail_token.type=__right_bracket; - else if(i->str=="(") detail_token.type=__left_curve; - else if(i->str==")") detail_token.type=__right_curve; - else if(i->str=="{") detail_token.type=__left_brace; - else if(i->str=="}") detail_token.type=__right_brace; + else if(i->str==";" ) detail_token.type=__semi; + else if(i->str=="." ) detail_token.type=__dot; + else if(i->str==":" ) detail_token.type=__colon; + else if(i->str=="," ) detail_token.type=__comma; + else if(i->str=="?" ) detail_token.type=__ques_mark; + else if(i->str=="!" ) detail_token.type=__nor_operator; + else if(i->str=="[" ) detail_token.type=__left_bracket; + else if(i->str=="]" ) detail_token.type=__right_bracket; + else if(i->str=="(" ) detail_token.type=__left_curve; + else if(i->str==")" ) detail_token.type=__right_curve; + else if(i->str=="{" ) detail_token.type=__left_brace; + else if(i->str=="}" ) detail_token.type=__right_brace; else { ++error; diff --git a/version2.0/nasal_runtime.h b/version2.0/nasal_runtime.h index 6ab7408..782a62f 100644 --- a/version2.0/nasal_runtime.h +++ b/version2.0/nasal_runtime.h @@ -773,7 +773,8 @@ int nasal_runtime::call_function(std::list >& local_s nasal_gc.get_scalar(addr).get_function().get_parameter_list(), nasal_gc.get_scalar(addr).get_function().get_statement_block(), *call_node, - last_hash_addr); + last_hash_addr + ); if(addr<0) return -1; nasal_gc.reference_delete(tmp_addr); @@ -968,6 +969,8 @@ int nasal_runtime::function_generation(std::list >& lo void nasal_runtime::update_closure(std::list >& local_scope,int local_scope_addr) { // update_closure + // each new function will be updated only once, after updating closure,functions' closure_updated flag will be set true + // but this has a bug, if this new function is a member of vector or hash, it will not be updated if(!local_scope.size()) return; for(std::map::iterator i=local_scope.back().begin();i!=local_scope.back().end();++i)