update

2020-05-26 02:59:45 -07:00 · 2020-05-26 02:59:45 -07:00 · c1803a455d
parent 54c855e17e
commit c1803a455d
4 changed files with 235 additions and 234 deletions
--- a/version2.0/nasal.h
+++ b/version2.0/nasal.h
@ -4,9 +4,10 @@
 #include <iostream>
 #include <fstream>
 #include <cstring>
 #include <cstdlib>
 #include <ctime>
 #include <cmath>
-// if thread is used, don't forget to add -std=c++11 or higher standard before executing
+/* if thread is used, don't forget to add -std=c++11 or a higher standard when compiling */
 // #include <thread>
 #include <list>
 #include <stack>
@ -20,6 +21,7 @@
 including a function that print the hex format number of an integer
 */
 #include "nasal_misc.h"
 /*
 nasal_enum.h
 including enums of: lexer token type,parse generated type,scalar type
@ -29,12 +31,14 @@
 scalar type is used in nasal_runtime and nasal_gc
 */
 #include "nasal_enum.h"
 /*
 nasal_ast.h
 including a class named abstract_syntax_tree
 this class is frequently used in nasal_parse nasal_runtime
 */
 #include "nasal_ast.h"
 /*
 nasal_lexer.h
 including a class named resource_file
@ -45,6 +49,7 @@
 including a function named is_reserve_word, checking if an identifier is a reserve word
 */
 #include "nasal_lexer.h"
 /*
 nasal_parse.h
 including a class named nasal_parse
@ -53,6 +58,7 @@
 if parse errors occur,this enum will be into use
 */
 #include "nasal_parse.h"
 /*
 nasal_gc.h(garbage collector and memory manager of nasal_runtime)
 including basic classes named: nasal_number, nasal_string, nasal_vector, nasal_hash, nasal_function
@ -61,6 +67,14 @@
 nasal_gc is an object of class gc_manager, and nasal_runtime uses this object as its memory manager
 */
 #include "nasal_gc.h"
 /*
 nasal_builtinfunc.h
 including built-in functions of nasal lib
 all functions in this header will be used in nasal_runtime::builtin_function()
 */
 #include "nasal_builtinfunc.h"
 /*
 nasal_runtime.h
 including a class named nasal_runtime
--- a/version2.0/nasal_enum.h
+++ b/version2.0/nasal_enum.h
@ -2,7 +2,38 @@
 #define __NASAL_ENUM_H__
 // lexer token type is only used in nasal_lexer
-// each scanned token will be recognized as one of these below
+/*
 __token_reserve_word:
 	for,foreach,forindex,while : loop head
 	var,func                   : definition
 	break,continue             : in loop
 	return                     : in function
 	if,else,elsif              : conditional expr
 	and,or                     : calculation
 	nil                        : special type
 __token_identifier:
 	must begin with '_' or 'a'~'z' or 'A'~'Z'
 	can include '_' or 'a'~'z' or 'A'~'Z' or '0'~'9'
 __token_string:
 	example:
 		"string"
 		'string'
 	if a string does not end with " or ' then lexer will throw an error
 __token_number:
 	example:
 		2147483647 (integer)
 		2.71828    (float)
 		0xdeadbeef (hex) or 0xDEADBEEF (hex)
 		0o170001   (oct)
 		1e-1234    (dec) or 10E2       (dec)
 __token_operator:
 	!  +  -  *  /  ~
 	=  += -= *= /= ~=
 	== != >  >= <  <=
 	('and'  'or' are operators too but they are recognized as operator in generate_detail_token())
 	() [] {} ; , . : ?
 	others: __unknown_operator
 */
 enum lexer_token_type
 {
 	__token_reserve_word=1,
@ -29,40 +60,39 @@ enum parse_gen_type
 {
 	/*
 	stack end is an important flag for parse token stack to
-		check if it's stack is at end
+	check if it's stack is at end/empty
 	if stack is empty,the parser will get a wrong memory space and cause SIGSEGV
 	*/
 	__stack_end=1,
-	// operators == != < <= > >= 
+	
 	__cmp_equal,
 	__cmp_not_equal,
 	__cmp_less,__cmp_less_or_equal,
-	__cmp_more,__cmp_more_or_equal,
+	__cmp_more,__cmp_more_or_equal,                // operators == != < <= > >= 
-	// operators and or ! + - * / ~
+	
 	__and_operator,	__or_operator,__nor_operator,
 	__add_operator,__sub_operator,
-	__mul_operator,__div_operator,__link_operator,
+	__mul_operator,__div_operator,__link_operator, // operators and or ! + - * / ~
-	// operators = += -= *= /= ~=
+	
 	__equal,
 	__add_equal,__sub_equal,
-	__mul_equal,__div_equal,__link_equal,
+	__mul_equal,__div_equal,__link_equal,          // operators = += -= *= /= ~=
-	// operators {} [] () ; , : . ?
+
 	__left_brace,__right_brace,                    // {}
 	__left_bracket,__right_bracket,                // []
 	__left_curve,__right_curve,                    // ()
 	__semi,__comma,__colon,__dot,__ques_mark,      // ; , : . ?
 	__unknown_operator,
-	// reserve words
+	
 	__var,
 	__func,__return,__nil,
 	__if,__elsif,__else,
 	__continue,__break,
-	__for,__forindex,__foreach,__while,
+	__for,__forindex,__foreach,__while,            // reserve words
-	// basic scalar type: number string
+	__number,__string,                             // basic scalar type: number string
-	__number,__string,
+	
-	// basic identifier type: identifier dynamic_identifier
+	__id,__dynamic_id,                             // basic identifier type: identifier dynamic_identifier
 	__id,__dynamic_id,
 	// abstract_syntax_tree type below
 	// abstract_syntax_tree also uses the types above, such as operators
--- a/version2.0/nasal_lexer.h
+++ b/version2.0/nasal_lexer.h
@ -1,38 +1,17 @@
 #ifndef __NASAL_LEXER_H__
 #define __NASAL_LEXER_H__
-/*
+#define IS_IDENTIFIER_HEAD(c) (c=='_')||('a'<=c && c<='z')||('A'<=c&&c<='Z')
-	__token_reserve_word:
+#define IS_IDENTIFIER_BODY(c) (c=='_')||('a'<=c && c<='z')||('A'<=c&&c<='Z')||('0'<=c&&c<='9')
-		for,foreach,forindex,while : loop
+#define IS_NUMBER_HEAD(c)     ('0'<=c&&c<='9')
-		var,func                   : definition
+#define IS_NUMBER_BODY(c)     ('0'<=c&&c<='9')||('a'<=c&&c<='f')||('A'<=c&&c<='F')||(c=='e'||c=='E'||c=='.'||c=='x'||c=='o')
-		break,continue             : in loop
+#define IS_STRING_HEAD(c)     (c=='\''||c=='\"')
-		return                     : in function
+// single operators have only one character
-		if,else,elsif              : conditional expr
+#define IS_SINGLE_OPRATOR(c)  (c=='('||c==')'||c=='['||c==']'||c=='{'||c=='}'||c==','||c==';'||c=='|'||c==':'||\
-		and,or                     : calculation
+							   c=='?'||c=='.'||c=='`'||c=='&'||c=='@'||c=='%'||c=='$'||c=='^'||c=='\\')
-		nil                        : special type
+// calculation operators may have two chars, for example: += -= *= /= ~= != == >= <=
-	__token_identifier:
+#define IS_CALC_OPERATOR(c)   (c=='='||c=='+'||c=='-'||c=='*'||c=='!'||c=='/'||c=='<'||c=='>'||c=='~')
-		must begin with '_' or 'a'~'z' or 'A'~'Z'
+#define IS_NOTE_HEAD(c)       (c=='#')
 		can include '_' or 'a'~'z' or 'A'~'Z' or '0'~'9'
 	__token_string:
 		example:
 			"string"
 			'string'
 		if a string does not end with " or ' then lexer will throw an error
 	__token_number:
 		example:
 			2147483647 (integer)
 			2.71828    (float)
 			0xdeadbeef (hex) or 0xDEADBEEF (hex)
 			0o170001   (oct)
 			1e-1234    (dec) or 10E2       (dec)
 	__token_operator:
 		!  +  -  *  /  ~
 		=  += -= *= /= ~=
 		== != >  >= <  <=
 		('and'  'or' are operators too but they are recognized as operator in generate_detail_token())
 		() [] {} ; , . : ?
 		others: __unknown_operator
 */
 /* filenames of lib files */
 #ifndef LIB_FILE_NUM
@ -52,6 +31,7 @@ const std::string lib_filename[LIB_FILE_NUM]=
 	"lib/utf8.nas"
 };
 #endif
 /* reserve words */
 #ifndef RESERVE_WORD_NUM
 #define RESERVE_WORD_NUM 15
@ -141,6 +121,7 @@ void resource_file::delete_all_source()
 	std::vector<char> tmp;
 	source_code.clear();
 	source_code.swap(tmp);
 	// use tmp's destructor to delete the memory space that source_code used before
 	return;
 }
 void resource_file::input_file(std::string filename)
@ -156,9 +137,7 @@ void resource_file::input_file(std::string filename)
 	while(!fin.eof())
 	{
 		c=fin.get();
-		if(fin.eof())
+		if(fin.eof()) break;
 			break;
 		//source_code.push_back(c<0? '?':c);
 		source_code.push_back(c);
 	}
 	fin.close();
@ -178,9 +157,8 @@ void resource_file::load_lib_file()
 			while(!fin.eof())
 			{
 				c=fin.get();
-				if(fin.eof())
+				if(fin.eof()) break;
-					break;
+				source_code.push_back(c);
 				source_code.push_back(c<0? '?':c);
 			}
 		}
 		fin.close();
@ -193,26 +171,25 @@ std::vector<char>& resource_file::get_source()
 }
 void resource_file::print_resource()
 {
 	int size=source_code.size();
 	int line=1;
 	std::cout<<line<<"\t";
-	for(int i=0;i<source_code.size();++i)
+	for(int i=0;i<size;++i)
 	{
-		if(32<=source_code[i])
+		if(32<=source_code[i]) std::cout<<source_code[i];
 			std::cout<<source_code[i];
 		else if(source_code[i]<0)
 		{
 			// print unicode
 			std::string tmp="";
-			for(;i<source_code.size();++i)
+			for(;i<size;++i)
 			{
-				if(source_code[i]>=0)
+				if(source_code[i]>=0) break;
 					break;
 				tmp.push_back(source_code[i]);
 			}
 			std::cout<<tmp;--i;
 		}
-		else
+		else std::cout<<" ";
-			std::cout<<" ";
+		if(i<size && source_code[i]=='\n')
 		if(source_code[i]=='\n')
 		{
 			++line;
 			std::cout<<std::endl<<line<<"\t";
@ -225,42 +202,27 @@ void resource_file::print_resource()
 std::string nasal_lexer::identifier_gen(std::vector<char>& res,int& ptr,int& line)
 {
 	std::string token_str="";
-	while(res[ptr]=='_' || ('a'<=res[ptr] && res[ptr]<='z') || ('A'<=res[ptr] && res[ptr]<='Z') || ('0'<=res[ptr] && res[ptr]<='9'))
+	while(IS_IDENTIFIER_BODY(res[ptr]))
 	{
 		token_str+=res[ptr];
 		++ptr;
-		if(ptr>=res.size())
+		if(ptr>=res.size()) break;
 			break;
 	}
 	// check dynamic identifier "..."
-	if(res[ptr]=='.')
+	if(ptr+2<res.size() && res[ptr]=='.' && res[ptr+1]=='.' && res[ptr+2]=='.')
 	{
 		++ptr;
 		if(ptr<res.size() && res[ptr]=='.')
 		{
 			++ptr;
 			if(ptr<res.size() && res[ptr]=='.')
 	{
 		token_str+="...";
-				++ptr;
+		ptr+=3;
 			}
 			else
 				ptr-=2;
 		}
 		else
 			--ptr;
 	}
 	return token_str;
 	// after running this process, ptr will point to the next token's beginning character
 }
 std::string nasal_lexer::number_gen(std::vector<char>& res,int& ptr,int& line)
 {
-	bool scientific_notation=false;
+	bool scientific_notation=false;// numbers like 1e8 are scientific_notation
 	std::string token_str="";
-	while(('0'<=res[ptr] && res[ptr]<='9') ||
+	while(IS_NUMBER_BODY(res[ptr]))
 		('a'<=res[ptr] && res[ptr]<='f') ||
 		('A'<=res[ptr] && res[ptr]<='F') ||
 		res[ptr]=='.' || res[ptr]=='x' || res[ptr]=='o' ||
 		res[ptr]=='e' || res[ptr]=='E')
 	{
 		token_str+=res[ptr];
 		if(res[ptr]=='e' || res[ptr]=='E')
@ -299,14 +261,12 @@ std::string nasal_lexer::string_gen(std::vector<char>& res,int& ptr,int& line)
 	std::string token_str="";
 	char str_begin=res[ptr];
 	++ptr;
-	if(ptr>=res.size())
+	if(ptr>=res.size()) return token_str;
 		return token_str;
 	while(ptr<res.size() && res[ptr]!=str_begin)
 	{
 		token_str+=res[ptr];
-		if(res[ptr]=='\n')
+		if(res[ptr]=='\n') ++line;
-			++line;
+		if(res[ptr]=='\\' && ptr+1<res.size())
 		if(res[ptr]=='\\')
 		{
 			++ptr;
 			switch(res[ptr])
@ -321,8 +281,7 @@ std::string nasal_lexer::string_gen(std::vector<char>& res,int& ptr,int& line)
 			}
 		}
 		++ptr;
-		if(ptr>=res.size())
+		if(ptr>=res.size()) break;
 			break;
 	}
 	// check if this string ends with a " or '
 	if(ptr>=res.size())
@ -377,13 +336,12 @@ void nasal_lexer::scanner(std::vector<char>& res)
 	{
 		while(ptr<res.size() && (res[ptr]==' ' || res[ptr]=='\n' || res[ptr]=='\t' || res[ptr]=='\r' || res[ptr]<0))
 		{
-			if(res[ptr]=='\n')
+			// these characters will be ignored, and '\n' will cause ++line
-				++line;
+			if(res[ptr]=='\n') ++line;
 			++ptr;
 		}
-		if(ptr>=res.size())
+		if(ptr>=res.size()) break;
-			break;
+		if(IS_IDENTIFIER_HEAD(res[ptr]))
 		if(res[ptr]=='_' || ('a'<=res[ptr] && res[ptr]<='z') || ('A'<=res[ptr] && res[ptr]<='Z'))
 		{
 			token_str=identifier_gen(res,ptr,line);
 			token new_token;
@ -392,7 +350,7 @@ void nasal_lexer::scanner(std::vector<char>& res)
 			new_token.str=token_str;
 			token_list.push_back(new_token);
 		}
-		else if('0'<=res[ptr] && res[ptr]<='9')
+		else if(IS_NUMBER_HEAD(res[ptr]))
 		{
 			token_str=number_gen(res,ptr,line);
 			token new_token;
@ -401,7 +359,7 @@ void nasal_lexer::scanner(std::vector<char>& res)
 			new_token.str=token_str;
 			token_list.push_back(new_token);
 		}
-		else if(res[ptr]=='\'' || res[ptr]=='\"')
+		else if(IS_STRING_HEAD(res[ptr]))
 		{
 			token_str=string_gen(res,ptr,line);
 			token new_token;
@ -410,10 +368,7 @@ void nasal_lexer::scanner(std::vector<char>& res)
 			new_token.str=token_str;
 			token_list.push_back(new_token);
 		}
-		else if(res[ptr]=='(' || res[ptr]==')' || res[ptr]=='[' || res[ptr]==']' || res[ptr]=='{' ||
+		else if(IS_SINGLE_OPRATOR(res[ptr]))
 				res[ptr]=='}' || res[ptr]==',' || res[ptr]==';' || res[ptr]=='|' || res[ptr]==':' ||
 				res[ptr]=='?' || res[ptr]=='.' || res[ptr]=='`' || res[ptr]=='&' || res[ptr]=='@' ||
 				res[ptr]=='%' || res[ptr]=='$' || res[ptr]=='^' || res[ptr]=='\\')
 		{
 			token_str="";
 			token_str+=res[ptr];
@ -424,8 +379,7 @@ void nasal_lexer::scanner(std::vector<char>& res)
 			token_list.push_back(new_token);
 			++ptr;
 		}
-		else if(res[ptr]=='=' || res[ptr]=='+' || res[ptr]=='-' || res[ptr]=='*' || res[ptr]=='!' ||
+		else if(IS_CALC_OPERATOR(res[ptr]))
 				res[ptr]=='/' || res[ptr]=='<' || res[ptr]=='>' || res[ptr]=='~')
 		{
 			// get calculation operator
 			token_str="";
@ -442,7 +396,7 @@ void nasal_lexer::scanner(std::vector<char>& res)
 			new_token.str=token_str;
 			token_list.push_back(new_token);
 		}
-		else if(res[ptr]=='#')
+		else if(IS_NOTE_HEAD(res[ptr]))
 		{
 			// avoid note
 			while(ptr<res.size() && res[ptr]!='\n')
--- a/version2.0/nasal_runtime.h
+++ b/version2.0/nasal_runtime.h
@ -773,7 +773,8 @@ int  nasal_runtime::call_function(std::list<std::map<std::string,int> >& local_s
        nasal_gc.get_scalar(addr).get_function().get_parameter_list(),
        nasal_gc.get_scalar(addr).get_function().get_statement_block(),
        *call_node,
-        last_hash_addr);
+        last_hash_addr
    );
    if(addr<0)
        return -1;
    nasal_gc.reference_delete(tmp_addr);
@ -968,6 +969,8 @@ int nasal_runtime::function_generation(std::list<std::map<std::string,int> >& lo
 void nasal_runtime::update_closure(std::list<std::map<std::string,int> >& local_scope,int local_scope_addr)
 {
    // update_closure
    // each new function will be updated only once, after updating closure,functions' closure_updated flag will be set true
    // but this has a bug, if this new function is a member of vector or hash, it will not be updated
    if(!local_scope.size())
        return;
    for(std::map<std::string,int>::iterator i=local_scope.back().begin();i!=local_scope.back().end();++i)