This commit is contained in:
Valk Richard Li 2020-05-26 02:59:45 -07:00 committed by GitHub
parent 54c855e17e
commit c1803a455d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 235 additions and 234 deletions

View File

@ -4,9 +4,10 @@
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
#include <cstring> #include <cstring>
#include <cstdlib>
#include <ctime> #include <ctime>
#include <cmath> #include <cmath>
// if thread is used, don't forget to add -std=c++11 or a higher standard when compiling /* if thread is used, don't forget to add -std=c++11 or a higher standard when compiling */
// #include <thread> // #include <thread>
#include <list> #include <list>
#include <stack> #include <stack>
@ -20,6 +21,7 @@
including a function that prints the hex format number of an integer including a function that prints the hex format number of an integer
*/ */
#include "nasal_misc.h" #include "nasal_misc.h"
/* /*
nasal_enum.h nasal_enum.h
including enums of: lexer token type,parse generated type,scalar type including enums of: lexer token type,parse generated type,scalar type
@ -29,12 +31,14 @@
scalar type is used in nasal_runtime and nasal_gc scalar type is used in nasal_runtime and nasal_gc
*/ */
#include "nasal_enum.h" #include "nasal_enum.h"
/* /*
nasal_ast.h nasal_ast.h
including a class named abstract_syntax_tree including a class named abstract_syntax_tree
this class is frequently used in nasal_parse nasal_runtime this class is frequently used in nasal_parse nasal_runtime
*/ */
#include "nasal_ast.h" #include "nasal_ast.h"
/* /*
nasal_lexer.h nasal_lexer.h
including a class named resource_file including a class named resource_file
@ -45,6 +49,7 @@
including a function named is_reserve_word, checking if an identifier is a reserve word including a function named is_reserve_word, checking if an identifier is a reserve word
*/ */
#include "nasal_lexer.h" #include "nasal_lexer.h"
/* /*
nasal_parse.h nasal_parse.h
including a class named nasal_parse including a class named nasal_parse
@ -53,6 +58,7 @@
if parse errors occur, this enum will be put into use if parse errors occur, this enum will be put into use
*/ */
#include "nasal_parse.h" #include "nasal_parse.h"
/* /*
nasal_gc.h(garbage collector and memory manager of nasal_runtime) nasal_gc.h(garbage collector and memory manager of nasal_runtime)
including basic classes named: nasal_number, nasal_string, nasal_vector, nasal_hash, nasal_function including basic classes named: nasal_number, nasal_string, nasal_vector, nasal_hash, nasal_function
@ -61,6 +67,14 @@
nasal_gc is an object of class gc_manager, and nasal_runtime uses this object as its memory manager nasal_gc is an object of class gc_manager, and nasal_runtime uses this object as its memory manager
*/ */
#include "nasal_gc.h" #include "nasal_gc.h"
/*
nasal_builtinfunc.h
including built-in functions of nasal lib
all functions in this header will be used in nasal_runtime::builtin_function()
*/
#include "nasal_builtinfunc.h"
/* /*
nasal_runtime.h nasal_runtime.h
including a class named nasal_runtime including a class named nasal_runtime

View File

@ -2,7 +2,38 @@
#define __NASAL_ENUM_H__ #define __NASAL_ENUM_H__
// lexer token type is only used in nasal_lexer // lexer token type is only used in nasal_lexer
// each scanned token will be recognized as one of these below /*
__token_reserve_word:
for,foreach,forindex,while : loop head
var,func : definition
break,continue : in loop
return : in function
if,else,elsif : conditional expr
and,or : calculation
nil : special type
__token_identifier:
must begin with '_' or 'a'~'z' or 'A'~'Z'
can include '_' or 'a'~'z' or 'A'~'Z' or '0'~'9'
__token_string:
example:
"string"
'string'
if a string does not end with " or ' then lexer will throw an error
__token_number:
example:
2147483647 (integer)
2.71828 (float)
0xdeadbeef (hex) or 0xDEADBEEF (hex)
0o170001 (oct)
1e-1234 (dec) or 10E2 (dec)
__token_operator:
! + - * / ~
= += -= *= /= ~=
== != > >= < <=
('and' 'or' are operators too but they are recognized as operator in generate_detail_token())
() [] {} ; , . : ?
others: __unknown_operator
*/
enum lexer_token_type enum lexer_token_type
{ {
__token_reserve_word=1, __token_reserve_word=1,
@ -29,40 +60,39 @@ enum parse_gen_type
{ {
/* /*
stack end is an important flag for parse token stack to stack end is an important flag for parse token stack to
check if its stack is at end check if its stack is at end/empty
if stack is empty,the parser will get a wrong memory space and cause SIGSEGV if stack is empty,the parser will get a wrong memory space and cause SIGSEGV
*/ */
__stack_end=1, __stack_end=1,
// operators == != < <= > >=
__cmp_equal, __cmp_equal,
__cmp_not_equal, __cmp_not_equal,
__cmp_less,__cmp_less_or_equal, __cmp_less,__cmp_less_or_equal,
__cmp_more,__cmp_more_or_equal, __cmp_more,__cmp_more_or_equal, // operators == != < <= > >=
// operators and or ! + - * / ~
__and_operator, __or_operator,__nor_operator, __and_operator, __or_operator,__nor_operator,
__add_operator,__sub_operator, __add_operator,__sub_operator,
__mul_operator,__div_operator,__link_operator, __mul_operator,__div_operator,__link_operator, // operators and or ! + - * / ~
// operators = += -= *= /= ~=
__equal, __equal,
__add_equal,__sub_equal, __add_equal,__sub_equal,
__mul_equal,__div_equal,__link_equal, __mul_equal,__div_equal,__link_equal, // operators = += -= *= /= ~=
// operators {} [] () ; , : . ?
__left_brace,__right_brace, // {} __left_brace,__right_brace, // {}
__left_bracket,__right_bracket, // [] __left_bracket,__right_bracket, // []
__left_curve,__right_curve, // () __left_curve,__right_curve, // ()
__semi,__comma,__colon,__dot,__ques_mark, // ; , : . ? __semi,__comma,__colon,__dot,__ques_mark, // ; , : . ?
__unknown_operator, __unknown_operator,
// reserve words
__var, __var,
__func,__return,__nil, __func,__return,__nil,
__if,__elsif,__else, __if,__elsif,__else,
__continue,__break, __continue,__break,
__for,__forindex,__foreach,__while, __for,__forindex,__foreach,__while, // reserve words
// basic scalar type: number string __number,__string, // basic scalar type: number string
__number,__string,
// basic identifier type: identifier dynamic_identifier __id,__dynamic_id, // basic identifier type: identifier dynamic_identifier
__id,__dynamic_id,
// abstract_syntax_tree type below // abstract_syntax_tree type below
// abstract_syntax_tree also uses the types above, such as operators // abstract_syntax_tree also uses the types above, such as operators

View File

@ -1,38 +1,17 @@
#ifndef __NASAL_LEXER_H__ #ifndef __NASAL_LEXER_H__
#define __NASAL_LEXER_H__ #define __NASAL_LEXER_H__
/* #define IS_IDENTIFIER_HEAD(c) (c=='_')||('a'<=c && c<='z')||('A'<=c&&c<='Z')
__token_reserve_word: #define IS_IDENTIFIER_BODY(c) (c=='_')||('a'<=c && c<='z')||('A'<=c&&c<='Z')||('0'<=c&&c<='9')
for,foreach,forindex,while : loop #define IS_NUMBER_HEAD(c) ('0'<=c&&c<='9')
var,func : definition #define IS_NUMBER_BODY(c) ('0'<=c&&c<='9')||('a'<=c&&c<='f')||('A'<=c&&c<='F')||(c=='e'||c=='E'||c=='.'||c=='x'||c=='o')
break,continue : in loop #define IS_STRING_HEAD(c) (c=='\''||c=='\"')
return : in function // single operators have only one character
if,else,elsif : conditional expr #define IS_SINGLE_OPRATOR(c) (c=='('||c==')'||c=='['||c==']'||c=='{'||c=='}'||c==','||c==';'||c=='|'||c==':'||\
and,or : calculation c=='?'||c=='.'||c=='`'||c=='&'||c=='@'||c=='%'||c=='$'||c=='^'||c=='\\')
nil : special type // calculation operators may have two chars, for example: += -= *= /= ~= != == >= <=
__token_identifier: #define IS_CALC_OPERATOR(c) (c=='='||c=='+'||c=='-'||c=='*'||c=='!'||c=='/'||c=='<'||c=='>'||c=='~')
must begin with '_' or 'a'~'z' or 'A'~'Z' #define IS_NOTE_HEAD(c) (c=='#')
can include '_' or 'a'~'z' or 'A'~'Z' or '0'~'9'
__token_string:
example:
"string"
'string'
if a string does not end with " or ' then lexer will throw an error
__token_number:
example:
2147483647 (integer)
2.71828 (float)
0xdeadbeef (hex) or 0xDEADBEEF (hex)
0o170001 (oct)
1e-1234 (dec) or 10E2 (dec)
__token_operator:
! + - * / ~
= += -= *= /= ~=
== != > >= < <=
('and' 'or' are operators too but they are recognized as operator in generate_detail_token())
() [] {} ; , . : ?
others: __unknown_operator
*/
/* filenames of lib files */ /* filenames of lib files */
#ifndef LIB_FILE_NUM #ifndef LIB_FILE_NUM
@ -52,6 +31,7 @@ const std::string lib_filename[LIB_FILE_NUM]=
"lib/utf8.nas" "lib/utf8.nas"
}; };
#endif #endif
/* reserve words */ /* reserve words */
#ifndef RESERVE_WORD_NUM #ifndef RESERVE_WORD_NUM
#define RESERVE_WORD_NUM 15 #define RESERVE_WORD_NUM 15
@ -141,6 +121,7 @@ void resource_file::delete_all_source()
std::vector<char> tmp; std::vector<char> tmp;
source_code.clear(); source_code.clear();
source_code.swap(tmp); source_code.swap(tmp);
// use tmp's destructor to delete the memory space that source_code used before
return; return;
} }
void resource_file::input_file(std::string filename) void resource_file::input_file(std::string filename)
@ -156,9 +137,7 @@ void resource_file::input_file(std::string filename)
while(!fin.eof()) while(!fin.eof())
{ {
c=fin.get(); c=fin.get();
if(fin.eof()) if(fin.eof()) break;
break;
//source_code.push_back(c<0? '?':c);
source_code.push_back(c); source_code.push_back(c);
} }
fin.close(); fin.close();
@ -178,9 +157,8 @@ void resource_file::load_lib_file()
while(!fin.eof()) while(!fin.eof())
{ {
c=fin.get(); c=fin.get();
if(fin.eof()) if(fin.eof()) break;
break; source_code.push_back(c);
source_code.push_back(c<0? '?':c);
} }
} }
fin.close(); fin.close();
@ -193,26 +171,25 @@ std::vector<char>& resource_file::get_source()
} }
void resource_file::print_resource() void resource_file::print_resource()
{ {
int size=source_code.size();
int line=1; int line=1;
std::cout<<line<<"\t"; std::cout<<line<<"\t";
for(int i=0;i<source_code.size();++i) for(int i=0;i<size;++i)
{ {
if(32<=source_code[i]) if(32<=source_code[i]) std::cout<<source_code[i];
std::cout<<source_code[i];
else if(source_code[i]<0) else if(source_code[i]<0)
{ {
// print unicode
std::string tmp=""; std::string tmp="";
for(;i<source_code.size();++i) for(;i<size;++i)
{ {
if(source_code[i]>=0) if(source_code[i]>=0) break;
break;
tmp.push_back(source_code[i]); tmp.push_back(source_code[i]);
} }
std::cout<<tmp;--i; std::cout<<tmp;--i;
} }
else else std::cout<<" ";
std::cout<<" "; if(i<size && source_code[i]=='\n')
if(source_code[i]=='\n')
{ {
++line; ++line;
std::cout<<std::endl<<line<<"\t"; std::cout<<std::endl<<line<<"\t";
@ -225,42 +202,27 @@ void resource_file::print_resource()
std::string nasal_lexer::identifier_gen(std::vector<char>& res,int& ptr,int& line) std::string nasal_lexer::identifier_gen(std::vector<char>& res,int& ptr,int& line)
{ {
std::string token_str=""; std::string token_str="";
while(res[ptr]=='_' || ('a'<=res[ptr] && res[ptr]<='z') || ('A'<=res[ptr] && res[ptr]<='Z') || ('0'<=res[ptr] && res[ptr]<='9')) while(IS_IDENTIFIER_BODY(res[ptr]))
{ {
token_str+=res[ptr]; token_str+=res[ptr];
++ptr; ++ptr;
if(ptr>=res.size()) if(ptr>=res.size()) break;
break;
} }
// check dynamic identifier "..." // check dynamic identifier "..."
if(res[ptr]=='.') if(ptr+2<res.size() && res[ptr]=='.' && res[ptr+1]=='.' && res[ptr+2]=='.')
{
++ptr;
if(ptr<res.size() && res[ptr]=='.')
{
++ptr;
if(ptr<res.size() && res[ptr]=='.')
{ {
token_str+="..."; token_str+="...";
++ptr; ptr+=3;
}
else
ptr-=2;
}
else
--ptr;
} }
return token_str; return token_str;
// after running this process, ptr will point to the next token's beginning character
} }
std::string nasal_lexer::number_gen(std::vector<char>& res,int& ptr,int& line) std::string nasal_lexer::number_gen(std::vector<char>& res,int& ptr,int& line)
{ {
bool scientific_notation=false; bool scientific_notation=false;// numbers like 1e8 are scientific_notation
std::string token_str=""; std::string token_str="";
while(('0'<=res[ptr] && res[ptr]<='9') || while(IS_NUMBER_BODY(res[ptr]))
('a'<=res[ptr] && res[ptr]<='f') ||
('A'<=res[ptr] && res[ptr]<='F') ||
res[ptr]=='.' || res[ptr]=='x' || res[ptr]=='o' ||
res[ptr]=='e' || res[ptr]=='E')
{ {
token_str+=res[ptr]; token_str+=res[ptr];
if(res[ptr]=='e' || res[ptr]=='E') if(res[ptr]=='e' || res[ptr]=='E')
@ -299,14 +261,12 @@ std::string nasal_lexer::string_gen(std::vector<char>& res,int& ptr,int& line)
std::string token_str=""; std::string token_str="";
char str_begin=res[ptr]; char str_begin=res[ptr];
++ptr; ++ptr;
if(ptr>=res.size()) if(ptr>=res.size()) return token_str;
return token_str;
while(ptr<res.size() && res[ptr]!=str_begin) while(ptr<res.size() && res[ptr]!=str_begin)
{ {
token_str+=res[ptr]; token_str+=res[ptr];
if(res[ptr]=='\n') if(res[ptr]=='\n') ++line;
++line; if(res[ptr]=='\\' && ptr+1<res.size())
if(res[ptr]=='\\')
{ {
++ptr; ++ptr;
switch(res[ptr]) switch(res[ptr])
@ -321,8 +281,7 @@ std::string nasal_lexer::string_gen(std::vector<char>& res,int& ptr,int& line)
} }
} }
++ptr; ++ptr;
if(ptr>=res.size()) if(ptr>=res.size()) break;
break;
} }
// check if this string ends with a " or ' // check if this string ends with a " or '
if(ptr>=res.size()) if(ptr>=res.size())
@ -377,13 +336,12 @@ void nasal_lexer::scanner(std::vector<char>& res)
{ {
while(ptr<res.size() && (res[ptr]==' ' || res[ptr]=='\n' || res[ptr]=='\t' || res[ptr]=='\r' || res[ptr]<0)) while(ptr<res.size() && (res[ptr]==' ' || res[ptr]=='\n' || res[ptr]=='\t' || res[ptr]=='\r' || res[ptr]<0))
{ {
if(res[ptr]=='\n') // these characters will be ignored, and '\n' will cause ++line
++line; if(res[ptr]=='\n') ++line;
++ptr; ++ptr;
} }
if(ptr>=res.size()) if(ptr>=res.size()) break;
break; if(IS_IDENTIFIER_HEAD(res[ptr]))
if(res[ptr]=='_' || ('a'<=res[ptr] && res[ptr]<='z') || ('A'<=res[ptr] && res[ptr]<='Z'))
{ {
token_str=identifier_gen(res,ptr,line); token_str=identifier_gen(res,ptr,line);
token new_token; token new_token;
@ -392,7 +350,7 @@ void nasal_lexer::scanner(std::vector<char>& res)
new_token.str=token_str; new_token.str=token_str;
token_list.push_back(new_token); token_list.push_back(new_token);
} }
else if('0'<=res[ptr] && res[ptr]<='9') else if(IS_NUMBER_HEAD(res[ptr]))
{ {
token_str=number_gen(res,ptr,line); token_str=number_gen(res,ptr,line);
token new_token; token new_token;
@ -401,7 +359,7 @@ void nasal_lexer::scanner(std::vector<char>& res)
new_token.str=token_str; new_token.str=token_str;
token_list.push_back(new_token); token_list.push_back(new_token);
} }
else if(res[ptr]=='\'' || res[ptr]=='\"') else if(IS_STRING_HEAD(res[ptr]))
{ {
token_str=string_gen(res,ptr,line); token_str=string_gen(res,ptr,line);
token new_token; token new_token;
@ -410,10 +368,7 @@ void nasal_lexer::scanner(std::vector<char>& res)
new_token.str=token_str; new_token.str=token_str;
token_list.push_back(new_token); token_list.push_back(new_token);
} }
else if(res[ptr]=='(' || res[ptr]==')' || res[ptr]=='[' || res[ptr]==']' || res[ptr]=='{' || else if(IS_SINGLE_OPRATOR(res[ptr]))
res[ptr]=='}' || res[ptr]==',' || res[ptr]==';' || res[ptr]=='|' || res[ptr]==':' ||
res[ptr]=='?' || res[ptr]=='.' || res[ptr]=='`' || res[ptr]=='&' || res[ptr]=='@' ||
res[ptr]=='%' || res[ptr]=='$' || res[ptr]=='^' || res[ptr]=='\\')
{ {
token_str=""; token_str="";
token_str+=res[ptr]; token_str+=res[ptr];
@ -424,8 +379,7 @@ void nasal_lexer::scanner(std::vector<char>& res)
token_list.push_back(new_token); token_list.push_back(new_token);
++ptr; ++ptr;
} }
else if(res[ptr]=='=' || res[ptr]=='+' || res[ptr]=='-' || res[ptr]=='*' || res[ptr]=='!' || else if(IS_CALC_OPERATOR(res[ptr]))
res[ptr]=='/' || res[ptr]=='<' || res[ptr]=='>' || res[ptr]=='~')
{ {
// get calculation operator // get calculation operator
token_str=""; token_str="";
@ -442,7 +396,7 @@ void nasal_lexer::scanner(std::vector<char>& res)
new_token.str=token_str; new_token.str=token_str;
token_list.push_back(new_token); token_list.push_back(new_token);
} }
else if(res[ptr]=='#') else if(IS_NOTE_HEAD(res[ptr]))
{ {
// avoid note // avoid note
while(ptr<res.size() && res[ptr]!='\n') while(ptr<res.size() && res[ptr]!='\n')

View File

@ -773,7 +773,8 @@ int nasal_runtime::call_function(std::list<std::map<std::string,int> >& local_s
nasal_gc.get_scalar(addr).get_function().get_parameter_list(), nasal_gc.get_scalar(addr).get_function().get_parameter_list(),
nasal_gc.get_scalar(addr).get_function().get_statement_block(), nasal_gc.get_scalar(addr).get_function().get_statement_block(),
*call_node, *call_node,
last_hash_addr); last_hash_addr
);
if(addr<0) if(addr<0)
return -1; return -1;
nasal_gc.reference_delete(tmp_addr); nasal_gc.reference_delete(tmp_addr);
@ -968,6 +969,8 @@ int nasal_runtime::function_generation(std::list<std::map<std::string,int> >& lo
void nasal_runtime::update_closure(std::list<std::map<std::string,int> >& local_scope,int local_scope_addr) void nasal_runtime::update_closure(std::list<std::map<std::string,int> >& local_scope,int local_scope_addr)
{ {
// update_closure // update_closure
// each new function is updated only once; after its closure is updated, the function's closure_updated flag is set to true
// known bug: if the new function is a member of a vector or hash, it will not be updated
if(!local_scope.size()) if(!local_scope.size())
return; return;
for(std::map<std::string,int>::iterator i=local_scope.back().begin();i!=local_scope.back().end();++i) for(std::map<std::string,int>::iterator i=local_scope.back().begin();i!=local_scope.back().end();++i)