This commit is contained in:
Valk Richard Li 2020-04-07 01:25:56 -07:00 committed by GitHub
parent 8519b134be
commit b8728dd725
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 463 additions and 473 deletions

View File

@ -0,0 +1,9 @@
# system: library object exposing system-level helpers as members of one hash.
var system=
{
# type(thing): print the type of thing on the screen.
# The work is delegated to nasal_call_inline_scalar_type, which is not defined
# in this file (NOTE(review): presumably an interpreter built-in - confirm).
# Nothing is returned to the caller: the function ends with a bare `return`.
type:func(thing)
{
nasal_call_inline_scalar_type(thing);
return;
}
};

View File

@ -24,6 +24,7 @@
2.71828 (float) 2.71828 (float)
0xdeadbeef (hex) or 0xDEADBEEF (hex) 0xdeadbeef (hex) or 0xDEADBEEF (hex)
0o170001 (oct) 0o170001 (oct)
1e-1234 (dec) or 10E2 (dec)
__token_operator: __token_operator:
! + - * / ~ ! + - * / ~
= += -= *= /= ~= = += -= *= /= ~=
@ -34,7 +35,9 @@
*/ */
/* filenames of lib files */ /* filenames of lib files */
const std::string lib_filename[10]= #ifndef LIB_FILE_NUM
#define LIB_FILE_NUM 11
const std::string lib_filename[LIB_FILE_NUM]=
{ {
"lib/base.nas", "lib/base.nas",
"lib/bits.nas", "lib/bits.nas",
@ -43,12 +46,16 @@ const std::string lib_filename[10]=
"lib/readline.nas", "lib/readline.nas",
"lib/regex.nas", "lib/regex.nas",
"lib/sqlite.nas", "lib/sqlite.nas",
"lib/system.nas",
"lib/thread.nas", "lib/thread.nas",
"lib/unix.nas", "lib/unix.nas",
"lib/utf8.nas" "lib/utf8.nas"
}; };
#endif
/* reserve words */ /* reserve words */
std::string reserve_word[15]= #ifndef RESERVE_WORD_NUM
#define RESERVE_WORD_NUM 15
std::string reserve_word[RESERVE_WORD_NUM]=
{ {
"for","foreach","forindex","while", "for","foreach","forindex","while",
"var","func","break","continue","return", "var","func","break","continue","return",
@ -57,106 +64,32 @@ std::string reserve_word[15]=
/* check if an identifier is a reserve word */ /* check if an identifier is a reserve word */
int is_reserve_word(std::string str) int is_reserve_word(std::string str)
{ {
for(int i=0;i<15;++i) for(int i=0;i<RESERVE_WORD_NUM;++i)
if(reserve_word[i]==str) if(reserve_word[i]==str)
return __token_reserve_word; return __token_reserve_word;
return __token_identifier; return __token_identifier;
} }
#endif
class resource_file class resource_file
{ {
private: private:
std::list<char> resource; std::vector<char> source_code;
public: public:
/* /*
delete_all_source: clear all the source codes in std::list<char> resource delete_all_source: clear all the source codes in std::list<char> resource
input_file : input source codes by filenames input_file : input source codes by filenames
load_lib_file : input lib source codes load_lib_file : input lib source codes
get_source : get the std::list<char> resource get_source : get the std::vector<char> source_code
print_resource : print source codes print_resource : print source codes
*/ */
resource_file() void delete_all_source();
{ void input_file(std::string);
resource.clear(); void load_lib_file();
return; std::vector<char>& get_source();
} void print_resource();
~resource_file()
{
resource.clear();
return;
}
void delete_all_source()
{
resource.clear();
return;
}
void input_file(std::string filename)
{
char c=0;
std::ifstream fin(filename,std::ios::binary);
if(fin.fail())
{
std::cout<<">> [Resource] cannot open file \'"<<filename<<"\' ."<<std::endl;
fin.close();
return;
}
while(!fin.eof())
{
c=fin.get();
if(fin.eof())
break;
resource.push_back(c);
}
fin.close();
return;
}
void load_lib_file()
{
resource.clear();
for(int i=0;i<10;++i)
{
std::ifstream fin(lib_filename[i],std::ios::binary);
if(fin.fail())
std::cout<<">> [Resource] fatal error: lack \'"<<lib_filename[i]<<"\'"<<std::endl;
else
{
char c=0;
while(!fin.eof())
{
c=fin.get();
if(fin.eof())
break;
resource.push_back(c);
}
}
fin.close();
}
return;
}
std::list<char>& get_source()
{
return resource;
}
void print_resource()
{
int line=1;
std::cout<<line<<"\t";
for(std::list<char>::iterator i=resource.begin();i!=resource.end();++i)
{
if(32<=*i)
std::cout<<*i;
else
std::cout<<" ";
if(*i=='\n')
{
++line;
std::cout<<std::endl<<line<<"\t";
}
}
std::cout<<std::endl;
return;
}
}; };
/* struct token: mainly used in nasal_lexer and nasal_parse*/ /* struct token: mainly used in nasal_lexer and nasal_parse*/
struct token struct token
{ {
@ -178,156 +111,147 @@ class nasal_lexer
std::list<token> token_list; std::list<token> token_list;
std::list<token> detail_token_list; std::list<token> detail_token_list;
int error; int error;
// change utf8 codes into '?' std::string identifier_gen(std::vector<char>&,int&,int&);
// this function will be deleted if there is a way to print utf8 codes out correctly std::string number_gen (std::vector<char>&,int&,int&);
std::string utf8_clear(std::string tmp) std::string string_gen (std::vector<char>&,int&,int&);
{
/*
0xxx xxxx 0x0 1 byte
110x xxxx 0xc0 2 byte
1110 xxxx 0xe0 3 byte
1111 0xxx 0xf0 4 byte
1111 10xx 0xf8 5 byte
1111 110x 0xfc 6 byte
bytes after it is:
10xx xxxx 0x80
so utf-8 format is:
0xxxxxxx
110xxxxx 10xxxxxx
1110xxxx 10xxxxxx 10xxxxxx
11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
unsigned char utf8head[6]={0x0,0xc0,0xe0,0xf0,0xf8,0xfc};
std::string ret="";
for(int i=0;i<tmp.length();++i)
{
if(tmp[i]>=0)
ret+=tmp[i];
else
{
int utf8byte=0;
for(int j=5;j>=0;--j)
if((tmp[i] & utf8head[j])==utf8head[j])
{
utf8byte=j;
break;
}
for(int j=0;j<utf8byte;++j)
++i;
ret+='?';
}
}
return ret;
}
public: public:
/* /*
identifier_gen : scan the source codes and generate identifiers
number_gen : scan the source codes and generate numbers
string_gen : scan the source codes and generate strings
print_token_list : print generated token list print_token_list : print generated token list
scanner : scan the source codes and generate tokens scanner : scan the source codes and generate tokens
generate_detail_token: recognize and change token types to detailed types that can be processed by nasal_parse generate_detail_token: recognize and change token types to detailed types that can be processed by nasal_parse
get_error : get the number of errors that occurred when generating tokens get_error : get the number of errors that occurred when generating tokens
get_detail_token : output the detailed tokens,must be used after generate_detail_token() get_detail_token : output the detailed tokens,must be used after generate_detail_token()
*/ */
nasal_lexer() nasal_lexer();
{ ~nasal_lexer();
token_list.clear(); void delete_all_tokens();
detail_token_list.clear(); void print_token_list();
error=0; void scanner(std::vector<char>&);
return; void generate_detail_token();
} int get_error();
~nasal_lexer() std::list<token>& get_detail_token_list();
{ };
token_list.clear();
detail_token_list.clear();
return;
}
void delete_all_tokens()
{
token_list.clear();
detail_token_list.clear();
error=0;
return;
}
void print_token_list()
{
for(std::list<token>::iterator i=token_list.begin();i!=token_list.end();++i)
{
std::cout<<"line "<<i->line<<" ( ";
print_lexer_token(i->type);
std::cout<<" | "<<i->str<<" )"<<std::endl;
}
return;
}
void scanner(std::list<char>& res)
{
token_list.clear();
detail_token_list.clear();
error=0;
int line=1;
std::string token_str; void resource_file::delete_all_source()
std::list<char>::iterator ptr=res.begin(); {
while(ptr!=res.end()) std::vector<char> tmp;
source_code.clear();
source_code.swap(tmp);
return;
}
void resource_file::input_file(std::string filename)
{
char c=0;
std::ifstream fin(filename,std::ios::binary);
if(fin.fail())
{ {
while(ptr!=res.end() && (*ptr==' ' || *ptr=='\n' || *ptr=='\t' || *ptr=='\r' || *ptr<0 || *ptr>127)) std::cout<<">> [Resource] cannot open file \'"<<filename<<"\' ."<<std::endl;
{ fin.close();
if(*ptr=='\n') return;
++line;
++ptr;
} }
if(ptr==res.end()) while(!fin.eof())
{
c=fin.get();
if(fin.eof())
break; break;
source_code.push_back(c<0? '?':c);
}
fin.close();
return;
}
void resource_file::load_lib_file()
{
source_code.clear();
for(int i=0;i<LIB_FILE_NUM;++i)
{
std::ifstream fin(lib_filename[i],std::ios::binary);
if(fin.fail())
std::cout<<">> [Resource] fatal error: lack \'"<<lib_filename[i]<<"\'"<<std::endl;
else
{
char c=0;
while(!fin.eof())
{
c=fin.get();
if(fin.eof())
break;
source_code.push_back(c<0? '?':c);
}
}
fin.close();
}
return;
}
std::vector<char>& resource_file::get_source()
{
return source_code;
}
void resource_file::print_resource()
{
int line=1;
std::cout<<line<<"\t";
for(int i=0;i<source_code.size();++i)
{
if(32<=source_code[i])
std::cout<<source_code[i];
else
std::cout<<" ";
if(source_code[i]=='\n')
{
++line;
std::cout<<std::endl<<line<<"\t";
}
}
std::cout<<std::endl;
return;
}
if(*ptr=='_' || ('a'<=*ptr && *ptr<='z') || ('A'<=*ptr && *ptr<='Z')) std::string nasal_lexer::identifier_gen(std::vector<char>& res,int& ptr,int& line)
{
std::string token_str="";
while(res[ptr]=='_' || ('a'<=res[ptr] && res[ptr]<='z') || ('A'<=res[ptr] && res[ptr]<='Z') || ('0'<=res[ptr] && res[ptr]<='9'))
{ {
// get identifier or reserve word token_str+=res[ptr];
token_str="";
while(*ptr=='_' || ('a'<=*ptr && *ptr<='z') || ('A'<=*ptr && *ptr<='Z') || ('0'<=*ptr && *ptr<='9'))
{
token_str+=*ptr;
++ptr; ++ptr;
if(ptr==res.end()) if(ptr>=res.size())
break; break;
} }
// check dynamic identifier "..." // check dynamic identifier "..."
if(*ptr=='.') if(res[ptr]=='.')
{ {
++ptr; ++ptr;
if(ptr!=res.end() && *ptr=='.') if(ptr<res.size() && res[ptr]=='.')
{ {
++ptr; ++ptr;
if(ptr!=res.end() && *ptr=='.') if(ptr<res.size() && res[ptr]=='.')
{ {
token_str+="..."; token_str+="...";
++ptr; ++ptr;
} }
else else
{ ptr-=2;
--ptr;
--ptr;
}
} }
else else
--ptr; --ptr;
} }
token new_token; return token_str;
new_token.line=line; }
new_token.type=is_reserve_word(token_str); std::string nasal_lexer::number_gen(std::vector<char>& res,int& ptr,int& line)
new_token.str=token_str; {
token_list.push_back(new_token); std::string token_str="";
if(ptr==res.end()) while(('0'<=res[ptr] && res[ptr]<='9') ||
break; ('a'<=res[ptr] && res[ptr]<='f') ||
} ('A'<=res[ptr] && res[ptr]<='F') ||
else if('0'<=*ptr && *ptr<='9') res[ptr]=='.' || res[ptr]=='x' || res[ptr]=='o' ||
res[ptr]=='e' || res[ptr]=='E' || res[ptr]=='-')
{ {
token_str=""; token_str+=res[ptr];
while(('0'<=*ptr && *ptr<='9') || ('a'<=*ptr && *ptr<='f') || ('A'<=*ptr && *ptr<='F') || *ptr=='.' || *ptr=='x' || *ptr=='o')
{
token_str+=*ptr;
++ptr; ++ptr;
if(ptr==res.end()) if(ptr>=res.size())
break; break;
} }
if(!check_numerable_string(token_str)) if(!check_numerable_string(token_str))
@ -336,45 +260,24 @@ class nasal_lexer
std::cout<<">> [Lexer] line "<<line<<": "<<token_str<<" is not a numerable string."<<std::endl; std::cout<<">> [Lexer] line "<<line<<": "<<token_str<<" is not a numerable string."<<std::endl;
token_str="0"; token_str="0";
} }
token new_token; return token_str;
new_token.line=line; }
new_token.type=__token_number; std::string nasal_lexer::string_gen(std::vector<char>& res,int& ptr,int& line)
new_token.str=token_str; {
token_list.push_back(new_token); std::string token_str="";
if(ptr==res.end()) char str_begin=res[ptr];
break;
}
else if(*ptr=='(' || *ptr==')' || *ptr=='[' || *ptr==']' || *ptr=='{' ||
*ptr=='}' || *ptr==',' || *ptr==';' || *ptr=='|' || *ptr==':' ||
*ptr=='?' || *ptr=='.' || *ptr=='`' || *ptr=='&' || *ptr=='@' ||
*ptr=='%' || *ptr=='$' || *ptr=='^' || *ptr=='\\')
{
token_str="";
token_str+=*ptr;
token new_token;
new_token.line=line;
new_token.type=__token_operator;
new_token.str=token_str;
token_list.push_back(new_token);
++ptr; ++ptr;
if(ptr==res.end()) if(ptr>=res.size())
break; return token_str;
} while(ptr<res.size() && res[ptr]!=str_begin)
else if(*ptr=='\'' || *ptr=='\"')
{ {
// get string token_str+=res[ptr];
char str_begin=*ptr; if(res[ptr]=='\n')
token_str=""; ++line;
++ptr; if(res[ptr]=='\\')
if(ptr==res.end())
break;
while(*ptr!=str_begin && ptr!=res.end())
{
token_str+=*ptr;
if(*ptr=='\\')
{ {
++ptr; ++ptr;
switch(*ptr) switch(res[ptr])
{ {
case '\\':token_str.pop_back();token_str.push_back('\\');break; case '\\':token_str.pop_back();token_str.push_back('\\');break;
case 'r': token_str.pop_back();token_str.push_back('\r');break; case 'r': token_str.pop_back();token_str.push_back('\r');break;
@ -382,41 +285,123 @@ class nasal_lexer
case 'n': token_str.pop_back();token_str.push_back('\n');break; case 'n': token_str.pop_back();token_str.push_back('\n');break;
case '\'':token_str.pop_back();token_str.push_back('\'');break; case '\'':token_str.pop_back();token_str.push_back('\'');break;
case '\"':token_str.pop_back();token_str.push_back('\"');break; case '\"':token_str.pop_back();token_str.push_back('\"');break;
default: token_str.push_back(*ptr);break; default: token_str.push_back(res[ptr]);break;
} }
} }
++ptr; ++ptr;
if(ptr==res.end()) if(ptr>=res.size())
break; break;
} }
// check if this string ends with a " or ' // check if this string ends with a " or '
if(ptr==res.end() || *ptr!=str_begin) if(ptr>=res.size())
{ {
++error; ++error;
std::cout<<">> [Lexer] line "<<line<<": this string must have a \' "<<str_begin<<" \' as its end."<<std::endl; std::cout<<">> [Lexer] line "<<line<<": this string must have a \' "<<str_begin<<" \' as its end."<<std::endl;
--ptr; --ptr;
} }
else ++ptr;
return token_str;
}
nasal_lexer::nasal_lexer()
{
token_list.clear();
detail_token_list.clear();
error=0;
return;
}
nasal_lexer::~nasal_lexer()
{
token_list.clear();
detail_token_list.clear();
return;
}
void nasal_lexer::delete_all_tokens()
{
token_list.clear();
detail_token_list.clear();
error=0;
return;
}
void nasal_lexer::print_token_list()
{
for(std::list<token>::iterator i=token_list.begin();i!=token_list.end();++i)
{ {
std::cout<<"line "<<i->line<<" ( ";
print_lexer_token(i->type);
std::cout<<" | "<<i->str<<" )"<<std::endl;
}
return;
}
void nasal_lexer::scanner(std::vector<char>& res)
{
token_list.clear();
detail_token_list.clear();
error=0;
int line=1;
std::string token_str;
int ptr=0;
while(ptr<res.size())
{
while(ptr<res.size() && (res[ptr]==' ' || res[ptr]=='\n' || res[ptr]=='\t' || res[ptr]=='\r' || res[ptr]<0))
{
if(res[ptr]=='\n')
++line;
++ptr;
}
if(ptr>=res.size())
break;
if(res[ptr]=='_' || ('a'<=res[ptr] && res[ptr]<='z') || ('A'<=res[ptr] && res[ptr]<='Z'))
{
token_str=identifier_gen(res,ptr,line);
token new_token;
new_token.line=line;
new_token.type=is_reserve_word(token_str);
new_token.str=token_str;
token_list.push_back(new_token);
}
else if('0'<=res[ptr] && res[ptr]<='9')
{
token_str=number_gen(res,ptr,line);
token new_token;
new_token.line=line;
new_token.type=__token_number;
new_token.str=token_str;
token_list.push_back(new_token);
}
else if(res[ptr]=='\'' || res[ptr]=='\"')
{
token_str=string_gen(res,ptr,line);
token new_token; token new_token;
new_token.line=line; new_token.line=line;
new_token.type=__token_string; new_token.type=__token_string;
new_token.str=utf8_clear(token_str); new_token.str=token_str;
token_list.push_back(new_token); token_list.push_back(new_token);
} }
else if(res[ptr]=='(' || res[ptr]==')' || res[ptr]=='[' || res[ptr]==']' || res[ptr]=='{' ||
res[ptr]=='}' || res[ptr]==',' || res[ptr]==';' || res[ptr]=='|' || res[ptr]==':' ||
res[ptr]=='?' || res[ptr]=='.' || res[ptr]=='`' || res[ptr]=='&' || res[ptr]=='@' ||
res[ptr]=='%' || res[ptr]=='$' || res[ptr]=='^' || res[ptr]=='\\')
{
token_str="";
token_str+=res[ptr];
token new_token;
new_token.line=line;
new_token.type=__token_operator;
new_token.str=token_str;
token_list.push_back(new_token);
++ptr; ++ptr;
if(ptr==res.end())
break;
} }
else if(*ptr=='=' || *ptr=='+' || *ptr=='-' || *ptr=='*' || *ptr=='!' || *ptr=='/' || *ptr=='<' || *ptr=='>' || *ptr=='~') else if(res[ptr]=='=' || res[ptr]=='+' || res[ptr]=='-' || res[ptr]=='*' || res[ptr]=='!' ||
res[ptr]=='/' || res[ptr]=='<' || res[ptr]=='>' || res[ptr]=='~')
{ {
// get calculation operator // get calculation operator
token_str=""; token_str="";
token_str+=*ptr; token_str+=res[ptr];
++ptr; ++ptr;
if(ptr!=res.end() && *ptr=='=') if(ptr<res.size() && res[ptr]=='=')
{ {
token_str+=*ptr; token_str+=res[ptr];
++ptr; ++ptr;
} }
token new_token; token new_token;
@ -424,16 +409,14 @@ class nasal_lexer
new_token.type=__token_operator; new_token.type=__token_operator;
new_token.str=token_str; new_token.str=token_str;
token_list.push_back(new_token); token_list.push_back(new_token);
if(ptr==res.end())
break;
} }
else if(*ptr=='#') else if(res[ptr]=='#')
{ {
// avoid note // avoid note
while(ptr!=res.end() && *ptr!='\n') while(ptr<res.size() && res[ptr]!='\n')
++ptr; ++ptr;
if(ptr==res.end()) // after this process ptr will point to a '\n'
break; // don't ++ptr then the counter for line can work correctly
} }
else else
{ {
@ -444,9 +427,9 @@ class nasal_lexer
} }
std::cout<<">> [Lexer] complete scanning. "<<error<<" error(s)."<<std::endl; std::cout<<">> [Lexer] complete scanning. "<<error<<" error(s)."<<std::endl;
return; return;
} }
void generate_detail_token() void nasal_lexer::generate_detail_token()
{ {
token detail_token; token detail_token;
detail_token_list.clear(); detail_token_list.clear();
for(std::list<token>::iterator i=token_list.begin();i!=token_list.end();++i) for(std::list<token>::iterator i=token_list.begin();i!=token_list.end();++i)
@ -469,7 +452,7 @@ class nasal_lexer
{ {
detail_token.line=i->line; detail_token.line=i->line;
detail_token.str =""; detail_token.str ="";
if(i->str=="for") detail_token.type=__for; if (i->str=="for") detail_token.type=__for;
else if(i->str=="foreach") detail_token.type=__foreach; else if(i->str=="foreach") detail_token.type=__foreach;
else if(i->str=="forindex") detail_token.type=__forindex; else if(i->str=="forindex") detail_token.type=__forindex;
else if(i->str=="while") detail_token.type=__while; else if(i->str=="while") detail_token.type=__while;
@ -512,7 +495,7 @@ class nasal_lexer
{ {
detail_token.line=i->line; detail_token.line=i->line;
detail_token.str =""; detail_token.str ="";
if(i->str=="+") detail_token.type=__add_operator; if (i->str=="+") detail_token.type=__add_operator;
else if(i->str=="-") detail_token.type=__sub_operator; else if(i->str=="-") detail_token.type=__sub_operator;
else if(i->str=="*") detail_token.type=__mul_operator; else if(i->str=="*") detail_token.type=__mul_operator;
else if(i->str=="/") detail_token.type=__div_operator; else if(i->str=="/") detail_token.type=__div_operator;
@ -552,15 +535,13 @@ class nasal_lexer
} }
std::cout<<">> [Lexer] complete generating. "<<error<<" error(s)."<<std::endl; std::cout<<">> [Lexer] complete generating. "<<error<<" error(s)."<<std::endl;
return; return;
} }
int get_error() int nasal_lexer::get_error()
{ {
return error; return error;
} }
std::list<token>& get_detail_token_list() std::list<token>& nasal_lexer::get_detail_token_list()
{ {
return detail_token_list; return detail_token_list;
} }
};
#endif #endif

View File

@ -3,7 +3,6 @@
#define nas_lib_func_num 32 #define nas_lib_func_num 32
std::string inline_func_name[nas_lib_func_num]= std::string inline_func_name[nas_lib_func_num]=
{ {
"nasal_call_inline_scalar_type",
//base.nas //base.nas
"nasal_call_inline_push_back", "nasal_call_inline_push_back",
"nasal_call_inline_push_null", "nasal_call_inline_push_null",
@ -39,7 +38,8 @@ std::string inline_func_name[nas_lib_func_num]=
"nasal_call_inline_cpp_math_ln", "nasal_call_inline_cpp_math_ln",
"nasal_call_inline_cpp_math_sqrt", "nasal_call_inline_cpp_math_sqrt",
"nasal_call_inline_cpp_atan2", "nasal_call_inline_cpp_atan2",
// //system.nas
"nasal_call_inline_scalar_type"
}; };
class nasal_runtime class nasal_runtime