update
This commit is contained in:
parent
269d81ae5b
commit
6f5143657e
2
lib.nas
2
lib.nas
|
@ -86,7 +86,7 @@ var die=func(str)
|
||||||
nasal_call_builtin_die(str);
|
nasal_call_builtin_die(str);
|
||||||
return nil;
|
return nil;
|
||||||
}
|
}
|
||||||
var type=func(object)
|
var typeof=func(object)
|
||||||
{
|
{
|
||||||
return nasal_call_builtin_type(object);
|
return nasal_call_builtin_type(object);
|
||||||
}
|
}
|
||||||
|
|
|
@ -140,7 +140,7 @@ enum parse_error
|
||||||
void error_info(int line,int error_type,std::string error_str="")
|
void error_info(int line,int error_type,std::string error_str="")
|
||||||
{
|
{
|
||||||
std::string detail;
|
std::string detail;
|
||||||
std::cout<<">> [parse] error: [line "<<line<<"] ";
|
std::cout<<">> [parse] line "<<line<<": ";
|
||||||
switch(error_type)
|
switch(error_type)
|
||||||
{
|
{
|
||||||
case unknown: std::cout<<"unknown error.\n"; break;
|
case unknown: std::cout<<"unknown error.\n"; break;
|
||||||
|
|
131
nasal_lexer.h
131
nasal_lexer.h
|
@ -1,26 +1,23 @@
|
||||||
#ifndef __NASAL_LEXER_H__
|
#ifndef __NASAL_LEXER_H__
|
||||||
#define __NASAL_LEXER_H__
|
#define __NASAL_LEXER_H__
|
||||||
|
|
||||||
#define IS_IDENTIFIER_HEAD(c) ((c=='_')||('a'<=c && c<='z')||('A'<=c&&c<='Z'))
|
#define IS_IDENTIFIER(c) ((c=='_')||('a'<=c && c<='z')||('A'<=c&&c<='Z'))
|
||||||
#define IS_IDENTIFIER_BODY(c) ((c=='_')||('a'<=c && c<='z')||('A'<=c&&c<='Z')||('0'<=c&&c<='9'))
|
|
||||||
#define IS_HEX_NUMBER(c) (('0'<=c&&c<='9')||('a'<=c&&c<='f')||('A'<=c && c<='F'))
|
#define IS_HEX_NUMBER(c) (('0'<=c&&c<='9')||('a'<=c&&c<='f')||('A'<=c && c<='F'))
|
||||||
#define IS_OCT_NUMEBR(c) ('0'<=c&&c<='7')
|
#define IS_OCT_NUMEBR(c) ('0'<=c&&c<='7')
|
||||||
#define IS_DIGIT(c) ('0'<=c&&c<='9')
|
#define IS_DIGIT(c) ('0'<=c&&c<='9')
|
||||||
#define IS_STRING_HEAD(c) (c=='\''||c=='\"')
|
#define IS_STRING(c) (c=='\''||c=='\"'||c=='`')
|
||||||
// single operators have only one character
|
// single operators have only one character
|
||||||
#define IS_SINGLE_OPRATOR(c) (c=='('||c==')'||c=='['||c==']'||c=='{'||c=='}'||c==','||c==';'||c=='|'||c==':'||\
|
#define IS_SINGLE_OPRATOR(c) (c=='('||c==')'||c=='['||c==']'||c=='{'||c=='}'||c==','||c==';'||c=='|'||c==':'||\
|
||||||
c=='?'||c=='`'||c=='&'||c=='@'||c=='%'||c=='$'||c=='^'||c=='\\')
|
c=='?'||c=='`'||c=='&'||c=='@'||c=='%'||c=='$'||c=='^'||c=='\\')
|
||||||
// calculation operators may have two chars, for example: += -= *= /= ~= != == >= <=
|
// calculation operators may have two chars, for example: += -= *= /= ~= != == >= <=
|
||||||
#define IS_CALC_OPERATOR(c) (c=='='||c=='+'||c=='-'||c=='*'||c=='!'||c=='/'||c=='<'||c=='>'||c=='~')
|
#define IS_CALC_OPERATOR(c) (c=='='||c=='+'||c=='-'||c=='*'||c=='!'||c=='/'||c=='<'||c=='>'||c=='~')
|
||||||
#define IS_NOTE_HEAD(c) (c=='#')
|
#define IS_NOTE(c) (c=='#')
|
||||||
|
|
||||||
#ifndef TOKEN_TABLE_SIZE
|
struct
|
||||||
#define TOKEN_TABLE_SIZE 45
|
|
||||||
struct token_table
|
|
||||||
{
|
{
|
||||||
std::string str;
|
const char* str;
|
||||||
int tok_type;
|
int tok_type;
|
||||||
}tok_tbl[TOKEN_TABLE_SIZE]=
|
}token_table[]=
|
||||||
{
|
{
|
||||||
{"for" ,tok_for },
|
{"for" ,tok_for },
|
||||||
{"forindex",tok_forindex },
|
{"forindex",tok_forindex },
|
||||||
|
@ -67,8 +64,8 @@ struct token_table
|
||||||
{">" ,tok_greater_than },
|
{">" ,tok_greater_than },
|
||||||
{"<=" ,tok_less_equal },
|
{"<=" ,tok_less_equal },
|
||||||
{">=" ,tok_greater_equal},
|
{">=" ,tok_greater_equal},
|
||||||
|
{NULL ,-1 }
|
||||||
};
|
};
|
||||||
#endif
|
|
||||||
|
|
||||||
struct token
|
struct token
|
||||||
{
|
{
|
||||||
|
@ -84,15 +81,16 @@ private:
|
||||||
int res_size;
|
int res_size;
|
||||||
int line;
|
int line;
|
||||||
int ptr;
|
int ptr;
|
||||||
|
std::string line_code;
|
||||||
std::vector<char> res;
|
std::vector<char> res;
|
||||||
std::vector<token> token_list;
|
std::vector<token> token_list;
|
||||||
std::string identifier_gen();
|
std::string identifier_gen();
|
||||||
void generate_number_error(int,std::string);
|
|
||||||
std::string number_gen();
|
std::string number_gen();
|
||||||
std::string string_gen();
|
std::string string_gen();
|
||||||
public:
|
public:
|
||||||
void clear();
|
void clear();
|
||||||
void openfile(std::string);
|
void openfile(std::string);
|
||||||
|
void die(std::string,int,int);
|
||||||
void scanner();
|
void scanner();
|
||||||
void print_token();
|
void print_token();
|
||||||
int get_error();
|
int get_error();
|
||||||
|
@ -105,6 +103,7 @@ void nasal_lexer::clear()
|
||||||
res_size=0;
|
res_size=0;
|
||||||
line=0;
|
line=0;
|
||||||
ptr=0;
|
ptr=0;
|
||||||
|
line_code="";
|
||||||
res.clear();
|
res.clear();
|
||||||
token_list.clear();
|
token_list.clear();
|
||||||
return;
|
return;
|
||||||
|
@ -134,21 +133,23 @@ void nasal_lexer::openfile(std::string filename)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void nasal_lexer::die(std::string error_info,int line=-1,int column=-1)
|
||||||
|
{
|
||||||
|
++error;
|
||||||
|
std::cout<<">> [lexer] line "<<line<<" column "<<column<<": "<<error_info<<"\n";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
std::string nasal_lexer::identifier_gen()
|
std::string nasal_lexer::identifier_gen()
|
||||||
{
|
{
|
||||||
std::string token_str="";
|
std::string token_str="";
|
||||||
while(ptr<res_size && IS_IDENTIFIER_BODY(res[ptr]))
|
while(ptr<res_size && (IS_IDENTIFIER(res[ptr])||IS_DIGIT(res[ptr])))
|
||||||
token_str+=res[ptr++];
|
token_str+=res[ptr++];
|
||||||
|
line_code+=token_str;
|
||||||
return token_str;
|
return token_str;
|
||||||
// after running this process, ptr will point to the next token's beginning character
|
// after running this process, ptr will point to the next token's beginning character
|
||||||
}
|
}
|
||||||
|
|
||||||
void nasal_lexer::generate_number_error(int line,std::string token_str)
|
|
||||||
{
|
|
||||||
++error;
|
|
||||||
std::cout<<">> [lexer] line "<<line<<": \""<<token_str<<"\" is not a correct number.\n";
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
std::string nasal_lexer::number_gen()
|
std::string nasal_lexer::number_gen()
|
||||||
{
|
{
|
||||||
bool scientific_notation=false;// numbers like 1e8 are scientific_notation
|
bool scientific_notation=false;// numbers like 1e8 are scientific_notation
|
||||||
|
@ -160,9 +161,10 @@ std::string nasal_lexer::number_gen()
|
||||||
ptr+=2;
|
ptr+=2;
|
||||||
while(ptr<res_size && IS_HEX_NUMBER(res[ptr]))
|
while(ptr<res_size && IS_HEX_NUMBER(res[ptr]))
|
||||||
token_str+=res[ptr++];
|
token_str+=res[ptr++];
|
||||||
|
line_code+=token_str;
|
||||||
if(token_str=="0x")
|
if(token_str=="0x")
|
||||||
{
|
{
|
||||||
generate_number_error(line,token_str);
|
die("["+line_code+"_] incorrect number.",line,line_code.length());
|
||||||
return "0";
|
return "0";
|
||||||
}
|
}
|
||||||
return token_str;
|
return token_str;
|
||||||
|
@ -174,17 +176,16 @@ std::string nasal_lexer::number_gen()
|
||||||
ptr+=2;
|
ptr+=2;
|
||||||
while(ptr<res_size && IS_OCT_NUMEBR(res[ptr]))
|
while(ptr<res_size && IS_OCT_NUMEBR(res[ptr]))
|
||||||
token_str+=res[ptr++];
|
token_str+=res[ptr++];
|
||||||
|
line_code+=token_str;
|
||||||
if(token_str=="0o")
|
if(token_str=="0o")
|
||||||
{
|
{
|
||||||
generate_number_error(line,token_str);
|
die("["+line_code+"_] incorrect number.",line,line_code.length());
|
||||||
return "0";
|
return "0";
|
||||||
}
|
}
|
||||||
return token_str;
|
return token_str;
|
||||||
}
|
}
|
||||||
// generate dec number
|
// generate dec number
|
||||||
// dec number -> 0|[1~9][0~9]*(.[0~9]*)(e|E(+|-)0|[1~9][0~9]*)
|
// dec number -> [0~9][0~9]*(.[0~9]*)(e|E(+|-)0|[1~9][0~9]*)
|
||||||
if(ptr<res_size && res[ptr]=='0')
|
|
||||||
token_str+=res[ptr++];
|
|
||||||
while(ptr<res_size && IS_DIGIT(res[ptr]))
|
while(ptr<res_size && IS_DIGIT(res[ptr]))
|
||||||
token_str+=res[ptr++];
|
token_str+=res[ptr++];
|
||||||
if(ptr<res_size && res[ptr]=='.')
|
if(ptr<res_size && res[ptr]=='.')
|
||||||
|
@ -193,7 +194,8 @@ std::string nasal_lexer::number_gen()
|
||||||
// "xxxx." is not a correct number
|
// "xxxx." is not a correct number
|
||||||
if(ptr>=res_size)
|
if(ptr>=res_size)
|
||||||
{
|
{
|
||||||
generate_number_error(line,token_str);
|
line_code+=token_str;
|
||||||
|
die("["+line_code+"_] incorrect number.",line,line_code.length());
|
||||||
return "0";
|
return "0";
|
||||||
}
|
}
|
||||||
while(ptr<res_size && IS_DIGIT(res[ptr]))
|
while(ptr<res_size && IS_DIGIT(res[ptr]))
|
||||||
|
@ -201,7 +203,8 @@ std::string nasal_lexer::number_gen()
|
||||||
// "xxxx." is not a correct number
|
// "xxxx." is not a correct number
|
||||||
if(token_str.back()=='.')
|
if(token_str.back()=='.')
|
||||||
{
|
{
|
||||||
generate_number_error(line,token_str);
|
line_code+=token_str;
|
||||||
|
die("["+line_code+"_] incorrect number.",line,line_code.length());
|
||||||
return "0";
|
return "0";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -211,14 +214,16 @@ std::string nasal_lexer::number_gen()
|
||||||
// "xxxe" is not a correct number
|
// "xxxe" is not a correct number
|
||||||
if(ptr>=res_size)
|
if(ptr>=res_size)
|
||||||
{
|
{
|
||||||
generate_number_error(line,token_str);
|
line_code+=token_str;
|
||||||
|
die("["+line_code+"_] incorrect number.",line,line_code.length());
|
||||||
return "0";
|
return "0";
|
||||||
}
|
}
|
||||||
if(ptr<res_size && (res[ptr]=='-' || res[ptr]=='+'))
|
if(ptr<res_size && (res[ptr]=='-' || res[ptr]=='+'))
|
||||||
token_str+=res[ptr++];
|
token_str+=res[ptr++];
|
||||||
if(ptr>=res_size)
|
if(ptr>=res_size)
|
||||||
{
|
{
|
||||||
generate_number_error(line,token_str);
|
line_code+=token_str;
|
||||||
|
die("["+line_code+"_] incorrect number.",line,line_code.length());
|
||||||
return "0";
|
return "0";
|
||||||
}
|
}
|
||||||
if(ptr<res_size && res[ptr]=='0')
|
if(ptr<res_size && res[ptr]=='0')
|
||||||
|
@ -228,24 +233,32 @@ std::string nasal_lexer::number_gen()
|
||||||
// "xxxe(-|+)" is not a correct number
|
// "xxxe(-|+)" is not a correct number
|
||||||
if(token_str.back()=='e' || token_str.back()=='E' || token_str.back()=='-' || token_str.back()=='+')
|
if(token_str.back()=='e' || token_str.back()=='E' || token_str.back()=='-' || token_str.back()=='+')
|
||||||
{
|
{
|
||||||
generate_number_error(line,token_str);
|
line_code+=token_str;
|
||||||
|
die("["+line_code+"_] incorrect number.",line,line_code.length());
|
||||||
return "0";
|
return "0";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
line_code+=token_str;
|
||||||
return token_str;
|
return token_str;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string nasal_lexer::string_gen()
|
std::string nasal_lexer::string_gen()
|
||||||
{
|
{
|
||||||
std::string token_str="";
|
std::string token_str="";
|
||||||
|
line_code+=res[ptr];
|
||||||
char str_begin=res[ptr++];
|
char str_begin=res[ptr++];
|
||||||
if(ptr>=res_size) return token_str;
|
|
||||||
while(ptr<res_size && res[ptr]!=str_begin)
|
while(ptr<res_size && res[ptr]!=str_begin)
|
||||||
{
|
{
|
||||||
if(res[ptr]=='\n') ++line;
|
line_code+=res[ptr];
|
||||||
if(res[ptr]=='\\' && ptr+1<res.size())
|
if(res[ptr]=='\n')
|
||||||
|
{
|
||||||
|
line_code="";
|
||||||
|
++line;
|
||||||
|
}
|
||||||
|
if(res[ptr]=='\\' && ptr+1<res_size)
|
||||||
{
|
{
|
||||||
++ptr;
|
++ptr;
|
||||||
|
line_code+=res[ptr];
|
||||||
switch(res[ptr])
|
switch(res[ptr])
|
||||||
{
|
{
|
||||||
case 'a':token_str.push_back('\a');break;
|
case 'a':token_str.push_back('\a');break;
|
||||||
|
@ -269,10 +282,7 @@ std::string nasal_lexer::string_gen()
|
||||||
}
|
}
|
||||||
// check if this string ends with a " or '
|
// check if this string ends with a " or '
|
||||||
if(ptr>=res_size)
|
if(ptr>=res_size)
|
||||||
{
|
die("["+line_code+"_] get EOF when generating string.",line,line_code.length());
|
||||||
++error;
|
|
||||||
std::cout<<">> [lexer] line "<<line<<": get EOF when generating string.\n";
|
|
||||||
}
|
|
||||||
++ptr;
|
++ptr;
|
||||||
return token_str;
|
return token_str;
|
||||||
}
|
}
|
||||||
|
@ -282,6 +292,7 @@ void nasal_lexer::scanner()
|
||||||
token_list.clear();
|
token_list.clear();
|
||||||
line=1;
|
line=1;
|
||||||
ptr=0;
|
ptr=0;
|
||||||
|
line_code="";
|
||||||
|
|
||||||
std::string token_str;
|
std::string token_str;
|
||||||
while(ptr<res_size)
|
while(ptr<res_size)
|
||||||
|
@ -289,21 +300,26 @@ void nasal_lexer::scanner()
|
||||||
while(ptr<res_size && (res[ptr]==' ' || res[ptr]=='\n' || res[ptr]=='\t' || res[ptr]=='\r' || res[ptr]<0))
|
while(ptr<res_size && (res[ptr]==' ' || res[ptr]=='\n' || res[ptr]=='\t' || res[ptr]=='\r' || res[ptr]<0))
|
||||||
{
|
{
|
||||||
// these characters will be ignored, and '\n' will cause ++line
|
// these characters will be ignored, and '\n' will cause ++line
|
||||||
if(res[ptr]=='\n') ++line;
|
line_code+=res[ptr];
|
||||||
|
if(res[ptr]=='\n')
|
||||||
|
{
|
||||||
|
++line;
|
||||||
|
line_code="";
|
||||||
|
}
|
||||||
++ptr;
|
++ptr;
|
||||||
}
|
}
|
||||||
if(ptr>=res_size) break;
|
if(ptr>=res_size) break;
|
||||||
if(IS_IDENTIFIER_HEAD(res[ptr]))
|
if(IS_IDENTIFIER(res[ptr]))
|
||||||
{
|
{
|
||||||
token_str=identifier_gen();
|
token_str=identifier_gen();
|
||||||
token new_token;
|
token new_token;
|
||||||
new_token.line=line;
|
new_token.line=line;
|
||||||
new_token.str=token_str;
|
new_token.str=token_str;
|
||||||
new_token.type=0;
|
new_token.type=0;
|
||||||
for(int i=0;i<TOKEN_TABLE_SIZE;++i)
|
for(int i=0;token_table[i].str;++i)
|
||||||
if(token_str==tok_tbl[i].str)
|
if(token_str==token_table[i].str)
|
||||||
{
|
{
|
||||||
new_token.type=tok_tbl[i].tok_type;
|
new_token.type=token_table[i].tok_type;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if(!new_token.type)
|
if(!new_token.type)
|
||||||
|
@ -319,7 +335,7 @@ void nasal_lexer::scanner()
|
||||||
new_token.type=tok_number;
|
new_token.type=tok_number;
|
||||||
token_list.push_back(new_token);
|
token_list.push_back(new_token);
|
||||||
}
|
}
|
||||||
else if(IS_STRING_HEAD(res[ptr]))
|
else if(IS_STRING(res[ptr]))
|
||||||
{
|
{
|
||||||
token_str=string_gen();
|
token_str=string_gen();
|
||||||
token new_token;
|
token new_token;
|
||||||
|
@ -332,15 +348,19 @@ void nasal_lexer::scanner()
|
||||||
{
|
{
|
||||||
token_str="";
|
token_str="";
|
||||||
token_str+=res[ptr];
|
token_str+=res[ptr];
|
||||||
|
line_code+=res[ptr];
|
||||||
token new_token;
|
token new_token;
|
||||||
new_token.line=line;
|
new_token.line=line;
|
||||||
new_token.str=token_str;
|
new_token.str=token_str;
|
||||||
for(int i=0;i<TOKEN_TABLE_SIZE;++i)
|
new_token.type=-1;
|
||||||
if(token_str==tok_tbl[i].str)
|
for(int i=0;token_table[i].str;++i)
|
||||||
|
if(token_str==token_table[i].str)
|
||||||
{
|
{
|
||||||
new_token.type=tok_tbl[i].tok_type;
|
new_token.type=token_table[i].tok_type;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if(new_token.type<0)
|
||||||
|
die("["+line_code+"_] incorrect operator.",line,line_code.length());
|
||||||
token_list.push_back(new_token);
|
token_list.push_back(new_token);
|
||||||
++ptr;
|
++ptr;
|
||||||
}
|
}
|
||||||
|
@ -356,13 +376,14 @@ void nasal_lexer::scanner()
|
||||||
token_str=".";
|
token_str=".";
|
||||||
++ptr;
|
++ptr;
|
||||||
}
|
}
|
||||||
|
line_code+=token_str;
|
||||||
token new_token;
|
token new_token;
|
||||||
new_token.line=line;
|
new_token.line=line;
|
||||||
new_token.str=token_str;
|
new_token.str=token_str;
|
||||||
for(int i=0;i<TOKEN_TABLE_SIZE;++i)
|
for(int i=0;token_table[i].str;++i)
|
||||||
if(token_str==tok_tbl[i].str)
|
if(token_str==token_table[i].str)
|
||||||
{
|
{
|
||||||
new_token.type=tok_tbl[i].tok_type;
|
new_token.type=token_table[i].tok_type;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
token_list.push_back(new_token);
|
token_list.push_back(new_token);
|
||||||
|
@ -370,26 +391,26 @@ void nasal_lexer::scanner()
|
||||||
else if(IS_CALC_OPERATOR(res[ptr]))
|
else if(IS_CALC_OPERATOR(res[ptr]))
|
||||||
{
|
{
|
||||||
// get calculation operator
|
// get calculation operator
|
||||||
token_str="";
|
token_str=res[ptr];
|
||||||
token_str+=res[ptr];
|
|
||||||
++ptr;
|
++ptr;
|
||||||
if(ptr<res.size() && res[ptr]=='=')
|
if(ptr<res.size() && res[ptr]=='=')
|
||||||
{
|
{
|
||||||
token_str+=res[ptr];
|
token_str+=res[ptr];
|
||||||
++ptr;
|
++ptr;
|
||||||
}
|
}
|
||||||
|
line_code+=token_str;
|
||||||
token new_token;
|
token new_token;
|
||||||
new_token.line=line;
|
new_token.line=line;
|
||||||
new_token.str=token_str;
|
new_token.str=token_str;
|
||||||
for(int i=0;i<TOKEN_TABLE_SIZE;++i)
|
for(int i=0;token_table[i].str;++i)
|
||||||
if(token_str==tok_tbl[i].str)
|
if(token_str==token_table[i].str)
|
||||||
{
|
{
|
||||||
new_token.type=tok_tbl[i].tok_type;
|
new_token.type=token_table[i].tok_type;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
token_list.push_back(new_token);
|
token_list.push_back(new_token);
|
||||||
}
|
}
|
||||||
else if(IS_NOTE_HEAD(res[ptr]))
|
else if(IS_NOTE(res[ptr]))
|
||||||
{
|
{
|
||||||
// avoid note
|
// avoid note
|
||||||
while(ptr<res_size && res[ptr]!='\n') ++ptr;
|
while(ptr<res_size && res[ptr]!='\n') ++ptr;
|
||||||
|
@ -398,8 +419,8 @@ void nasal_lexer::scanner()
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
++error;
|
line_code+=res[ptr];
|
||||||
std::cout<<">> [lexer] line "<<line<<": unknown char "<<(int)res[ptr]<<'.'<<std::endl;
|
die("["+line_code+"_] unknown character.",line,line_code.length());
|
||||||
++ptr;
|
++ptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue