This commit is contained in:
Valk Richard Li 2020-06-01 01:24:26 -07:00 committed by GitHub
parent 6cb334c87a
commit 927b86ceee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 506 additions and 2 deletions

View File

@ -11,6 +11,7 @@ void logo()
}
nasal_resource resource;
nasal_lexer lexer;
std::string command;
int main()
{
@ -74,7 +75,11 @@ int main()
resource.print_file();
else if(command=="lex")
{
;
lexer.scanner(resource.get_file());
if(!lexer.get_error())
lexer.print_token();
else
std::cout<<">> [lexer] error occurred,stop.\n";
}
else if(command=="par")
{

View File

@ -16,7 +16,9 @@
#include <map>
#include "nasal_enum.h"
#include "nasal_misc.h"
#include "nasal_resource.h"
#include "nasal_lexer.h"
#include "nasal_parse.h"
#endif

View File

@ -5,7 +5,7 @@
enum token_type
{
tok_null=0,
tok_number,tok_string,
tok_number,tok_string,tok_identifier,
tok_for,tok_forindex,tok_foreach,tok_while,
tok_var,tok_func,tok_break,tok_continue,
tok_return,tok_if,tok_elsif,tok_else,tok_nil,

View File

@ -79,10 +79,16 @@ struct token
// Lexer: turns a buffer of source characters into a list of tokens.
class nasal_lexer
{
    // number of errors reported since scanner() last reset it
    int error;
    // tokens produced by the most recent scanner() call
    std::vector<token> token_list;
public:
    // Each *_gen reads one lexeme starting at res[ptr], advances ptr past it
    // and returns its text; line is the current line counter.
    std::string identifier_gen(std::vector<char>& res,int& ptr,int& line);
    std::string number_gen(std::vector<char>& res,int& ptr,int& line);
    std::string string_gen(std::vector<char>& res,int& ptr,int& line);
    void delete_tokens();
    // resets the error counter and token list, then tokenizes res
    void scanner(std::vector<char>& res);
    // prints every stored token as "(line | text)"
    void print_token();
    // returns the current error counter
    int get_error();
};
void nasal_lexer::delete_tokens()
@ -91,9 +97,230 @@ void nasal_lexer::delete_tokens()
return;
}
// Reads an identifier starting at res[ptr].
// Consumes characters for as long as IS_IDENTIFIER_BODY accepts them and
// returns the collected text. The line argument is not used here.
std::string nasal_lexer::identifier_gen(std::vector<char>& res,int& ptr,int& line)
{
    const int length=res.size();
    std::string name;
    for(;ptr<length && IS_IDENTIFIER_BODY(res[ptr]);++ptr)
        name.push_back(res[ptr]);
    // ptr now rests on the first character that does not belong to the identifier
    return name;
}
// Reads a numeric literal starting at res[ptr] and returns its text.
// Characters accepted by IS_NUMBER_BODY are collected up to and including the
// first 'e'/'E'; after that marker an optional '-' and a run of decimal digits
// form the exponent. If check_numerable_string rejects the collected text, an
// error is counted and reported and "0" is returned instead.
std::string nasal_lexer::number_gen(std::vector<char>& res,int& ptr,int& line)
{
    const int length=res.size();
    std::string text;
    bool has_exponent=false;// set once an 'e'/'E' has been consumed (e.g. 1e8)
    while(!has_exponent && ptr<length && IS_NUMBER_BODY(res[ptr]))
    {
        const char ch=res[ptr];
        ++ptr;
        text.push_back(ch);
        has_exponent=(ch=='e' || ch=='E');
    }
    if(has_exponent)
    {
        // optional minus sign of the exponent
        if(ptr<length && res[ptr]=='-')
        {
            text.push_back('-');
            ++ptr;
        }
        // exponent digits
        for(;ptr<length && '0'<=res[ptr] && res[ptr]<='9';++ptr)
            text.push_back(res[ptr]);
    }
    if(check_numerable_string(text))
        return text;
    ++error;
    std::cout<<">> [lexer] line "<<line<<": \'"<<text<<"\' is not a numerable string."<<std::endl;
    return "0";
}
// Reads a quoted string literal starting at res[ptr] (the opening quote).
// The character at res[ptr] is taken as the delimiter; everything up to the
// next occurrence of that delimiter becomes the token text.
// Escape handling:
//   \\ \r \t \n \' \"   are replaced by the character they denote
//   any other "\x"      is kept as the two characters '\' and 'x'
// Every newline that ends up inside the literal increments line.
// When the input ends before the closing delimiter (including the case where
// the opening quote is the very last character), an error is counted and
// reported, and ptr is left at the end of the input.
// On success ptr points just past the closing delimiter.
std::string nasal_lexer::string_gen(std::vector<char>& res,int& ptr,int& line)
{
    const int res_size=res.size();
    std::string token_str="";
    const char str_begin=res[ptr];
    ++ptr;
    while(ptr<res_size && res[ptr]!=str_begin)
    {
        const char current=res[ptr];
        if(current=='\\' && ptr+1<res_size)
        {
            ++ptr;
            switch(res[ptr])
            {
                case '\\':token_str.push_back('\\');break;
                case 'r': token_str.push_back('\r');break;
                case 't': token_str.push_back('\t');break;
                case 'n': token_str.push_back('\n');break;
                case '\'':token_str.push_back('\'');break;
                case '\"':token_str.push_back('\"');break;
                default:
                    // unknown escape: keep the backslash and the character as they are
                    token_str.push_back('\\');
                    token_str.push_back(res[ptr]);
                    // a backslash directly before a line break still ends the line
                    if(res[ptr]=='\n') ++line;
                    break;
            }
        }
        else
        {
            token_str.push_back(current);
            if(current=='\n') ++line;
        }
        ++ptr;
    }
    // check if this string ends with a " or '
    if(ptr>=res_size)
    {
        ++error;
        std::cout<<">> [lexer] line "<<line<<": this string must have a \' "<<str_begin<<" \' as its end."<<std::endl;
        return token_str;
    }
    // skip the closing delimiter
    ++ptr;
    return token_str;
}
// Tokenizes the whole character buffer res.
// Resets the error counter and the token list, then walks res from the start:
//   - blanks, tabs, '\r', '\n' and bytes whose char value is negative are
//     skipped ('\n' increments the line counter)
//   - identifiers/keywords, numbers, strings, single operators, '.'/'...',
//     and calculation operators (optionally followed by '=') become tokens
//   - a comment runs up to, but does not include, the end of its line
//   - any other character is counted and reported as an error and skipped
// Token types come from tok_tbl; an identifier-shaped lexeme that is not in
// the table becomes tok_identifier, and an operator lexeme that is not in the
// table gets type 0 instead of being left unassigned.
// A summary line with the error count is printed when scanning finishes.
void nasal_lexer::scanner(std::vector<char>& res)
{
    error=0;
    token_list.clear();
    int line=1,ptr=0;
    const int res_size=res.size();
    std::string token_str;

    // returns the table type of str, or 0 when str is not in tok_tbl
    auto lookup_type=[](const std::string& str)->int
    {
        for(int i=0;i<TOKEN_TABLE_SIZE;++i)
            if(str==tok_tbl[i].str)
                return tok_tbl[i].tok_type;
        return 0;
    };
    // appends a token holding the current line and token_str
    auto push_token=[&](int type)
    {
        token new_token;
        new_token.line=line;
        new_token.str=token_str;
        new_token.type=type;
        token_list.push_back(new_token);
    };

    while(ptr<res_size)
    {
        while(ptr<res_size && (res[ptr]==' ' || res[ptr]=='\n' || res[ptr]=='\t' || res[ptr]=='\r' || res[ptr]<0))
        {
            // these characters will be ignored, and '\n' will cause ++line
            if(res[ptr]=='\n') ++line;
            ++ptr;
        }
        if(ptr>=res_size) break;
        if(IS_IDENTIFIER_HEAD(res[ptr]))
        {
            token_str=identifier_gen(res,ptr,line);
            int type=lookup_type(token_str);
            // not a keyword in the table: plain identifier
            if(!type)
                type=tok_identifier;
            push_token(type);
        }
        else if(IS_NUMBER_HEAD(res[ptr]))
        {
            token_str=number_gen(res,ptr,line);
            push_token(tok_number);
        }
        else if(IS_STRING_HEAD(res[ptr]))
        {
            token_str=string_gen(res,ptr,line);
            push_token(tok_string);
        }
        else if(IS_SINGLE_OPRATOR(res[ptr]))
        {
            token_str="";
            token_str+=res[ptr];
            push_token(lookup_type(token_str));
            ++ptr;
        }
        else if(IS_DOT(res[ptr]))
        {
            // three consecutive dots form one "..." token, otherwise a single "."
            if(ptr+2<res_size && IS_DOT(res[ptr+1]) && IS_DOT(res[ptr+2]))
            {
                token_str="...";
                ptr+=3;
            }
            else
            {
                token_str=".";
                ++ptr;
            }
            push_token(lookup_type(token_str));
        }
        else if(IS_CALC_OPERATOR(res[ptr]))
        {
            // get calculation operator, with a directly following '=' attached
            token_str="";
            token_str+=res[ptr];
            ++ptr;
            if(ptr<res_size && res[ptr]=='=')
            {
                token_str+=res[ptr];
                ++ptr;
            }
            push_token(lookup_type(token_str));
        }
        else if(IS_NOTE_HEAD(res[ptr]))
        {
            // skip the comment but stop ON the '\n' (do not consume it):
            // the whitespace loop above consumes it and increments line
            while(ptr<res_size && res[ptr]!='\n')
                ++ptr;
        }
        else
        {
            ++error;
            std::cout<<">> [lexer] line "<<line<<": unknown char."<<std::endl;
            ++ptr;
        }
    }
    std::cout<<">> [lexer] complete scanning. "<<error<<" error(s)."<<std::endl;
    return;
}
void nasal_lexer::print_token()
{
int size=token_list.size();
for(int i=0;i<size;++i)
std::cout<<"("<<token_list[i].line<<" | "<<token_list[i].str<<")\n";
return;
}
int nasal_lexer::get_error()
{
return error;
}
#endif

266
version3.0/nasal_misc.h Normal file
View File

@ -0,0 +1,266 @@
#ifndef __NASAL_MISC_H__
#define __NASAL_MISC_H__
/*
check_numerable_string:
check if a string can be converted to a number
strings like these below are correct:
'0.00012'
'12314.234'
'1234'
'0xdeadbeef'
'0xDEADBEEF'
'0o71230'
'1e23'
'1E-123'
'1.34E10'
*/
// Returns true when every character of str in positions [2,len) is a
// hexadecimal digit (0-9, a-f, A-F). The first two characters (the "0x"
// prefix) are not inspected here.
inline bool check_hex_string(std::string str,int len)
{
    int pos=2;
    while(pos<len)
    {
        const char c=str[pos];
        const bool is_decimal=(c>='0' && c<='9');
        const bool is_lower=(c>='a' && c<='f');
        const bool is_upper=(c>='A' && c<='F');
        if(!is_decimal && !is_lower && !is_upper)
            return false;
        ++pos;
    }
    return true;
}
// Returns true when every character of str in positions [2,len) is an octal
// digit (0-7). The first two characters (the "0o" prefix) are not inspected.
inline bool check_oct_string(std::string str,int len)
{
    int pos=2;
    while(pos<len)
    {
        const char c=str[pos];
        const bool is_octal=(c>='0' && c<='7');
        if(!is_octal)
            return false;
        ++pos;
    }
    return true;
}
// Returns true when the first len characters of str form a decimal literal:
// digits with at most one '.', optionally followed by 'e'/'E', an optional
// '-' and at least one exponent digit.
// Rejected: a leading '.', a leading '0' directly followed by another digit,
// a '.' that is the last character or sits directly before 'e'/'E', and any
// other non-digit character.
inline bool check_dec_string(std::string str,int len)
{
    if(str[0]=='.')
        return false;
    if(str[0]=='0' && str[1]>='0' && str[1]<='9')
        return false;
    bool seen_dot=false;
    int pos=0;
    // mantissa: digits and a single dot, up to an exponent marker
    while(pos<len)
    {
        const char c=str[pos];
        if(c=='.')
        {
            if(pos==len-1)
                return false;
            const char next=str[pos+1];
            if(next=='e' || next=='E')
                return false;
            // a second dot can never make a valid number
            if(seen_dot)
                return false;
            seen_dot=true;
        }
        else if(c=='e' || c=='E')
            break;
        else if(c<'0' || c>'9')
            return false;
        ++pos;
    }
    // no exponent marker: the mantissa alone was valid
    if(str[pos]!='e' && str[pos]!='E')
        return true;
    // exponent: optional '-', then digits only, and not empty
    ++pos;
    if(pos==len)
        return false;
    if(str[pos]=='-')
    {
        ++pos;
        if(pos==len)
            return false;
    }
    while(pos<len)
    {
        if(str[pos]<'0' || str[pos]>'9')
            return false;
        ++pos;
    }
    return true;
}
// Returns true when str can be converted to a number.
// An optional leading '-' is stripped first; the remainder must be a single
// decimal digit, a "0x" hexadecimal literal, a "0o" octal literal, or a
// decimal literal accepted by check_dec_string.
// The empty string and a lone "-" are rejected.
bool check_numerable_string(std::string str)
{
    if(str.empty()) return false;
    if(str[0]=='-')
    {
        str.erase(0,1);
        if(str.empty()) return false;
    }
    // the length must be taken AFTER the sign is removed: the helpers scan
    // exactly len characters and would otherwise read the terminating '\0'
    // and reject every negative literal
    const int len=str.length();
    if(len==1 && '0'<=str[0] && str[0]<='9')
        return true;
    else if(len>2 && str[0]=='0' && str[1]=='x')
        return check_hex_string(str,len);
    else if(len>2 && str[0]=='0' && str[1]=='o')
        return check_oct_string(str,len);
    else if('0'<=str[0] && str[0]<='9')
        return check_dec_string(str,len);
    return false;
}
/*
trans_string_to_number:
convert string to number
*/
// Converts the hexadecimal digits in positions [2,len) of str to a double.
// Digits are weighted from the rightmost one (16^0) leftwards; a character
// that is not a hexadecimal digit contributes nothing but still occupies a
// digit position.
inline double hex_to_double(std::string str,int len)
{
    double total=0;
    double weight=1;
    for(int pos=len-1;pos>=2;--pos,weight*=16)
    {
        const char c=str[pos];
        int digit=-1;
        if(c>='0' && c<='9')
            digit=c-'0';
        else if(c>='a' && c<='f')
            digit=c-'a'+10;
        else if(c>='A' && c<='F')
            digit=c-'A'+10;
        if(digit>=0)
            total+=weight*digit;
    }
    return total;
}
// Converts the characters in positions [2,len) of str to a double, treating
// them as octal digits weighted from the rightmost one (8^0) leftwards.
// No validation is done: each character contributes (character - '0').
inline double oct_to_double(std::string str,int len)
{
    double total=0;
    double weight=1;
    int pos=len-1;
    while(pos>1)
    {
        const int digit=str[pos]-'0';
        total+=weight*digit;
        weight*=8;
        --pos;
    }
    return total;
}
// Converts the first len characters of str, read as a decimal literal with an
// optional fraction and an optional exponent ('e'/'E', optional '-', digits),
// to a double. No validation is done: unexpected characters in the integer
// and fraction parts are skipped.
inline double dec_to_double(std::string str,int len)
{
    double value=0;
    int pos=0;
    // integer part: stops at '.', 'e' or 'E'
    while(pos<len)
    {
        const char c=str[pos];
        if(c=='.' || c=='e' || c=='E')
            break;
        if(c>='0' && c<='9')
            value=value*10+(c-'0');
        ++pos;
    }
    // fraction part: each digit is scaled by a weight that shrinks tenfold
    if(str[pos]=='.')
    {
        double weight=0.1;
        for(++pos;pos<len;++pos)
        {
            const char c=str[pos];
            if(c=='e' || c=='E')
                break;
            if(c>='0' && c<='9')
            {
                value+=weight*(c-'0');
                weight*=0.1;
            }
        }
    }
    // exponent part: value is multiplied by 10^exponent
    if(str[pos]=='e' || str[pos]=='E')
    {
        ++pos;
        const bool negative_exponent=(str[pos]=='-');
        if(negative_exponent)
            ++pos;
        double exponent=0;
        while(pos<len)
        {
            exponent=exponent*10+(str[pos]-'0');
            ++pos;
        }
        const double scale=std::pow(10,negative_exponent?-exponent:exponent);
        value*=scale;
    }
    return value;
}
// Converts str to a number.
// An optional leading '-' negates the result. The remainder is read as a
// single decimal digit, a "0x" hexadecimal literal, a "0o" octal literal, or
// a decimal literal (optionally with fraction and exponent).
// Returns 0 for the empty string, a lone "-", or text that does not start
// with a digit. The input is not validated here.
double trans_string_to_number(std::string str)
{
    if(str.empty()) return 0;
    bool is_negative=false;
    if(str[0]=='-')
    {
        is_negative=true;
        str.erase(0,1);
        if(str.empty()) return 0;
    }
    // the length must be taken AFTER the sign is removed: the helpers walk
    // exactly len characters, and a stale length makes them read past the end
    // of the string and shifts every hex/octal digit by one position
    const int len=str.length();
    double ret_num=0;
    if(len==1 && '0'<=str[0] && str[0]<='9')
        ret_num=(double)(str[0]-'0');
    else if(len>2 && str[0]=='0' && str[1]=='x')
        ret_num=hex_to_double(str,len);
    else if(len>2 && str[0]=='0' && str[1]=='o')
        ret_num=oct_to_double(str,len);
    else if('0'<=str[0] && str[0]<='9')
        ret_num=dec_to_double(str,len);
    return is_negative?-ret_num:ret_num;
}
/*
trans_number_to_string:
convert number to string
*/
// Converts number to its plain decimal text (no exponent notation).
// A negative value gets a leading '-'. The integer part always has at least
// one digit, so 0 yields "0" and 0.5 yields "0.5". A '.' and fraction digits
// are appended only when a fractional remainder exists.
// NaN yields "nan"; infinities yield "inf" / "-inf".
std::string trans_number_to_string(double number)
{
    // NaN is the only value that compares unequal to itself
    if(number!=number)
        return "nan";
    std::string trans_num_string="";
    if(number<0)
    {
        trans_num_string+='-';
        number=-number;
    }
    // inf-inf is NaN (and NaN!=0), so this holds only for an infinite magnitude
    if(number-number!=0)
        return trans_num_string+"inf";
    // find the weight of the leading integer digit and count the digits;
    // an integer counter drives the loop below because repeated division of
    // a floating-point weight is not guaranteed to land exactly on 0.1
    int integer_digits=1;
    double integer_bit=1;
    while(number>=integer_bit*10)
    {
        integer_bit*=10;
        ++integer_digits;
    }
    for(int i=0;i<integer_digits;++i)
    {
        int digit=int(number/integer_bit);
        // rounding of the weight may push the quotient to 10: keep one character
        if(digit>9) digit=9;
        trans_num_string+=(char)('0'+digit);
        number-=(double)digit*integer_bit;
        // rounding may leave a tiny negative remainder
        if(number<0) number=0;
        integer_bit/=10;
    }
    if(number!=0)
        trans_num_string+='.';
    // peel off fraction digits until nothing is left
    while(number!=0)
    {
        trans_num_string+=(char)('0'+int(number*10));
        number*=10;
        number-=(double)(int(number));
    }
    return trans_num_string;
}
/*
prt_hex:
transform int to hex format and print it out (std::cout)
*/
// Prints ptr to std::cout as a sign, "0x" and eight hexadecimal digits
// (lower case, zero padded), e.g. 255 -> "0x000000ff", -1 -> "-0x00000001".
// The magnitude is taken in unsigned arithmetic so that the most negative
// int can be printed as well (negating it as a signed int would overflow).
void prt_hex(const int ptr)
{
    char hex[9];
    hex[8]=0;
    unsigned int magnitude;
    if(ptr<0)
    {
        // 0u-(unsigned)ptr is the absolute value, well defined for every int
        magnitude=0u-static_cast<unsigned int>(ptr);
        std::cout<<"-0x";
    }
    else
    {
        magnitude=static_cast<unsigned int>(ptr);
        std::cout<<"0x";
    }
    /*
        the digits are produced from the lowest nibble upwards:
        example:
            a=0x13 57 9b df
            a & 0x0f  -> 0x0f   (lowest four bits, digit 'f')
            a>>=4     -> 0x01 35 79 bd
            a & 0x0f  -> 0x0d   (next digit 'd')
        each 0~15 value is then converted to 0~9 a~f
    */
    for(int j=7;j>=0;--j)
    {
        const unsigned int nibble=(magnitude & 0x0000000fu);
        hex[j]=nibble<10? (char)('0'+nibble):(char)('a'+nibble-10);
        magnitude>>=4;
    }
    std::cout<<hex;
    return;
}
#endif

4
version3.0/nasal_parse.h Normal file
View File

@ -0,0 +1,4 @@
// include guard: the macro tested by #ifndef and the one defined must be identical
#ifndef __NASAL_PARSE_H__
#define __NASAL_PARSE_H__
#endif