⚡ optimize lexer
This commit is contained in:
parent
be318abb2e
commit
9455a83df0
128
nasal_lexer.h
128
nasal_lexer.h
|
@ -25,7 +25,7 @@ enum class tok:u32 {
|
||||||
rif, // condition expression keyword if
|
rif, // condition expression keyword if
|
||||||
elsif, // condition expression keyword elsif
|
elsif, // condition expression keyword elsif
|
||||||
relse, // condition expression keyword else
|
relse, // condition expression keyword else
|
||||||
nil, // nil literal
|
tknil, // nil literal
|
||||||
lcurve, // (
|
lcurve, // (
|
||||||
rcurve, // )
|
rcurve, // )
|
||||||
lbracket, // [
|
lbracket, // [
|
||||||
|
@ -91,7 +91,7 @@ private:
|
||||||
{"if" ,tok::rif },
|
{"if" ,tok::rif },
|
||||||
{"elsif" ,tok::elsif },
|
{"elsif" ,tok::elsif },
|
||||||
{"else" ,tok::relse },
|
{"else" ,tok::relse },
|
||||||
{"nil" ,tok::nil },
|
{"nil" ,tok::tknil },
|
||||||
{"(" ,tok::lcurve },
|
{"(" ,tok::lcurve },
|
||||||
{")" ,tok::rcurve },
|
{")" ,tok::rcurve },
|
||||||
{"[" ,tok::lbracket},
|
{"[" ,tok::lbracket},
|
||||||
|
@ -135,11 +135,18 @@ private:
|
||||||
bool is_str(char);
|
bool is_str(char);
|
||||||
bool is_single_opr(char);
|
bool is_single_opr(char);
|
||||||
bool is_calc_opr(char);
|
bool is_calc_opr(char);
|
||||||
|
|
||||||
|
void skip_note();
|
||||||
|
void err_char();
|
||||||
|
|
||||||
void open(const string&);
|
void open(const string&);
|
||||||
string utf8_gen();
|
string utf8_gen();
|
||||||
string id_gen();
|
token id_gen();
|
||||||
string num_gen();
|
token num_gen();
|
||||||
string str_gen();
|
token str_gen();
|
||||||
|
token single_opr();
|
||||||
|
token dots();
|
||||||
|
token calc_opr();
|
||||||
public:
|
public:
|
||||||
lexer(error& e): line(1),column(0),ptr(0),res(""),err(e) {}
|
lexer(error& e): line(1),column(0),ptr(0),res(""),err(e) {}
|
||||||
const error& scan(const string&);
|
const error& scan(const string&);
|
||||||
|
@ -188,6 +195,18 @@ bool lexer::is_calc_opr(char c) {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void lexer::skip_note() {
|
||||||
|
// avoid note, after this process ptr will point to a '\n', so next loop line counter+1
|
||||||
|
while(++ptr<res.size() && res[ptr]!='\n') {}
|
||||||
|
}
|
||||||
|
|
||||||
|
void lexer::err_char() {
|
||||||
|
++column;
|
||||||
|
char c=res[ptr++];
|
||||||
|
err.err("lexer",line,column,1,"invalid character 0x"+chrhex(c));
|
||||||
|
err.fatal("lexer","fatal error occurred, stop");
|
||||||
|
}
|
||||||
|
|
||||||
void lexer::open(const string& file) {
|
void lexer::open(const string& file) {
|
||||||
struct stat buffer;
|
struct stat buffer;
|
||||||
if (stat(file.c_str(),&buffer)==0 && !S_ISREG(buffer.st_mode)) {
|
if (stat(file.c_str(),&buffer)==0 && !S_ISREG(buffer.st_mode)) {
|
||||||
|
@ -240,7 +259,7 @@ string lexer::utf8_gen() {
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
string lexer::id_gen() {
|
token lexer::id_gen() {
|
||||||
string str="";
|
string str="";
|
||||||
while(ptr<res.size() && (is_id(res[ptr])||is_dec(res[ptr]))) {
|
while(ptr<res.size() && (is_id(res[ptr])||is_dec(res[ptr]))) {
|
||||||
if (res[ptr]<0) { // utf-8
|
if (res[ptr]<0) { // utf-8
|
||||||
|
@ -250,10 +269,11 @@ string lexer::id_gen() {
|
||||||
++column;
|
++column;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return str;
|
tok type=get_type(str);
|
||||||
|
return {line,column,(type!=tok::null)?type:tok::id,str};
|
||||||
}
|
}
|
||||||
|
|
||||||
string lexer::num_gen() {
|
token lexer::num_gen() {
|
||||||
// generate hex number
|
// generate hex number
|
||||||
if (ptr+1<res.size() && res[ptr]=='0' && res[ptr+1]=='x') {
|
if (ptr+1<res.size() && res[ptr]=='0' && res[ptr+1]=='x') {
|
||||||
string str="0x";
|
string str="0x";
|
||||||
|
@ -265,7 +285,7 @@ string lexer::num_gen() {
|
||||||
if (str.length()<3) { // "0x"
|
if (str.length()<3) { // "0x"
|
||||||
err.err("lexer",line,column,str.length(),"invalid number `"+str+"`");
|
err.err("lexer",line,column,str.length(),"invalid number `"+str+"`");
|
||||||
}
|
}
|
||||||
return str;
|
return {line,column,tok::num,str};
|
||||||
} else if (ptr+1<res.size() && res[ptr]=='0' && res[ptr+1]=='o') { // generate oct number
|
} else if (ptr+1<res.size() && res[ptr]=='0' && res[ptr+1]=='o') { // generate oct number
|
||||||
string str="0o";
|
string str="0o";
|
||||||
ptr+=2;
|
ptr+=2;
|
||||||
|
@ -281,7 +301,7 @@ string lexer::num_gen() {
|
||||||
if (str.length()==2 || erfmt) {
|
if (str.length()==2 || erfmt) {
|
||||||
err.err("lexer",line,column,str.length(),"invalid number `"+str+"`");
|
err.err("lexer",line,column,str.length(),"invalid number `"+str+"`");
|
||||||
}
|
}
|
||||||
return str;
|
return {line,column,tok::num,str};
|
||||||
}
|
}
|
||||||
// generate dec number
|
// generate dec number
|
||||||
// dec number -> [0~9][0~9]*(.[0~9]*)(e|E(+|-)0|[1~9][0~9]*)
|
// dec number -> [0~9][0~9]*(.[0~9]*)(e|E(+|-)0|[1~9][0~9]*)
|
||||||
|
@ -298,7 +318,7 @@ string lexer::num_gen() {
|
||||||
if (str.back()=='.') {
|
if (str.back()=='.') {
|
||||||
column+=str.length();
|
column+=str.length();
|
||||||
err.err("lexer",line,column,str.length(),"invalid number `"+str+"`");
|
err.err("lexer",line,column,str.length(),"invalid number `"+str+"`");
|
||||||
return "0";
|
return {line,column,tok::num,"0"};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ptr<res.size() && (res[ptr]=='e' || res[ptr]=='E')) {
|
if (ptr<res.size() && (res[ptr]=='e' || res[ptr]=='E')) {
|
||||||
|
@ -313,14 +333,14 @@ string lexer::num_gen() {
|
||||||
if (str.back()=='e' || str.back()=='E' || str.back()=='-' || str.back()=='+') {
|
if (str.back()=='e' || str.back()=='E' || str.back()=='-' || str.back()=='+') {
|
||||||
column+=str.length();
|
column+=str.length();
|
||||||
err.err("lexer",line,column,str.length(),"invalid number `"+str+"`");
|
err.err("lexer",line,column,str.length(),"invalid number `"+str+"`");
|
||||||
return "0";
|
return {line,column,tok::num,"0"};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
column+=str.length();
|
column+=str.length();
|
||||||
return str;
|
return {line,column,tok::num,str};
|
||||||
}
|
}
|
||||||
|
|
||||||
string lexer::str_gen() {
|
token lexer::str_gen() {
|
||||||
string str="";
|
string str="";
|
||||||
const char begin=res[ptr];
|
const char begin=res[ptr];
|
||||||
++column;
|
++column;
|
||||||
|
@ -356,13 +376,44 @@ string lexer::str_gen() {
|
||||||
// check if this string ends with a " or '
|
// check if this string ends with a " or '
|
||||||
if (ptr++>=res.size()) {
|
if (ptr++>=res.size()) {
|
||||||
err.err("lexer",line,column,1,"get EOF when generating string");
|
err.err("lexer",line,column,1,"get EOF when generating string");
|
||||||
return str;
|
return {line,column,tok::str,str};
|
||||||
}
|
}
|
||||||
++column;
|
++column;
|
||||||
if (begin=='`' && str.length()!=1) {
|
if (begin=='`' && str.length()!=1) {
|
||||||
err.err("lexer",line,column,1,"\'`\' is used for string that includes one character");
|
err.err("lexer",line,column,1,"\'`\' is used for string that includes one character");
|
||||||
}
|
}
|
||||||
return str;
|
return {line,column,tok::str,str};
|
||||||
|
}
|
||||||
|
|
||||||
|
token lexer::single_opr() {
|
||||||
|
string str(1,res[ptr]);
|
||||||
|
++column;
|
||||||
|
tok type=get_type(str);
|
||||||
|
if (type==tok::null) {
|
||||||
|
err.err("lexer",line,column,str.length(),"invalid operator `"+str+"`");
|
||||||
|
}
|
||||||
|
++ptr;
|
||||||
|
return {line,column,type,str};
|
||||||
|
}
|
||||||
|
|
||||||
|
token lexer::dots() {
|
||||||
|
string str=".";
|
||||||
|
if (ptr+2<res.size() && res[ptr+1]=='.' && res[ptr+2]=='.') {
|
||||||
|
str+="..";
|
||||||
|
}
|
||||||
|
ptr+=str.length();
|
||||||
|
column+=str.length();
|
||||||
|
return {line,column,get_type(str),str};
|
||||||
|
}
|
||||||
|
|
||||||
|
token lexer::calc_opr() {
|
||||||
|
// get calculation operator
|
||||||
|
string str(1,res[ptr++]);
|
||||||
|
if (ptr<res.size() && res[ptr]=='=') {
|
||||||
|
str+=res[ptr++];
|
||||||
|
}
|
||||||
|
column+=str.length();
|
||||||
|
return {line,column,get_type(str),str};
|
||||||
}
|
}
|
||||||
|
|
||||||
const error& lexer::scan(const string& file) {
|
const error& lexer::scan(const string& file) {
|
||||||
|
@ -371,7 +422,6 @@ const error& lexer::scan(const string& file) {
|
||||||
ptr=0;
|
ptr=0;
|
||||||
open(file);
|
open(file);
|
||||||
|
|
||||||
string str;
|
|
||||||
while(ptr<res.size()) {
|
while(ptr<res.size()) {
|
||||||
while(ptr<res.size() && skip(res[ptr])) {
|
while(ptr<res.size() && skip(res[ptr])) {
|
||||||
// these characters will be ignored, and '\n' will cause ++line
|
// these characters will be ignored, and '\n' will cause ++line
|
||||||
|
@ -385,47 +435,21 @@ const error& lexer::scan(const string& file) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (is_id(res[ptr])) {
|
if (is_id(res[ptr])) {
|
||||||
str=id_gen();
|
toks.push_back(id_gen());
|
||||||
tok type=get_type(str);
|
|
||||||
toks.push_back({line,column,(type!=tok::null)?type:tok::id,str});
|
|
||||||
} else if (is_dec(res[ptr])) {
|
} else if (is_dec(res[ptr])) {
|
||||||
str=num_gen(); // make sure column is correct
|
toks.push_back(num_gen());
|
||||||
toks.push_back({line,column,tok::num,str});
|
|
||||||
} else if (is_str(res[ptr])) {
|
} else if (is_str(res[ptr])) {
|
||||||
str=str_gen(); // make sure column is correct
|
toks.push_back(str_gen());
|
||||||
toks.push_back({line,column,tok::str,str});
|
|
||||||
} else if (is_single_opr(res[ptr])) {
|
} else if (is_single_opr(res[ptr])) {
|
||||||
str=res[ptr];
|
toks.push_back(single_opr());
|
||||||
++column;
|
|
||||||
tok type=get_type(str);
|
|
||||||
if (type==tok::null) {
|
|
||||||
err.err("lexer",line,column,str.length(),"invalid operator `"+str+"`");
|
|
||||||
}
|
|
||||||
toks.push_back({line,column,type,str});
|
|
||||||
++ptr;
|
|
||||||
} else if (res[ptr]=='.') {
|
} else if (res[ptr]=='.') {
|
||||||
str=".";
|
toks.push_back(dots());
|
||||||
if (ptr+2<res.size() && res[ptr+1]=='.' && res[ptr+2]=='.') {
|
|
||||||
str+="..";
|
|
||||||
}
|
|
||||||
ptr+=str.length();
|
|
||||||
column+=str.length();
|
|
||||||
toks.push_back({line,column,get_type(str),str});
|
|
||||||
} else if (is_calc_opr(res[ptr])) {
|
} else if (is_calc_opr(res[ptr])) {
|
||||||
// get calculation operator
|
toks.push_back(calc_opr());
|
||||||
str=res[ptr++];
|
} else if (res[ptr]=='#') {
|
||||||
if (ptr<res.size() && res[ptr]=='=') {
|
skip_note();
|
||||||
str+=res[ptr++];
|
|
||||||
}
|
|
||||||
column+=str.length();
|
|
||||||
toks.push_back({line,column,get_type(str),str});
|
|
||||||
} else if (res[ptr]=='#') { // avoid note, after this process ptr will point to a '\n', so next loop line counter+1
|
|
||||||
while(++ptr<res.size() && res[ptr]!='\n') {}
|
|
||||||
} else {
|
} else {
|
||||||
++column;
|
err_char();
|
||||||
char c=res[ptr++];
|
|
||||||
err.err("lexer",line,column,1,"invalid character 0x"+chrhex(c));
|
|
||||||
err.fatal("lexer","fatal error occurred, stop");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
toks.push_back({line,column,tok::eof,"<eof>"});
|
toks.push_back({line,column,tok::eof,"<eof>"});
|
||||||
|
|
|
@ -61,7 +61,7 @@ private:
|
||||||
{tok::rif ,"if" },
|
{tok::rif ,"if" },
|
||||||
{tok::elsif ,"elsif" },
|
{tok::elsif ,"elsif" },
|
||||||
{tok::relse ,"else" },
|
{tok::relse ,"else" },
|
||||||
{tok::nil ,"nil" },
|
{tok::tknil ,"nil" },
|
||||||
{tok::lcurve ,"(" },
|
{tok::lcurve ,"(" },
|
||||||
{tok::rcurve ,")" },
|
{tok::rcurve ,")" },
|
||||||
{tok::lbracket,"[" },
|
{tok::lbracket,"[" },
|
||||||
|
@ -338,7 +338,7 @@ ast parse::vec() {
|
||||||
// array end with tok::null=0
|
// array end with tok::null=0
|
||||||
const tok panic[]={
|
const tok panic[]={
|
||||||
tok::id,tok::str,tok::num,
|
tok::id,tok::str,tok::num,
|
||||||
tok::opnot,tok::sub,tok::nil,
|
tok::opnot,tok::sub,tok::tknil,
|
||||||
tok::func,tok::var,tok::lcurve,
|
tok::func,tok::var,tok::lcurve,
|
||||||
tok::lbrace,tok::lbracket,tok::null
|
tok::lbrace,tok::lbracket,tok::null
|
||||||
};
|
};
|
||||||
|
@ -451,7 +451,7 @@ ast parse::expr()
|
||||||
die(thisline,thiscol,thislen,"must use return in functions");
|
die(thisline,thiscol,thislen,"must use return in functions");
|
||||||
}
|
}
|
||||||
switch(type) {
|
switch(type) {
|
||||||
case tok::nil:
|
case tok::tknil:
|
||||||
case tok::num:
|
case tok::num:
|
||||||
case tok::str:
|
case tok::str:
|
||||||
case tok::id:
|
case tok::id:
|
||||||
|
@ -607,9 +607,9 @@ ast parse::unary() {
|
||||||
|
|
||||||
ast parse::scalar() {
|
ast parse::scalar() {
|
||||||
ast node(toks[ptr].line,toks[ptr].col,ast_null);
|
ast node(toks[ptr].line,toks[ptr].col,ast_null);
|
||||||
if (lookahead(tok::nil)) {
|
if (lookahead(tok::tknil)) {
|
||||||
node=nil();
|
node=nil();
|
||||||
match(tok::nil);
|
match(tok::tknil);
|
||||||
} else if (lookahead(tok::num)) {
|
} else if (lookahead(tok::num)) {
|
||||||
node=num();
|
node=num();
|
||||||
} else if (lookahead(tok::str)) {
|
} else if (lookahead(tok::str)) {
|
||||||
|
@ -673,7 +673,7 @@ ast parse::callv() {
|
||||||
// array end with tok::null=0
|
// array end with tok::null=0
|
||||||
const tok panic[]={
|
const tok panic[]={
|
||||||
tok::id,tok::str,tok::num,
|
tok::id,tok::str,tok::num,
|
||||||
tok::opnot,tok::sub,tok::nil,
|
tok::opnot,tok::sub,tok::tknil,
|
||||||
tok::func,tok::var,tok::lcurve,
|
tok::func,tok::var,tok::lcurve,
|
||||||
tok::lbrace,tok::lbracket,tok::colon,
|
tok::lbrace,tok::lbracket,tok::colon,
|
||||||
tok::null
|
tok::null
|
||||||
|
@ -703,7 +703,7 @@ ast parse::callf() {
|
||||||
// array end with tok::null=0
|
// array end with tok::null=0
|
||||||
const tok panic[]={
|
const tok panic[]={
|
||||||
tok::id,tok::str,tok::num,
|
tok::id,tok::str,tok::num,
|
||||||
tok::opnot,tok::sub,tok::nil,
|
tok::opnot,tok::sub,tok::tknil,
|
||||||
tok::func,tok::var,tok::lcurve,
|
tok::func,tok::var,tok::lcurve,
|
||||||
tok::lbrace,tok::lbracket,tok::null
|
tok::lbrace,tok::lbracket,tok::null
|
||||||
};
|
};
|
||||||
|
@ -794,7 +794,7 @@ ast parse::multi_scalar() {
|
||||||
// if check_call_memory is true,we will check if value called here can reach a memory space
|
// if check_call_memory is true,we will check if value called here can reach a memory space
|
||||||
const tok panic[]={
|
const tok panic[]={
|
||||||
tok::id,tok::str,tok::num,
|
tok::id,tok::str,tok::num,
|
||||||
tok::opnot,tok::sub,tok::nil,
|
tok::opnot,tok::sub,tok::tknil,
|
||||||
tok::func,tok::var,tok::lcurve,
|
tok::func,tok::var,tok::lcurve,
|
||||||
tok::lbrace,tok::lbracket,tok::null
|
tok::lbrace,tok::lbracket,tok::null
|
||||||
};
|
};
|
||||||
|
@ -979,7 +979,7 @@ ast parse::ret_expr() {
|
||||||
ast node(toks[ptr].line,toks[ptr].col,ast_ret);
|
ast node(toks[ptr].line,toks[ptr].col,ast_ret);
|
||||||
match(tok::ret);
|
match(tok::ret);
|
||||||
tok type=toks[ptr].type;
|
tok type=toks[ptr].type;
|
||||||
if (type==tok::nil || type==tok::num || type==tok::str || type==tok::id ||
|
if (type==tok::tknil || type==tok::num || type==tok::str || type==tok::id ||
|
||||||
type==tok::func || type==tok::sub || type==tok::opnot || type==tok::lcurve ||
|
type==tok::func || type==tok::sub || type==tok::opnot || type==tok::lcurve ||
|
||||||
type==tok::lbracket || type==tok::lbrace
|
type==tok::lbracket || type==tok::lbrace
|
||||||
) {
|
) {
|
||||||
|
|
Loading…
Reference in New Issue