From 9455a83df05d567a828b8c27ad8d754652217141 Mon Sep 17 00:00:00 2001 From: ValKmjolnir Date: Mon, 28 Nov 2022 21:16:39 +0800 Subject: [PATCH] :zap: optimize lexer --- nasal_lexer.h | 128 ++++++++++++++++++++++++++++++-------------------- nasal_parse.h | 18 +++---- 2 files changed, 85 insertions(+), 61 deletions(-) diff --git a/nasal_lexer.h b/nasal_lexer.h index 54adb59..8ef3314 100644 --- a/nasal_lexer.h +++ b/nasal_lexer.h @@ -25,7 +25,7 @@ enum class tok:u32 { rif, // condition expression keyword if elsif, // condition expression keyword elsif relse, // condition expression keyword else - nil, // nil literal + tknil, // nil literal lcurve, // ( rcurve, // ) lbracket, // [ @@ -91,7 +91,7 @@ private: {"if" ,tok::rif }, {"elsif" ,tok::elsif }, {"else" ,tok::relse }, - {"nil" ,tok::nil }, + {"nil" ,tok::tknil }, {"(" ,tok::lcurve }, {")" ,tok::rcurve }, {"[" ,tok::lbracket}, @@ -135,11 +135,18 @@ private: bool is_str(char); bool is_single_opr(char); bool is_calc_opr(char); + + void skip_note(); + void err_char(); + void open(const string&); string utf8_gen(); - string id_gen(); - string num_gen(); - string str_gen(); + token id_gen(); + token num_gen(); + token str_gen(); + token single_opr(); + token dots(); + token calc_opr(); public: lexer(error& e): line(1),column(0),ptr(0),res(""),err(e) {} const error& scan(const string&); @@ -188,6 +195,18 @@ bool lexer::is_calc_opr(char c) { ); } +void lexer::skip_note() { + // avoid note, after this process ptr will point to a '\n', so next loop line counter+1 + while(++ptr [0~9][0~9]*(.[0~9]*)(e|E(+|-)0|[1~9][0~9]*) @@ -298,7 +318,7 @@ string lexer::num_gen() { if (str.back()=='.') { column+=str.length(); err.err("lexer",line,column,str.length(),"invalid number `"+str+"`"); - return "0"; + return {line,column,tok::num,"0"}; } } if (ptr=res.size()) { err.err("lexer",line,column,1,"get EOF when generating string"); - return str; + return {line,column,tok::str,str}; } ++column; if (begin=='`' && str.length()!=1) { err.err("lexer",line,column,1,"\'`\' is used for string that includes one character"); } - return str; + return {line,column,tok::str,str}; +} + +token lexer::single_opr() { + string str(1,res[ptr]); + ++column; + tok type=get_type(str); + if (type==tok::null) { + err.err("lexer",line,column,str.length(),"invalid operator `"+str+"`"); + } + ++ptr; + return {line,column,type,str}; +} + +token lexer::dots() { + string str="."; + if (ptr+2"}); diff --git a/nasal_parse.h b/nasal_parse.h index 8e84bd7..77e3aa4 100644 --- a/nasal_parse.h +++ b/nasal_parse.h @@ -61,7 +61,7 @@ private: {tok::rif ,"if" }, {tok::elsif ,"elsif" }, {tok::relse ,"else" }, - {tok::nil ,"nil" }, + {tok::tknil ,"nil" }, {tok::lcurve ,"(" }, {tok::rcurve ,")" }, {tok::lbracket,"[" }, @@ -338,7 +338,7 @@ ast parse::vec() { // array end with tok::null=0 const tok panic[]={ tok::id,tok::str,tok::num, - tok::opnot,tok::sub,tok::nil, + tok::opnot,tok::sub,tok::tknil, tok::func,tok::var,tok::lcurve, tok::lbrace,tok::lbracket,tok::null }; @@ -451,7 +451,7 @@ ast parse::expr() die(thisline,thiscol,thislen,"must use return in functions"); } switch(type) { - case tok::nil: + case tok::tknil: case tok::num: case tok::str: case tok::id: @@ -607,9 +607,9 @@ ast parse::unary() { ast parse::scalar() { ast node(toks[ptr].line,toks[ptr].col,ast_null); - if (lookahead(tok::nil)) { + if (lookahead(tok::tknil)) { node=nil(); - match(tok::nil); + match(tok::tknil); } else if (lookahead(tok::num)) { node=num(); } else if (lookahead(tok::str)) { @@ -673,7 +673,7 @@ ast parse::callv() { // array end with tok::null=0 const tok panic[]={ tok::id,tok::str,tok::num, - tok::opnot,tok::sub,tok::nil, + tok::opnot,tok::sub,tok::tknil, tok::func,tok::var,tok::lcurve, tok::lbrace,tok::lbracket,tok::colon, tok::null @@ -703,7 +703,7 @@ ast parse::callf() { // array end with tok::null=0 const tok panic[]={ tok::id,tok::str,tok::num, - tok::opnot,tok::sub,tok::nil, + tok::opnot,tok::sub,tok::tknil, tok::func,tok::var,tok::lcurve, tok::lbrace,tok::lbracket,tok::null }; @@ -794,7 +794,7 @@ ast parse::multi_scalar() { // if check_call_memory is true,we will check if value called here can reach a memory space const tok panic[]={ tok::id,tok::str,tok::num, - tok::opnot,tok::sub,tok::nil, + tok::opnot,tok::sub,tok::tknil, tok::func,tok::var,tok::lcurve, tok::lbrace,tok::lbracket,tok::null }; @@ -979,7 +979,7 @@ ast parse::ret_expr() { ast node(toks[ptr].line,toks[ptr].col,ast_ret); match(tok::ret); tok type=toks[ptr].type; - if (type==tok::nil || type==tok::num || type==tok::str || type==tok::id || + if (type==tok::tknil || type==tok::num || type==tok::str || type==tok::id || type==tok::func || type==tok::sub || type==tok::opnot || type==tok::lcurve || type==tok::lbracket || type==tok::lbrace ) {