From c95810b46ce388695c32b8c7be07c1fe6c51e81a Mon Sep 17 00:00:00 2001 From: ValKmjolnir Date: Fri, 16 Jun 2023 22:23:46 +0800 Subject: [PATCH] :sparkles: add nasal_new_lexer --- ast/ast_visitor.cpp | 26 ++-- ast/ast_visitor.h | 12 +- ast/nasal_new_ast.cpp | 23 +-- ast/nasal_new_ast.h | 152 ++++++++++++++---- ast/nasal_new_lexer.cpp | 333 ++++++++++++++++++++++++++++++++++++++++ ast/nasal_new_lexer.h | 181 ++++++++++++++++++++++ ast/new_main.cpp | 176 +++++++++++++++++++++ makefile | 5 +- 8 files changed, 852 insertions(+), 56 deletions(-) create mode 100644 ast/nasal_new_lexer.cpp create mode 100644 ast/nasal_new_lexer.h create mode 100644 ast/new_main.cpp diff --git a/ast/ast_visitor.cpp b/ast/ast_visitor.cpp index 4e6df01..01b6f93 100644 --- a/ast/ast_visitor.cpp +++ b/ast/ast_visitor.cpp @@ -1,21 +1,29 @@ #include "ast_visitor.h" -void ast_visitor::visit_expr(expr* node) { +bool ast_visitor::visit_expr(expr* node) { node->accept(this); } -void ast_visitor::visit_null_expr(null_expr* node) { - node->accept(this); +bool ast_visitor::visit_null_expr(null_expr* node) { + return true; } -void ast_visitor::visit_nil_expr(nil_expr* node) { - node->accept(this); +bool ast_visitor::visit_nil_expr(nil_expr* node) { + return true; } -void ast_visitor::visit_number_literal(number_literal* node) { - node->accept(this); +bool ast_visitor::visit_number_literal(number_literal* node) { + return true; } -void ast_visitor::visit_string_literal(string_literal* node) { - node->accept(this); +bool ast_visitor::visit_string_literal(string_literal* node) { + return true; +} + +bool ast_visitor::visit_identifier(identifier* node) { + return true; +} + +bool ast_visitor::visit_bool_literal(bool_literal* node) { + return true; } \ No newline at end of file diff --git a/ast/ast_visitor.h b/ast/ast_visitor.h index d077d52..94dd66c 100644 --- a/ast/ast_visitor.h +++ b/ast/ast_visitor.h @@ -4,9 +4,11 @@ class ast_visitor { public: - virtual void visit_expr(expr*); - virtual void visit_null_expr(null_expr*); - virtual void visit_nil_expr(nil_expr*); - virtual void visit_number_literal(number_literal*); - virtual void visit_string_literal(string_literal*); + virtual bool visit_expr(expr*); + virtual bool visit_null_expr(null_expr*); + virtual bool visit_nil_expr(nil_expr*); + virtual bool visit_number_literal(number_literal*); + virtual bool visit_string_literal(string_literal*); + virtual bool visit_identifier(identifier*); + virtual bool visit_bool_literal(bool_literal*); }; \ No newline at end of file diff --git a/ast/nasal_new_ast.cpp b/ast/nasal_new_ast.cpp index 5bf617f..c34a1fe 100644 --- a/ast/nasal_new_ast.cpp +++ b/ast/nasal_new_ast.cpp @@ -1,27 +1,30 @@ #include "nasal_new_ast.h" #include "ast_visitor.h" -bool expr::accept(ast_visitor* visitor) { +void expr::accept(ast_visitor* visitor) { visitor->visit_expr(this); - return true; } -bool null_expr::accept(ast_visitor* visitor) { +void null_expr::accept(ast_visitor* visitor) { visitor->visit_null_expr(this); - return true; } -bool nil_expr::accept(ast_visitor* visitor) { +void nil_expr::accept(ast_visitor* visitor) { visitor->visit_nil_expr(this); - return true; } -bool number_literal::accept(ast_visitor* visitor) { +void number_literal::accept(ast_visitor* visitor) { visitor->visit_number_literal(this); - return true; } -bool string_literal::accept(ast_visitor* visitor) { +void string_literal::accept(ast_visitor* visitor) { visitor->visit_string_literal(this); - return true; +} + +void identifier::accept(ast_visitor* visitor) { + visitor->visit_identifier(this); +} + +void bool_literal::accept(ast_visitor* visitor) { + visitor->visit_bool_literal(this); } \ No newline at end of file diff --git a/ast/nasal_new_ast.h b/ast/nasal_new_ast.h index 8121270..1a6712c 100644 --- a/ast/nasal_new_ast.h +++ b/ast/nasal_new_ast.h @@ -83,7 +83,7 @@ public: expr(const span& location, expr_type node_type): nd_loc(location), nd_type(node_type) {} ~expr() = default; - virtual bool accept(ast_visitor*) = 0; + virtual void accept(ast_visitor*) = 0; }; class null_expr:public expr { @@ -91,7 +91,7 @@ public: null_expr(const span& location): expr(location, expr_type::ast_null) {} ~null_expr() = default; - virtual bool accept(ast_visitor*); + virtual void accept(ast_visitor*) override; }; class nil_expr:public expr { @@ -99,7 +99,7 @@ public: nil_expr(const span& location): expr(location, expr_type::ast_nil) {} ~nil_expr() = default; - virtual bool accept(ast_visitor*); + virtual void accept(ast_visitor*) override; }; class number_literal:public expr { @@ -110,7 +110,7 @@ public: number_literal(const span& location, const f64 num): expr(location, expr_type::ast_num), number(num) {} ~number_literal() = default; - virtual bool accept(ast_visitor*); + virtual void accept(ast_visitor*) override; }; class string_literal:public expr { @@ -121,57 +121,147 @@ public: string_literal(const span& location, const string& str): expr(location, expr_type::ast_str), content(str) {} ~string_literal() = default; - virtual bool accept(ast_visitor*); + virtual void accept(ast_visitor*) override; }; -class identifier:public expr {}; +class identifier:public expr { +private: + string name; -class bool_literal:public expr {}; +public: + identifier(const span& location, const string& str): + expr(location, expr_type::ast_id), name(str) {} + ~identifier() = default; + virtual void accept(ast_visitor*) override; +}; -class vector_expr:public expr {}; +class bool_literal:public expr { +private: + bool flag; -class hash_expr:public expr {}; +public: + bool_literal(const span& location, const bool bool_flag): + expr(location, expr_type::ast_bool), flag(bool_flag) {} + ~bool_literal() = default; + virtual void accept(ast_visitor*) override; +}; -class hash_pair:public expr {}; +class vector_expr:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class function:public expr {}; +class hash_expr:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class parameter:public expr {}; +class hash_pair:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class ternary_operator:public expr {}; +class function:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class binary_operator:public expr {}; +class parameter:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class unary_operator:public expr {}; +class ternary_operator:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class call_expr:public expr {}; +class binary_operator:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class call_hash:public expr {}; +class unary_operator:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class call_vector:public expr {}; +class call_expr:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class call_function:public expr {}; +class call_hash:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class slice_vector:public expr {}; +class call_vector:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class definition:public expr {}; +class call_function:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class multi_define:public expr {}; +class slice_vector:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class while_expr:public expr {}; +class definition:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class for_expr:public expr {}; +class multi_define:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class foreach_expr:public expr {}; +class while_expr:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class forindex_expr:public expr {}; +class for_expr:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class condition_expr:public expr {}; +class foreach_expr:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class if_expr:public expr {}; +class forindex_expr:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class continue_expr:public expr {}; +class condition_expr:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class break_expr:public expr {}; +class if_expr:public expr { +public: + virtual void accept(ast_visitor*) override; +}; -class return_expr:public expr {}; +class continue_expr:public expr { +public: + virtual void accept(ast_visitor*) override; +}; + +class break_expr:public expr { +public: + virtual void accept(ast_visitor*) override; +}; + +class return_expr:public expr { +public: + virtual void accept(ast_visitor*) override; +}; diff --git a/ast/nasal_new_lexer.cpp b/ast/nasal_new_lexer.cpp new file mode 100644 index 0000000..a476a91 --- /dev/null +++ b/ast/nasal_new_lexer.cpp @@ -0,0 +1,333 @@ +#ifdef _MSC_VER +#pragma warning (disable:4244) +#pragma warning (disable:4267) +#pragma warning (disable:4102) +#endif + +#include "nasal_new_lexer.h" + +bool lexer::skip(char c) { + return c==' '||c=='\n'||c=='\t'||c=='\r'||c==0; +} + +bool lexer::is_id(char c) { + return (c=='_')||('a'<=c && c<='z')||('A'<=c&&c<='Z')||(c<0); +} + +bool lexer::is_hex(char c) { + return ('0'<=c&&c<='9')||('a'<=c&&c<='f')||('A'<=c && c<='F'); +} + +bool lexer::is_oct(char c) { + return '0'<=c&&c<='7'; +} + +bool lexer::is_dec(char c) { + return '0'<=c&&c<='9'; +} + +bool lexer::is_str(char c) { + return c=='\''||c=='\"'||c=='`'; +} + +bool lexer::is_single_opr(char c) { + return ( + c=='('||c==')'||c=='['||c==']'|| + c=='{'||c=='}'||c==','||c==';'|| + c==':'||c=='?'||c=='`'||c=='@'|| + c=='%'||c=='$'||c=='\\' + ); +} + +bool lexer::is_calc_opr(char c) { + return ( + c=='='||c=='+'||c=='-'||c=='*'|| + c=='!'||c=='/'||c=='<'||c=='>'|| + c=='~'||c=='|'||c=='&'||c=='^' + ); +} + +void lexer::skip_note() { + // avoid note, after this process ptr will point to a '\n', so next loop line counter+1 + while(++ptr is not a regular file"); + err.chkerr(); + } + + // load + filename=file; + std::ifstream in(file, std::ios::binary); + if (in.fail()) { + err.err("lexer", "failed to open <"+file+">"); + } else { + err.load(file); + } + std::stringstream ss; + ss<"); + err.fatal("lexer", "fatal error occurred, stop"); + } + str+=tmp; + column+=2; // may have some problems because not all the unicode takes 2 space + } + return str; +} + +token lexer::id_gen() { + u32 begin_line=line; + u32 begin_column=column; + string str=""; + while(ptr [0~9][0~9]*(.[0~9]*)(e|E(+|-)0|[1~9][0~9]*) + string str=""; + while(ptr=res.size()) { + err.err("lexer", {begin_line, begin_column, line, column, filename}, "get EOF when generating string"); + return {{begin_line, begin_column, line, column, filename}, tok::str, str}; + } + ++column; + if (begin=='`' && str.length()!=1) { + err.err("lexer", {begin_line, begin_column, line, column, filename}, "\'`\' is used for string including one character"); + } + return {{begin_line, begin_column, line, column, filename}, tok::str, str}; +} + +token lexer::single_opr() { + u32 begin_line=line; + u32 begin_column=column; + string str(1,res[ptr]); + ++column; + tok type=get_type(str); + if (type==tok::null) { + err.err("lexer", {begin_line, begin_column, line, column, filename}, "invalid operator `"+str+"`"); + } + ++ptr; + return {{begin_line, begin_column, line, column, filename}, type, str}; +} + +token lexer::dots() { + u32 begin_line=line; + u32 begin_column=column; + string str="."; + if (ptr+2=res.size()) { + break; + } + if (is_id(res[ptr])) { + toks.push_back(id_gen()); + } else if (is_dec(res[ptr])) { + toks.push_back(num_gen()); + } else if (is_str(res[ptr])) { + toks.push_back(str_gen()); + } else if (is_single_opr(res[ptr])) { + toks.push_back(single_opr()); + } else if (res[ptr]=='.') { + toks.push_back(dots()); + } else if (is_calc_opr(res[ptr])) { + toks.push_back(calc_opr()); + } else if (res[ptr]=='#') { + skip_note(); + } else { + err_char(); + } + } + toks.push_back({{line, column, line, column, filename}, tok::eof, ""}); + res=""; + return err; +} diff --git a/ast/nasal_new_lexer.h b/ast/nasal_new_lexer.h new file mode 100644 index 0000000..0f4adfd --- /dev/null +++ b/ast/nasal_new_lexer.h @@ -0,0 +1,181 @@ +#pragma once + +#ifdef _MSC_VER +#pragma warning (disable:4244) +#pragma warning (disable:4267) +#pragma warning (disable:4102) +#endif + +#include +#include +#include +#include +#include + +#include "nasal.h" +#include "nasal_err.h" + +#ifdef _MSC_VER +#define S_ISREG(m) (((m)&0xF000)==0x8000) +#endif + +enum class tok:u32 { + null=0, // null token (default token type) + num, // number literal + str, // string literal + id, // identifier + tktrue, // keyword true + tkfalse, // keyword false + rfor, // loop keyword for + forindex, // loop keyword forindex + foreach, // loop keyword foreach + rwhile, // loop keyword while + var, // keyword for definition + func, // keyword for definition of function + brk, // loop keyword break + cont, // loop keyword continue + ret, // function keyword return + rif, // condition expression keyword if + elsif, // condition expression keyword elsif + relse, // condition expression keyword else + tknil, // nil literal + lcurve, // ( + rcurve, // ) + lbracket, // [ + rbracket, // ] + lbrace, // { + rbrace, // } + semi, // ; + opand, // operator and + opor, // operator or + comma, // , + dot, // . + ellipsis, // ... + quesmark, // ? + colon, // : + add, // operator + + sub, // operator - + mult, // operator * + div, // operator / + floater, // operator ~ and binary operator ~ + btand, // bitwise operator & + btor, // bitwise operator | + btxor, // bitwise operator ^ + opnot, // operator ! + eq, // operator = + addeq, // operator += + subeq, // operator -= + multeq, // operator *= + diveq, // operator /= + lnkeq, // operator ~= + btandeq, // operator &= + btoreq, // operator |= + btxoreq, // operator ^= + cmpeq, // operator == + neq, // operator != + less, // operator < + leq, // operator <= + grt, // operator > + geq, // operator >= + eof // end of token list +}; + +struct token { + span loc; // location + tok type; // token type + string str; // content + token() = default; + token(const token&) = default; +}; + +class lexer { +private: + u32 line; + u32 column; + usize ptr; + string filename; + string res; + error& err; + std::vector toks; + const std::unordered_map typetbl { + {"true" ,tok::tktrue }, + {"false" ,tok::tkfalse }, + {"for" ,tok::rfor }, + {"forindex",tok::forindex}, + {"foreach" ,tok::foreach }, + {"while" ,tok::rwhile }, + {"var" ,tok::var }, + {"func" ,tok::func }, + {"break" ,tok::brk }, + {"continue",tok::cont }, + {"return" ,tok::ret }, + {"if" ,tok::rif }, + {"elsif" ,tok::elsif }, + {"else" ,tok::relse }, + {"nil" ,tok::tknil }, + {"(" ,tok::lcurve }, + {")" ,tok::rcurve }, + {"[" ,tok::lbracket}, + {"]" ,tok::rbracket}, + {"{" ,tok::lbrace }, + {"}" ,tok::rbrace }, + {";" ,tok::semi }, + {"and" ,tok::opand }, + {"or" ,tok::opor }, + {"," ,tok::comma }, + {"." ,tok::dot }, + {"..." ,tok::ellipsis}, + {"?" ,tok::quesmark}, + {":" ,tok::colon }, + {"+" ,tok::add }, + {"-" ,tok::sub }, + {"*" ,tok::mult }, + {"/" ,tok::div }, + {"~" ,tok::floater }, + {"&" ,tok::btand }, + {"|" ,tok::btor }, + {"^" ,tok::btxor }, + {"!" ,tok::opnot }, + {"=" ,tok::eq }, + {"+=" ,tok::addeq }, + {"-=" ,tok::subeq }, + {"*=" ,tok::multeq }, + {"/=" ,tok::diveq }, + {"~=" ,tok::lnkeq }, + {"&=" ,tok::btandeq }, + {"|=" ,tok::btoreq }, + {"^=" ,tok::btxoreq }, + {"==" ,tok::cmpeq }, + {"!=" ,tok::neq }, + {"<" ,tok::less }, + {"<=" ,tok::leq }, + {">" ,tok::grt }, + {">=" ,tok::geq } + }; + + tok get_type(const string&); + bool skip(char); + bool is_id(char); + bool is_hex(char); + bool is_oct(char); + bool is_dec(char); + bool is_str(char); + bool is_single_opr(char); + bool is_calc_opr(char); + + void skip_note(); + void err_char(); + + void open(const string&); + string utf8_gen(); + token id_gen(); + token num_gen(); + token str_gen(); + token single_opr(); + token dots(); + token calc_opr(); +public: + lexer(error& e): line(1), column(0), ptr(0), filename(""), res(""), err(e) {} + const error& scan(const string&); + const std::vector& result() const {return toks;} +}; diff --git a/ast/new_main.cpp b/ast/new_main.cpp new file mode 100644 index 0000000..aef7268 --- /dev/null +++ b/ast/new_main.cpp @@ -0,0 +1,176 @@ +#include "nasal.h" +#include "nasal_err.h" +#include "nasal_lexer.h" +#include "nasal_ast.h" +#include "nasal_parse.h" +#include "nasal_import.h" +#include "nasal_opt.h" +#include "nasal_gc.h" +#include "nasal_builtin.h" +#include "nasal_codegen.h" +#include "nasal_vm.h" +#include "nasal_dbg.h" + +#include + +const u32 VM_AST =0x01; +const u32 VM_CODE =0x02; +const u32 VM_TIME =0x04; +const u32 VM_EXEC =0x08; +const u32 VM_DETAIL=0x10; +const u32 VM_DEBUG =0x20; + +std::ostream& help(std::ostream& out) { + out + <<" ,--#-,\n" + <<"<3 / \\____\\ <3\n" + <<" |_|__A_|\n" +#ifdef _WIN32 + <<"use command to use unicode.\n" +#endif + <<"\nnasal