From 91649607bf8f53574f2501ee18f16b3d8467f7b0 Mon Sep 17 00:00:00 2001 From: Valk Richard Li <48872266+ValKmjolnir@users.noreply.github.com> Date: Tue, 10 Dec 2019 13:51:33 +0800 Subject: [PATCH] refactor --- version2.0/abstract_syntax_tree.h | 255 +++++++++++++ version2.0/main.cpp | 87 +++++ version2.0/nasal.h | 18 + version2.0/nasal_enum.h | 144 ++++++++ version2.0/nasal_lexer.h | 593 ++++++++++++++++++++++++++++++ version2.0/nasal_parse.h | 95 +++++ 6 files changed, 1192 insertions(+) create mode 100644 version2.0/abstract_syntax_tree.h create mode 100644 version2.0/main.cpp create mode 100644 version2.0/nasal.h create mode 100644 version2.0/nasal_enum.h create mode 100644 version2.0/nasal_lexer.h create mode 100644 version2.0/nasal_parse.h diff --git a/version2.0/abstract_syntax_tree.h b/version2.0/abstract_syntax_tree.h new file mode 100644 index 0000000..2e7ebe0 --- /dev/null +++ b/version2.0/abstract_syntax_tree.h @@ -0,0 +1,255 @@ +#ifndef __ABSTRACT_SYNTAX_TREE_H__ +#define __ABSTRACT_SYNTAX_TREE_H__ + +class abstract_syntax_tree +{ + private: + int line; + int type; + double number; + std::string str; + std::string name; + std::list children; + public: + // basic + abstract_syntax_tree(); + abstract_syntax_tree(const abstract_syntax_tree&); + ~abstract_syntax_tree(); + abstract_syntax_tree& operator=(const abstract_syntax_tree&); + + // main functions + void set_clear(); + void print_tree(const int); + void set_type(const int); + void set_line(const int); + void set_string(std::string); + void set_number(std::string); + void set_name(std::string); + void add_child(abstract_syntax_tree); + int get_type(); + int get_line(); + double get_number(); + std::string get_string(); + std::string get_name(); + std::list& get_children(); +}; + +abstract_syntax_tree::abstract_syntax_tree() +{ + type=0; + line=0; + number=0; + str=""; + name=""; + children.clear(); + return; +} + +abstract_syntax_tree::abstract_syntax_tree(const abstract_syntax_tree& p) +{ + 
type=p.type; + line=p.line; + number=p.number; + str=p.str; + name=p.name; + children=p.children; + return; +} + +abstract_syntax_tree::~abstract_syntax_tree() +{ + children.clear(); + return; +} + +abstract_syntax_tree& abstract_syntax_tree::operator=(const abstract_syntax_tree& p) +{ + type=p.type; + line=p.line; + number=p.number; + str=p.str; + name=p.name; + children.clear(); + children=p.children; + return *this; +} + +void abstract_syntax_tree::set_clear() +{ + type=0; + line=0; + number=0; + str=""; + name=""; + children.clear(); + return; +} + +void abstract_syntax_tree::print_tree(const int n) +{ + std::string __str=""; + for(int i=0;iprint_tree(n+1); + } + return; +} + +void abstract_syntax_tree::set_type(const int __type) +{ + type=__type; + return; +} + +void abstract_syntax_tree::set_line(const int __line) +{ + line=__line; + return; +} + +void abstract_syntax_tree::set_string(std::string __str) +{ + str=__str; + return; +} + +void abstract_syntax_tree::set_number(std::string _str) +{ + bool is_negative=false; + if(_str.length()>1 && _str[0]=='-') + { + // this statements only used in "input" function + // but in parse this statements are useless + // because lexer recognizes a number that begins with a '0'~'9' char + std::string temp=""; + for(int i=1;i<_str.length();++i) + temp+=_str[i]; + _str=temp; + is_negative=true; + } + if((int)_str.length()>2 && (_str[1]=='x' || _str[1]=='o')) + { + double num=0; + double pw=1; + if(_str[1]=='x') // hex + for(int i=(int)_str.length()-1;i>1;--i) + { + if('0'<=_str[i] && _str[i]<='9') + num+=(_str[i]-'0')*pw; + else if('a'<=_str[i] && _str[i]<='f') + num+=(10+_str[i]-'a')*pw; + else if('A'<=_str[i] && _str[i]<='F') + num+=(10+_str[i]-'A')*pw; + pw*=16; + } + else // oct + for(int i=(int)_str.length()-1;i>1;--i) + { + num+=(_str[i]-'0')*pw; + pw*=8; + } + number=num; + if(is_negative) + number*=-1; + return; + } + int dot_place=-1; + for(int i=0;i<(int)_str.length();++i) + if(_str[i]=='.') + { + dot_place=i; + 
break; + } + if(dot_place==-1) + { + // integer + number=0; + double pw=1; + for(int i=(int)_str.length()-1;i>=0;--i) + { + number+=(_str[i]-'0')*pw; + pw*=10; + } + if(is_negative) + number*=-1; + } + else + { + // float + number=0; + double pw=0.1; + for(int i=dot_place+1;i<(int)_str.length();++i) + { + number+=(_str[i]-'0')*pw; + pw/=10; + } + pw=1; + for(int i=dot_place-1;i>=0;--i) + { + number+=(_str[i]-'0')*pw; + pw*=10; + } + if(is_negative) + number*=-1; + } + return; +} + +void abstract_syntax_tree::set_name(std::string __str) +{ + name=__str; + return; +} + +void abstract_syntax_tree::add_child(abstract_syntax_tree p) +{ + children.push_back(p); + return; +} + +int abstract_syntax_tree::get_type() +{ + return type; +} + +int abstract_syntax_tree::get_line() +{ + return line; +} + +double abstract_syntax_tree::get_number() +{ + return number; +} + +std::string abstract_syntax_tree::get_string() +{ + return str; +} + +std::string abstract_syntax_tree::get_name() +{ + return name; +} + +std::list& abstract_syntax_tree::get_children() +{ + return children; +} +#endif diff --git a/version2.0/main.cpp b/version2.0/main.cpp new file mode 100644 index 0000000..63470ec --- /dev/null +++ b/version2.0/main.cpp @@ -0,0 +1,87 @@ +#include "nasal.h" + +resource_file res; +nasal_lexer lexer; +nasal_parse parser; + +std::string command; + +int main() +{ + std::cout<<">> Nasal interpreter by github:ValKmjolnir"<> Input \"help\" to get help."<> "; + std::cin>>command; + if(command=="help") + { + std::cout<<">> [\'file\'] input a file."<> [cls ] clear the screen."<> [del ] clear the resource code."<> [lib ] add lib file."<> [rs ] print resource code."<> [total ] print resource code with lib code."<> [lexer ] turn code into tokens."<> [parser] turn tokens into abstract syntax tree."<> [ast ] check the abstract syntax tree."<> [exit ] quit nasal interpreter."<> [Delete] complete."<> [Lib] loaded."<>[Lexer] error occurred,stop."<>[Lexer] error occurred,stop."< +#include 
// Coarse token categories produced by the first lexer pass (scanner).
enum lexer_token_type
{
    __token_reserve_word=1,
    __token_identifier,
    __token_number,
    __token_string,
    __token_operator,
};

// Prints the name of a lexer_token_type to std::cout (no newline).
// Values outside the enum print nothing.
void print_lexer_token(int type)
{
    switch(type)
    {
        case __token_reserve_word:std::cout<<"reserve word";break;
        case __token_identifier:  std::cout<<"identifier ";break;
        case __token_number:      std::cout<<"number ";break;
        case __token_string:      std::cout<<"string ";break;
        case __token_operator:    std::cout<<"operator ";break;
    }
    return;
}

// Detailed token kinds used by the parser, followed by the node kinds used
// in the abstract syntax tree. Numbering starts at 1 and is positional, so
// new enumerators must only be appended.
enum parse_token_type
{
    __stack_end=1,
    __cmp_equal,__cmp_not_equal,__cmp_less,__cmp_less_or_equal,__cmp_more,__cmp_more_or_equal,
    // == != < <= > >=
    __and_operator,__or_operator,__nor_operator,__add_operator,__sub_operator,__mul_operator,__div_operator,__link_operator,
    // and or ! + - * / ~
    __equal,__add_equal,__sub_equal,__mul_equal,__div_equal,__link_equal,
    // = += -= *= /= ~=
    __left_brace,__right_brace,     // {}
    __left_bracket,__right_bracket, // []
    __left_curve,__right_curve,     // ()
    __semi,__comma,__colon,__dot,__ques_mark, // ; , : . ?
    __unknown_operator,
    // operators
    __var,__func,__return,__nil,
    __if,__elsif,__else,
    __continue,__break,
    __for,__forindex,__foreach,__while,
    // reserve words
    __number,__string,__id,__dynamic_id,
    // basic scalar type: number string identifier dynamic_identifier

    // abstract_syntax_tree type below
    __root,
    __null_type,
    __list,__hash,
    __hash_member,
    __call_function,__call_array,__call_hash,
    __normal_statement_block,
    __definition,__assignment,
    __function,__loop,__ifelse
};

// Prints the textual form of a parse_token_type to std::cout (no newline).
// Operators and reserved words print as they are written in source code;
// scalar and syntax-tree kinds print a descriptive name. Any value that is
// not a parse_token_type prints "undefined_token".
void print_parse_token(int type)
{
    std::string context="";
    switch(type)
    {
        case __stack_end:             context="#";               break;

        case __cmp_equal:             context="==";              break;
        case __cmp_not_equal:         context="!=";              break;
        case __cmp_less:              context="<";               break;
        case __cmp_less_or_equal:     context="<=";              break;
        case __cmp_more:              context=">";               break;
        case __cmp_more_or_equal:     context=">=";              break;

        case __and_operator:          context="and";             break;
        case __or_operator:           context="or";              break;
        case __nor_operator:          context="!";               break;
        case __add_operator:          context="+";               break;
        case __sub_operator:          context="-";               break;
        case __mul_operator:          context="*";               break;
        case __div_operator:          context="/";               break;
        case __link_operator:         context="~";               break;

        case __equal:                 context="=";               break;
        case __add_equal:             context="+=";              break;
        case __sub_equal:             context="-=";              break;
        case __mul_equal:             context="*=";              break;
        case __div_equal:             context="/=";              break;
        case __link_equal:            context="~=";              break;

        case __left_brace:            context="{";               break;
        case __right_brace:           context="}";               break;
        case __left_bracket:          context="[";               break;
        case __right_bracket:         context="]";               break;
        case __left_curve:            context="(";               break;
        case __right_curve:           context=")";               break;

        case __semi:                  context=";";               break;
        case __comma:                 context=",";               break;
        case __colon:                 context=":";               break;
        case __dot:                   context=".";               break;
        case __ques_mark:             context="?";               break;

        case __unknown_operator:      context="unknown_operator";break;

        case __var:                   context="var";             break;
        case __func:                  context="func";            break;
        case __continue:              context="continue";        break; // was misspelled "continye"
        case __break:                 context="break";           break;
        case __for:                   context="for";             break;
        case __forindex:              context="forindex";        break;
        case __foreach:               context="foreach";         break;
        case __while:                 context="while";           break;
        case __if:                    context="if";              break;
        case __elsif:                 context="elsif";           break;
        case __else:                  context="else";            break;
        case __return:                context="return";          break;
        case __nil:                   context="nil";             break;

        case __id:                    context="identifier";      break;
        case __dynamic_id:            context="identifier...";   break;
        case __number:                context="number";          break;
        case __string:                context="string";          break;

        case __root:                  context="root";            break;
        case __null_type:             context="null_type";       break;
        case __list:                  context="list";            break;
        case __hash:                  context="hash";            break;
        case __hash_member:           context="hash_member";     break;
        case __call_function:         context="call_func";       break;
        case __call_array:            context="call_array";      break;
        case __call_hash:             context="call_hash";       break;
        case __normal_statement_block:context="block";           break;
        case __definition:            context="definition";      break;
        case __assignment:            context="assignment";      break;
        case __function:              context="function";        break;
        case __loop:                  context="loop";            break;
        case __ifelse:                context="if-else";         break;

        default:                      context="undefined_token"; break;
    }
    std::cout<<context;
    return;
}
+ others: __unknown_operator +*/ + +std::string reserve_word[15]= +{ + "for","foreach","forindex","while", + "var","func","break","continue","return", + "if","else","elsif","and","or","nil" +}; + +int is_reserve_word(std::string str) +{ + for(int i=0;i<15;++i) + if(reserve_word[i]==str) + return __token_reserve_word; + return __token_identifier; +} + +bool check_number(std::string str) +{ + if(str.length()>1 && str[0]=='-') + { + // this statements only used in "input" function + // but in lexer this statements are useless + // because lexer judge a number that begins with 0~9 (or 0x for hex & 0o for oct) + std::string temp=""; + for(int i=1;i=3 && str[0]=='0' && str[1]=='x') + { + for(int i=2;i=3 && str[0]=='0' && str[1]=='o') + { + for(int i=2;i1) + return false; + if(str[0]=='.') + return false; + if(!dotcnt && str[0]=='0') + return false; + return true; + } + return false; +} + +class resource_file +{ + private: + std::list libsource; + std::list resource; + std::list total; + public: + /* + resource_file(); + ~resource_file(); + void delete_all_source(); + void input_file(std::string); + void load_lib_file(); + std::list& get_source(); + void print_resource(bool); + */ + resource_file() + { + libsource.clear(); + resource.clear(); + total.clear(); + return; + } + ~resource_file() + { + libsource.clear(); + resource.clear(); + total.clear(); + return; + } + void delete_all_source() + { + libsource.clear(); + resource.clear(); + total.clear(); + return; + } + void input_file(std::string filename) + { + char c=0; + std::ifstream fin(filename,std::ios::binary); + if(fin.fail()) + { + std::cout<<">>[Resource] cannot open file \'"<& get_source() + { + total.clear(); + for(std::list::iterator i=libsource.begin();i!=libsource.end();++i) + total.push_back(*i); + for(std::list::iterator i=resource.begin();i!=resource.end();++i) + total.push_back(*i); + return total; + } + void print_resource(bool withlib) + { + std::list tmp; + if(withlib) + for(std::list::iterator 
i=libsource.begin();i!=libsource.end();++i) + tmp.push_back(*i); + for(std::list::iterator i=resource.begin();i!=resource.end();++i) + tmp.push_back(*i); + + int line=1; + std::cout<::iterator i=tmp.begin();i!=tmp.end();++i) + { + if(32<=*i && *i<128) + std::cout<<*i; + else + std::cout<<" "; + if(*i=='\n') + { + ++line; + std::cout< token_list; + std::list detail_token; + int error; + public: + /* + nasal_lexer(); + ~nasal_lexer(); + void print_token_list(); + void scanner(std::list&); + void generate_detail_token(); + int get_error(); + std::list& get_detail_token(); + */ + nasal_lexer() + { + token_list.clear(); + detail_token.clear(); + error=0; + return; + } + ~nasal_lexer() + { + token_list.clear(); + detail_token.clear(); + return; + } + void print_token_list() + { + for(std::list::iterator i=token_list.begin();i!=token_list.end();++i) + { + std::cout<<"line "<line<<" ( "; + print_lexer_token(i->type); + std::cout<<" | "<str<<" )"<& res) + { + token_list.clear(); + detail_token.clear(); + error=0; + + int line=1; + std::string token_str; + std::list::iterator ptr=res.begin(); + while(ptr!=res.end()) + { + while(*ptr==' ' || *ptr=='\n' || *ptr=='\t' || *ptr=='\r' || *ptr<0 || *ptr>127) + { + if(*ptr=='\n') + ++line; + ++ptr; + if(ptr==res.end()) + break; + } + if(ptr==res.end()) + break; + if(*ptr=='_' || ('a'<=*ptr && *ptr<='z') || ('A'<=*ptr && *ptr<='Z')) + { + // get identifier or reserve word + token_str=""; + while(*ptr=='_' || ('a'<=*ptr && *ptr<='z') || ('A'<=*ptr && *ptr<='Z') || ('0'<=*ptr && *ptr<='9')) + { + token_str+=*ptr; + ++ptr; + if(ptr==res.end()) + break; + } + // check dynamic identifier "..." 
+ if(*ptr=='.') + { + ++ptr; + if(ptr!=res.end() && *ptr=='.') + { + ++ptr; + if(ptr!=res.end() && *ptr=='.') + { + token_str+="..."; + ++ptr; + } + else + { + --ptr; + --ptr; + } + } + else + --ptr; + } + token new_token; + new_token.line=line; + new_token.type=is_reserve_word(token_str); + new_token.str=token_str; + token_list.push_back(new_token); + if(ptr==res.end()) + break; + } + else if('0'<=*ptr && *ptr<='9') + { + token_str=""; + while(('0'<=*ptr && *ptr<='9') || ('a'<=*ptr && *ptr<='f') || ('A'<=*ptr && *ptr<='F') || *ptr=='.' || *ptr=='x' || *ptr=='o') + { + token_str+=*ptr; + ++ptr; + if(ptr==res.end()) + break; + } + if(!check_number(token_str)) + { + ++error; + std::cout<<">>[Lexer-error] line "<>[Lexer-error] line "<' || *ptr=='~') + { + // get calculation operator + token_str=""; + token_str+=*ptr; + ++ptr; + if(ptr!=res.end() && *ptr=='=') + { + token_str+=*ptr; + ++ptr; + } + token new_token; + new_token.line=line; + new_token.type=__token_operator; + new_token.str=token_str; + token_list.push_back(new_token); + if(ptr==res.end()) + break; + } + else if(*ptr=='#') + { + // avoid note + while(ptr!=res.end() && *ptr!='\n') + ++ptr; + if(ptr==res.end()) + break; + } + else + { + ++error; + std::cout<<">>[Lexer-error] line "<>[Pre-lexer] complete scanning. 
"<::iterator i=token_list.begin();i!=token_list.end();++i) + { + if(i->type==__token_number) + { + detail.line=i->line; + detail.str =i->str; + detail.type=__number; + detail_token.push_back(detail); + } + else if(i->type==__token_string) + { + detail.line=i->line; + detail.str =i->str; + detail.type=__string; + detail_token.push_back(detail); + } + else if(i->type==__token_reserve_word) + { + detail.line=i->line; + detail.str=i->str; + if(i->str=="for") + detail.type=__for; + else if(i->str=="foreach") + detail.type=__foreach; + else if(i->str=="forindex") + detail.type=__forindex; + else if(i->str=="while") + detail.type=__while; + else if(i->str=="var") + detail.type=__var; + else if(i->str=="func") + detail.type=__func; + else if(i->str=="break") + detail.type=__break; + else if(i->str=="continue") + detail.type=__continue; + else if(i->str=="return") + detail.type=__return; + else if(i->str=="if") + detail.type=__if; + else if(i->str=="else") + detail.type=__else; + else if(i->str=="elsif") + detail.type=__elsif; + else if(i->str=="nil") + detail.type=__nil; + else if(i->str=="and") + detail.type=__and_operator; + else if(i->str=="or") + detail.type=__or_operator; + detail_token.push_back(detail); + } + else if(i->type==__token_identifier) + { + detail.line=i->line; + detail.str=i->str; + if(i->str.length()<=3) + detail.type=__id; + else + { + std::string tempstr=i->str; + int strback=tempstr.length()-1; + if(tempstr.length()>3 &&tempstr[strback]=='.' && tempstr[strback-1]=='.' 
&& tempstr[strback-2]=='.') + detail.type=__dynamic_id; + else + detail.type=__id; + } + detail_token.push_back(detail); + } + else if(i->type==__token_operator) + { + detail.line=i->line; + detail.str=i->str; + if(i->str=="+") + detail.type=__add_operator; + else if(i->str=="-") + detail.type=__sub_operator; + else if(i->str=="*") + detail.type=__mul_operator; + else if(i->str=="/") + detail.type=__div_operator; + else if(i->str=="~") + detail.type=__link_operator; + else if(i->str=="+=") + detail.type=__add_equal; + else if(i->str=="-=") + detail.type=__sub_equal; + else if(i->str=="*=") + detail.type=__mul_equal; + else if(i->str=="/=") + detail.type=__div_equal; + else if(i->str=="~=") + detail.type=__link_equal; + else if(i->str=="=") + detail.type=__equal; + else if(i->str=="==") + detail.type=__cmp_equal; + else if(i->str=="!=") + detail.type=__cmp_not_equal; + else if(i->str=="<") + detail.type=__cmp_less; + else if(i->str=="<=") + detail.type=__cmp_less_or_equal; + else if(i->str==">") + detail.type=__cmp_more; + else if(i->str==">=") + detail.type=__cmp_more_or_equal; + else if(i->str==";") + detail.type=__semi; + else if(i->str==".") + detail.type=__dot; + else if(i->str==":") + detail.type=__colon; + else if(i->str==",") + detail.type=__comma; + else if(i->str=="?") + detail.type=__ques_mark; + else if(i->str=="!") + detail.type=__nor_operator; + else if(i->str=="[") + detail.type=__left_bracket; + else if(i->str=="]") + detail.type=__right_bracket; + else if(i->str=="(") + detail.type=__left_curve; + else if(i->str==")") + detail.type=__right_curve; + else if(i->str=="{") + detail.type=__left_brace; + else if(i->str=="}") + detail.type=__right_brace; + else + { + ++error; + std::cout<<">>[Lexer-error] line "<str<<"\'."<>[Lexer] complete generating. 
"<& get_detail_token() + { + return detail_token; + } +}; + +#endif diff --git a/version2.0/nasal_parse.h b/version2.0/nasal_parse.h new file mode 100644 index 0000000..7d7699e --- /dev/null +++ b/version2.0/nasal_parse.h @@ -0,0 +1,95 @@ +#ifndef __NASAL_PARSE_H__ +#define __NASAL_PARSE_H__ + +class nasal_parse +{ + private: + std::stack parse; + token this_token; + int error; + int warning; + abstract_syntax_tree root; + public: + // basic + void print_detail_token(); + void get_token_list(std::list&); + void get_token(); + + // abstract_syntax_tree generation + void main_generate(); +}; + +void nasal_parse::print_detail_token() +{ + std::stack tmp=parse; + std::string space=""; + int line=1; + std::cout<& detail_token) +{ + std::stack tmp; + for(std::list::iterator i=detail_token.begin();i!=detail_token.end();++i) + tmp.push(*i); + while(!tmp.empty()) + { + parse.push(tmp.top()); + tmp.pop(); + } + return; +} + +void nasal_parse::get_token() +{ + if(!parse.empty()) + { + this_token=parse.top(); + parse.pop(); + } + else + { + this_token.type=__stack_end; + this_token.str="__stack_end"; + } + return; +} + +void nasal_parse::main_generate() +{ + error=0; + warning=0; + root.set_clear(); + root.set_line(1); + root.set_type(__root); + + while(!parse.empty()) + { + get_token(); + } + return; +} + +#endif