From c0e5130dd358dfcd725d53291932a8a9e94cea9b Mon Sep 17 00:00:00 2001 From: Valk Richard Li <48872266+ValKmjolnir@users.noreply.github.com> Date: Sat, 31 Aug 2019 11:08:56 -0500 Subject: [PATCH] LR(1) may not do the work --- version0.10/a.txt | 2 + version0.10/main.cpp | 68 +++++ version0.10/nasal_lexer.h | 379 ++++++++++++++++++++++++ version0.10/nasal_parser.h | 571 +++++++++++++++++++++++++++++++++++++ 4 files changed, 1020 insertions(+) create mode 100644 version0.10/a.txt create mode 100644 version0.10/main.cpp create mode 100644 version0.10/nasal_lexer.h create mode 100644 version0.10/nasal_parser.h diff --git a/version0.10/a.txt b/version0.10/a.txt new file mode 100644 index 0000000..d96e3c7 --- /dev/null +++ b/version0.10/a.txt @@ -0,0 +1,2 @@ +var e=e+e+e+(e)+e+(e+e)*(e+e)~e-e/e+e/e+e/(e+e); + diff --git a/version0.10/main.cpp b/version0.10/main.cpp new file mode 100644 index 0000000..bf1d5ca --- /dev/null +++ b/version0.10/main.cpp @@ -0,0 +1,68 @@ +#include "nasal_lexer.h" +#include "nasal_parser.h" +#include +#include + +int main() +{ + resource_programme_process prog; + nasal_lexer lex; + nasal_parser pas; + std::string command; + std::cout<<">> nasal script interpreter by ValKmjolnir"<> input \"help\" to find help."<> Be careful: this interpreter uses strict methods to check if there's an error."<> "; + std::getline(std::cin,command); + if(command=="help") + { + std::cout<<">> nasal script interpreter by ValKmjolnir"<> 1. |input file name to load the file."<> 2. \"cls\" |clear the screen."<> 3. \"exit\" |shut down the interpreter."<> 4. \"lexer\" |run and show the lexer. (-lexer)"<> 5. \"parser\"|run parser. (-parser)"<> 6. \"del\" |delete all elements in stack."<> 7. \"run\" |run the programme in stack. (-lexer -parser)"<> 8. \"rs\" |check the source program."<>[Delete] Complete."< +#include +#include +#include + +#define OPERATOR 1 //operator +#define IDENTIFIER 2 //id +#define NUMBER 3 //number +#define RESERVEWORD 4 //reserve word +#define STRING 5 //string +#define CHAR 6 //char +#define FAIL -1 //fail +#define SCANEND -2 //complete scanning +#define ERRORFOUND -3 //error occurred + +std::string reserve_word[15]= +{ + "for","foreach","forindex","while", + "var","func","break","continue","return", + "if","else","elsif","nil","and","or" +}; + +int isReserveWord(std::string &p) +{ + for(int i=0;i<15;++i) + if(reserve_word[i]==p) + return i+1; + return FAIL; +} + +bool isLetter(char t) +{ + return (('a'<=t) && (t<='z') || ('A'<=t) && (t<='Z')); +} + +bool isNumber(char t) +{ + return (('0'<=t) && (t<='9')); +} + +bool isHex(char t) +{ + return ((('0'<=t) && (t<='9')) || (('a'<=t) && (t<='f'))); +} + +bool isOct(char t) +{ + return (('0'<=t) && (t<='7')); +} + +class resource_programme_process +{ + private: + char *resource; + public: + resource_programme_process() + { + resource=NULL; + resource=new char[16777216]; + } + ~resource_programme_process() + { + if(resource) + delete []resource; + } + char* use_file() + { + return resource; + } + void input_file(std::string& filename) + { + std::ifstream fin(filename); + if(fin.fail()) + { + std::cout<<">>[Error] Cannot load file: "< lexer; + public: + void scanner(int &syn,const char* source,std::string &__token,int &ptr,int &line) + { + char temp; + temp=source[ptr]; + while(temp==' ' || temp=='\n' || temp=='\t' || temp=='\r' || temp<0 || temp>127) + { + ++ptr; + if(temp=='\n') + ++line; + temp=source[ptr]; + } + __token=""; + if(isLetter(temp) || temp=='_') + { + __token+=temp; + ++ptr; + temp=source[ptr]; + while(isLetter(temp) || isNumber(temp) || temp=='_') + { + __token+=temp; + ++ptr; + temp=source[ptr]; + } + syn=isReserveWord(__token); + if(syn==FAIL) + syn=IDENTIFIER; + else + syn=RESERVEWORD; + } + else if(isNumber(temp)) + { + if((source[ptr]=='0') && (source[ptr+1]=='x')) + { + __token+=source[ptr]; + __token+=source[ptr+1]; + ptr+=2; + temp=source[ptr]; + while(isNumber(temp) || isHex(temp)) + { + __token+=temp; + ++ptr; + temp=source[ptr]; + } + } + else if((source[ptr]=='0') && (source[ptr+1]=='o')) + { + __token+=source[ptr]; + __token+=source[ptr+1]; + ptr+=2; + temp=source[ptr]; + while(isNumber(temp) || isOct(temp)) + { + __token+=temp; + ++ptr; + temp=source[ptr]; + } + } + else + { + int PointCnt=0; + while(isNumber(temp)) + { + __token+=temp; + ++ptr; + temp=source[ptr]; + if(temp=='.' && !PointCnt) + { + ++PointCnt; + __token+=temp; + ++ptr; + temp=source[ptr]; + } + } + } + syn=NUMBER; + } + else if(temp=='(' || temp==')' || temp=='[' || temp==']' || temp=='{' || + temp=='}' || temp==',' || temp==';' || temp=='|' || temp==':' || + temp=='?' || temp=='.' || temp=='`' || temp=='&'|| + temp=='%' || temp=='$' || temp=='^') + { + __token+=temp; + ++ptr; + syn=OPERATOR; + } + else if(temp=='\'') + { + __token+=temp; + ++ptr; + temp=source[ptr]; + __token+=temp; + ++ptr; + temp=source[ptr]; + __token+=temp; + ++ptr; + if(temp!='\'') + std::cout<<">>[Lexer] Abnormal char type detected: "<<__token<<" ."<' || temp=='~') + { + syn=OPERATOR; + __token+=temp; + ++ptr; + temp=source[ptr]; + if(temp=='=') + { + __token+=temp; + ++ptr; + } + } + else if(temp=='\\') + { + syn=OPERATOR; + __token+=temp; + ++ptr; + temp=source[ptr]; + if(temp=='=' || temp=='n' || temp=='t' || temp=='r' || temp=='\\' || temp=='\'' || temp=='\"') + { + __token+=temp; + ++ptr; + } + } + else if(temp=='\"') + { + syn=STRING; + __token+=temp; + ++ptr; + temp=source[ptr]; + while(temp!='\"') + { + if(temp=='\\') + { + __token+=temp; + + ++ptr; + temp=source[ptr]; + __token+=temp; + + ++ptr; + temp=source[ptr]; + } + else + { + __token+=temp; + ++ptr; + temp=source[ptr]; + } + if(temp==0 || temp=='\n') + break; + } + //add the last char \" + if(temp=='\"') + { + __token+=temp; + ++ptr; + } + else + __token+=" __missing_end_of_string"; + } + else if(temp==0) + { + syn=SCANEND; + return; + } + else + { + syn=FAIL; + std::cout<<">>[Error] Unexpected error occurred: "<>[Error] Cannot identify "<>[Lexer] max size: "<0)//all Syn type is larger than zero + { + temp.line=line; + temp.type=syn; + temp.content=__token; + lexer.push_back(temp); + } + } + std::cout<<">>[Lexer] Complete scanning."<::iterator i=lexer.begin();i!=lexer.end();++i) + { + temp=*i; + std::cout<<"line "<& return_list() + { + return lexer; + } +}; + + + + +#endif diff --git a/version0.10/nasal_parser.h b/version0.10/nasal_parser.h new file mode 100644 index 0000000..78b7943 --- /dev/null +++ b/version0.10/nasal_parser.h @@ -0,0 +1,571 @@ +#ifndef __NASAL_PARSER_H__ +#define __NASAL_PARSER_H__ + +#include + +enum token_type +{ + __stack_end=1, + __equal,// = + __cmp_equal,// == + __cmp_not_equal,// != + __cmp_less,__cmp_less_or_equal,// < <= + __cmp_more,__cmp_more_or_equal,// > >= + __and_operator,__or_operator,__nor_operator,// and or ! + __add_operator,__sub_operator,__mul_operator,__div_operator,__link_operator,// + - * / ~ + __add_equal,__sub_equal,__mul_equal,__div_equal,__link_equal,// += -= *= /= ~= + __left_brace,__right_brace,// {} + __left_bracket,__right_bracket,// [] + __left_curve,__right_curve,// () + __semi,// ; + __comma,// , + __colon,// : + __dot,// . + __var, + __func, + __id, + __return, + __if,__elsif,__else, + __continue,__break,__for,__forindex,__foreach,__while, + + __scalar, + __number,__string,__char, + __list, + __hash, + __hash_member, + __identifier, + __statement, + __function, + __call_function, + __definition, + __assignment, + __calculation,__add_calc,__mul_calc,__curve_calc, + __loop, + __choose, + __program +}; + +const int max_len=10; +struct cmp_seq +{ + int tokens[max_len]; + int res; +}; +// must put in a reverse way +cmp_seq par[]= +{ + {{__program,__statement}, __program}, + {{__calculation,__add_operator,__calculation}, __calculation}, + {{__calculation,__add_operator,__id}, __calculation}, + {{__id,__add_operator,__calculation}, __calculation}, + {{__id,__add_operator,__id}, __calculation}, + + {{__calculation,__sub_operator,__calculation}, __calculation}, + {{__calculation,__sub_operator,__id}, __calculation}, + {{__id,__sub_operator,__calculation}, __calculation}, + {{__id,__sub_operator,__id}, __calculation}, + + {{__calculation,__mul_operator,__calculation}, __calculation}, + {{__calculation,__mul_operator,__id}, __calculation}, + {{__id,__mul_operator,__calculation}, __calculation}, + {{__id,__mul_operator,__id}, __calculation}, + + {{__calculation,__div_operator,__calculation}, __calculation}, + {{__calculation,__div_operator,__id}, __calculation}, + {{__id,__div_operator,__calculation}, __calculation}, + {{__id,__div_operator,__id}, __calculation}, + + {{__calculation,__link_operator,__calculation}, __calculation}, + {{__calculation,__link_operator,__id}, __calculation}, + {{__id,__link_operator,__calculation}, __calculation}, + {{__id,__link_operator,__id}, __calculation}, + + {{__right_curve,__id,__left_curve}, __calculation}, + {{__right_curve,__calculation,__left_curve}, __calculation}, + {{__semi,__calculation,__equal,__id,__var}, __definition}, + {{__definition}, __statement} +}; +int num_of_par=sizeof(par)/sizeof(cmp_seq); + +void print_token(int type) +{ + std::string context=""; + switch(type) + { + case __stack_end: + context="#"; + break; + case __equal: + context="="; + break; + case __cmp_equal: + context="=="; + break; + case __cmp_not_equal: + context="!="; + break; + case __cmp_less: + context="<"; + break; + case __cmp_less_or_equal: + context="<="; + break; + case __cmp_more: + context=">"; + break; + case __cmp_more_or_equal: + context=">="; + break; + case __and_operator: + context="and"; + break; + case __or_operator: + context="or"; + break; + case __nor_operator: + context="!"; + break; + case __add_operator: + context="+"; + break; + case __sub_operator: + context="-"; + break; + case __mul_operator: + context="*"; + break; + case __div_operator: + context="/"; + break; + case __link_operator: + context="~"; + break; + case __add_equal: + context="+="; + break; + case __sub_equal: + context="-="; + break; + case __mul_equal: + context="*="; + break; + case __div_equal: + context="/="; + break; + case __link_equal: + context="~="; + break; + case __left_brace: + context="{"; + break; + case __right_brace: + context="}"; + break; + case __left_bracket: + context="["; + break; + case __right_bracket: + context="]"; + break; + case __left_curve: + context="("; + break; + case __right_curve: + context=")"; + break; + case __semi: + context=";"; + break; + case __comma: + context=","; + break; + case __colon: + context=":"; + break; + case __dot: + context="."; + break; + case __var: + context="var"; + break; + case __func: + context="func"; + break; + case __id: + context="id"; + break; + case __number: + context="number"; + break; + case __string: + context="string"; + break; + case __char: + context="char"; + break; + case __continue: + context="continue"; + break; + case __break: + context="break"; + break; + case __for: + context="for"; + break; + case __forindex: + context="forindex"; + break; + case __foreach: + context="foreach"; + break; + case __while: + context="while"; + break; + case __if: + context="if"; + break; + case __elsif: + context="elsif"; + break; + case __else: + context="else"; + break; + case __return: + context="return"; + break; + case __scalar: + context="scalar"; + break; + case __list: + context="list"; + break; + case __hash: + context="hash"; + break; + case __hash_member: + context="hash_member"; + break; + case __identifier: + context="identifier"; + break; + case __statement: + context="statement"; + break; + case __function: + context="function"; + break; + case __call_function: + context="call_func"; + break; + case __definition: + context="definition"; + break; + case __assignment: + context="assignment"; + break; + case __calculation: + context="calc"; + break; + case __loop: + context="loop"; + break; + case __choose: + context="choose"; + break; + case __program: + context="program"; + break; + } + std::cout< main_stack; + std::stack comp_stack; + public: + void set_stack_empty() + { + while(!main_stack.empty()) + main_stack.pop(); + while(!comp_stack.empty()) + comp_stack.pop(); + main_stack.push(__stack_end); + comp_stack.push(__stack_end); + comp_stack.push(__program); + } + void stack_input(std::stack& temp) + { + set_stack_empty(); + while(!temp.empty()) + { + main_stack.push(temp.top().type); + temp.pop(); + } + return; + } + void print_stack(std::stack& temp) + { + std::stack t; + while(!temp.empty()) + { + t.push(temp.top()); + print_token(t.top()); + std::cout<<" "; + temp.pop(); + } + while(!t.empty()) + { + temp.push(t.top()); + t.pop(); + } + return; + } + void print_main_and_comp() + { + std::cout<<">>[Main stack] "; + print_stack(main_stack); + std::cout<>[Comp stack] "; + print_stack(comp_stack); + std::cout< temp; + for(int i=0;i=0;--j) + { + if(!par[i].tokens[j]) + continue; + // if par[i].tokens[j] is 0 then continue until the tokens[j] is not 0 + temp.push(comp_stack.top()); + comp_stack.pop(); + if((par[i].tokens[j]!=temp.top()) || (comp_stack.empty() && par[i].tokens[j])) + { + while(!temp.empty()) + { + comp_stack.push(temp.top()); + temp.pop(); + } + break; + } + if((par[i].tokens[j]==temp.top()) && (j==0)) + { + comp_stack.push(par[i].res); + return true; + } + } + } + return false; + } + void main_comp_progress(bool show) + { + if(show) + print_main_and_comp(); + while(main_stack.top()!=__stack_end) + { + comp_stack.push(main_stack.top()); + main_stack.pop(); + if(show) + print_main_and_comp(); + while(1) + { + if(!reducable()) + break; + if(show) + print_main_and_comp(); + } + } + if(comp_stack.top()==__program) + { + comp_stack.pop(); + if(comp_stack.top()==__stack_end) + std::cout<<">>[Parse] 0 error(s)."<>[Parse] Error:"<>[Parse] Error:"<>[Parse] Complete checking."< parser; + public: + void parse_process(std::list& lexer,bool show) + { + while(!parser.empty()) + parser.pop(); + for(std::list::iterator i=lexer.begin();i!=lexer.end();++i) + { + parse_unit temp_parse; + temp_parse.line=(*i).line; + if(((*i).content=="var") || ((*i).content=="func") || ((*i).content=="return") || ((*i).content=="nil") || ((*i).content=="continue") || ((*i).content=="break") || ((*i).content=="and") || ((*i).content=="or")) + { + if((*i).content=="var") + temp_parse.type=__var; + else if((*i).content=="func") + temp_parse.type=__func; + else if((*i).content=="return") + temp_parse.type=__return; + else if((*i).content=="nil") + temp_parse.type=__number; + else if((*i).content=="continue") + temp_parse.type=__continue; + else if((*i).content=="break") + temp_parse.type=__break; + else if((*i).content=="and") + temp_parse.type=__and_operator; + else if((*i).content=="or") + temp_parse.type=__or_operator; + } + else if((*i).type==IDENTIFIER) + { + temp_parse.type=__id; + } + else if(((*i).content=="for") || ((*i).content=="foreach") || ((*i).content=="while") || ((*i).content=="forindex")) + { + if((*i).content=="for") + temp_parse.type=__for; + else if((*i).content=="forindex") + temp_parse.type=__forindex; + else if((*i).content=="foreach") + temp_parse.type=__foreach; + else if((*i).content=="while") + temp_parse.type=__while; + } + else if(((*i).content=="if") || ((*i).content=="else") || ((*i).content=="elsif")) + { + if((*i).content=="if") + temp_parse.type=__if; + else if((*i).content=="else") + temp_parse.type=__else; + else if((*i).content=="elsif") + temp_parse.type=__elsif; + } + else if(((*i).content=="==") || ((*i).content=="!=") || ((*i).content==">") || ((*i).content==">=") || ((*i).content=="<") || ((*i).content=="<=")) + { + if((*i).content=="==") + temp_parse.type=__cmp_equal; + else if((*i).content=="!=") + temp_parse.type=__cmp_not_equal; + else if((*i).content==">") + temp_parse.type=__cmp_more; + else if((*i).content==">=") + temp_parse.type=__cmp_more_or_equal; + else if((*i).content=="<") + temp_parse.type=__cmp_less; + else if((*i).content=="<=") + temp_parse.type=__cmp_less_or_equal; + } + else if(((*i).content==";") || ((*i).content==",") || ((*i).content=="=") || ((*i).content==":") || ((*i).content==".")) + { + if((*i).content==";") + temp_parse.type=__semi; + else if((*i).content==",") + temp_parse.type=__comma; + else if((*i).content=="=") + temp_parse.type=__equal; + else if((*i).content==":") + temp_parse.type=__colon; + else if((*i).content==".") + temp_parse.type=__dot; + } + else if(((*i).type==NUMBER) || ((*i).type==STRING) || ((*i).type==CHAR)) + { + if((*i).type==NUMBER) + temp_parse.type=__number; + else if((*i).type==STRING) + temp_parse.type=__string; + else if((*i).type==CHAR) + temp_parse.type=__char; + } + else if(((*i).content=="+") || ((*i).content=="-") || ((*i).content=="*") || ((*i).content=="/") || ((*i).content=="~") || ((*i).content=="!")) + { + if((*i).content=="+") + temp_parse.type=__add_operator; + else if((*i).content=="-") + temp_parse.type=__sub_operator; + else if((*i).content=="*") + temp_parse.type=__mul_operator; + else if((*i).content=="/") + temp_parse.type=__div_operator; + else if((*i).content=="~") + temp_parse.type=__link_operator; + else if((*i).content=="!") + temp_parse.type=__nor_operator; + } + else if(((*i).content=="+=") || ((*i).content=="-=") || ((*i).content=="*=") || ((*i).content=="/=") || ((*i).content=="~=")) + { + if((*i).content=="+=") + temp_parse.type=__add_equal; + else if((*i).content=="-=") + temp_parse.type=__sub_equal; + else if((*i).content=="*=") + temp_parse.type=__mul_equal; + else if((*i).content=="/=") + temp_parse.type=__div_equal; + else if((*i).content=="~=") + temp_parse.type=__link_equal; + } + else if(((*i).content=="(") || ((*i).content==")") || ((*i).content=="[") || ((*i).content=="]") || ((*i).content=="{") || ((*i).content=="}")) + { + char c=(*i).content[0]; + switch(c) + { + case '(': + temp_parse.type=__left_curve; + break; + case ')': + temp_parse.type=__right_curve; + break; + case '[': + temp_parse.type=__left_bracket; + break; + case ']': + temp_parse.type=__right_bracket; + break; + case '{': + temp_parse.type=__left_brace; + break; + case '}': + temp_parse.type=__right_brace; + break; + } + } + parser.push(temp_parse);//push this into stack + } + if(parser.empty()) + { + std::cout<<">>[Warning] Empty lexer."<>[Parse] Complete checking."< temp; + while(!parser.empty()) + { + temp.push(parser.top()); + parser.pop(); + } + PDA automata; + automata.stack_input(temp); + automata.main_comp_progress(show); + return; + } +}; + +#endif