From da8f45ccdb5280c0f3295900844ef33c93a35ad6 Mon Sep 17 00:00:00 2001 From: Valk Richard Li <48872266+ValKmjolnir@users.noreply.github.com> Date: Fri, 6 Sep 2019 22:49:49 +0800 Subject: [PATCH] Try ast but failed --- version0.15/ast.h | 82 ++++++++ version0.15/main.cpp | 68 +++++++ version0.15/nasal.bison | 269 ++++++++++++++++++++++++ version0.15/nasal_lexer.h | 408 +++++++++++++++++++++++++++++++++++++ version0.15/nasal_parser.h | 211 +++++++++++++++++++ version0.15/template.txt | 14 ++ version0.15/token_type.h | 96 +++++++++ 7 files changed, 1148 insertions(+) create mode 100644 version0.15/ast.h create mode 100644 version0.15/main.cpp create mode 100644 version0.15/nasal.bison create mode 100644 version0.15/nasal_lexer.h create mode 100644 version0.15/nasal_parser.h create mode 100644 version0.15/template.txt create mode 100644 version0.15/token_type.h diff --git a/version0.15/ast.h b/version0.15/ast.h new file mode 100644 index 0000000..98bd9b5 --- /dev/null +++ b/version0.15/ast.h @@ -0,0 +1,82 @@ +#ifndef __AST_H__ +#define __AST_H__ + +#include "token_type.h" +#include +#include +#include + +enum tree_node_type +{ + __syntax_tree_begin=128, + __error_syntax, + __null_statement, + __definition, + __assignment, + __loop, + __choose +}; + +struct node +{ + int type; + int line; + std::string content; + std::list children; +}; + +class abstract_syntax_tree +{ + private: + node tree; + std::stack parser_stack; + public: + abstract_syntax_tree() + { + tree.type=__syntax_tree_begin; + tree.line=0; + tree.content=""; + tree.children.clear(); + } + void init_parser_stack(std::stack& temp) + { + parser_stack=temp; + return; + } + void tree_generator(std::list& error_list) + { + while(!parser_stack.empty()) + { + int type=parser_stack.top().type; + node child_node; + child_node.line=parser_stack.top().line; + child_node.content=""; + child_node.children.clear(); + switch(type) + { + case __var:break; + case __id:break; + case __number:break; + case __string:break; + case __if:break; + case __for:break; + case __forindex:break; + case __foreach:break; + case __while:break; + case __semi:child_node.type=__null_statement;break; + default:child_node.type=__error_syntax;break; + } + if(child_node.type==__error_syntax) + error_list.push_back(parser_stack.top().line); + parser_stack.pop(); + } + return; + } + void run() + { + ; + return; + } +}; + +#endif diff --git a/version0.15/main.cpp b/version0.15/main.cpp new file mode 100644 index 0000000..59065aa --- /dev/null +++ b/version0.15/main.cpp @@ -0,0 +1,68 @@ +#include "nasal_lexer.h" +#include "nasal_parser.h" +#include "ast.h" +#include +#include + +int main() +{ + resource_programme_process prog; + nasal_lexer lex; + nasal_parser pas; + std::string command; + std::cout<<">> Nasal interpreter by ValKmjolnir"<> input [help] to find help."<> "; + std::getline(std::cin,command); + if(command=="help") + { + std::cout<<">> Nasal interpreter by ValKmjolnir"<> 1. [ ] |input file name to load the file."<> 2. [cls ] |clear the screen."<> 3. [exit ] |shut down the interpreter."<> 4. [lexer ] |run and show the lexer. (-lexer)"<> 5. [parser] |run parser. (-parser)"<> 6. [del ] |delete all elements in stack."<> 7. [run ] |run the programme in stack. (-lexer -parser)"<> 8. [rs ] |check the source program."<>[Delete] Complete."<' + |'>=' + |and + |or + ; + +calculation + :id operator id + |id operator scalar + |id operator call + |scalar operator id + |scalar operator scalar + |scalar operator call + |call operator id + |call operator scalar + |call operator call + |'('id')' + |'('scalar')' + |'('call')' + |'!' id + |'!' scalar + |'!' call + |'-' id + |'-' scalar + |'-' call + |'+' id + |'+' scalar + |'+' call + ; + +definition + :var id '=' id ';' + |var id '=' scalar ';' + |var id '=' call ';' + |var id '=' function + |var id '=' list ';' + |var id '=' hash ';' + ; + +assign_operator + :'+=' + |'-=' + |'*=' + |'/=' + |'~=' + |'=' + ; +pre_assignment + :id assign_operator id + |id assign_operator scalar + |id assign_operator call + |call assign_operator id + |call assign_operator scalar + |call assign_operator call + ; +assignment + :pre_assignment ';' + |id '=' function + |call '=' function + ; + +function + :func'{''}' + |func'{'statement'}' + |func'('')''{''}' + |func'('')''{'statement'}' + |func'('id')''{''}' + |func'('id')''{'statement'}' + |func'('scalar')''{''}' + |func'('scalar')''{'statement'}' + |func'('call')''{''}' + |func'('call')''{'statement'}' + |func'('list')''{''}' + |func'('list')''{'statement}' + |func'('hash')''{''}' + |func'('hash')''{'statement'}' + |func'('function')''{''}' + |func'('function')''{'statement'}' + |func'('dynamic_id')''{''}' + |func'('dynamic_id')''{'statement'}' + |func'('id_list')''{''}' + |func'('id_list')''{'statement'}' + ; + +choose + :if'('id')''{''}' + |if'('scalar')''{''}' + |if'('call')''{'statement'}' + |if'('id')''{'statement'}' + |if'('scalar')''{'statement'}' + |if'('call')''{'statement'}' + |else if'('id')''{''}' + |else if'('scalar')''{''}' + |else if'('call')''{'statement'}' + |else if'('id')''{'statement'}' + |else if'('scalar')''{'statement'}' + |else if'('call')''{'statement'}' + |elsif'('id')''{''}' + |elsif'('scalar')''{''}' + |elsif'('call')''{'statement'}' + |elsif'('id')''{'statement'}' + |elsif'('scalar')''{'statement'}' + |elsif'('call')''{'statement'}' + |else'{''}' + |else'{'statement'}' + ; + +loop + :while'('id')''{''}' + |while'('scalar')''{''}' + |while'('call')''{''}' + |while'('id')''{'statement'}' + |while'('scalar')''{'statement'}' + |while'('call')''{'statement'}' + |foreach'('statement id')''{''}' + |foreach'('statement scalar')''{''}' + |foreach'('statement call')''{''}' + |foreach'('statement list')''{''}' + |foreach'('statement id')''{'statement'}' + |foreach'('statement scalar')''{'statement'}' + |foreach'('statement call')''{'statement'}' + |foreach'('statement list')''{'statement'}' + |forindex'('statement id')''{''}' + |forindex'('statement scalar')''{''}' + |forindex'('statement call')''{''}' + |forindex'('statement list')''{''}' + |forindex'('statement id')''{'statement'}' + |forindex'('statement scalar')''{'statement'}' + |forindex'('statement call')''{'statement'}' + |forindex'('statement list')''{'statement'}' + |for'('statement pre_assignment')''{''}' + |for'('statement pre_assignment')''{'statement'}' + ; + +statement + :definition + |assignment + |loop + |choose + |return_scalar + |continue ';' + |break ';' + |call_function';' + |';' + ; diff --git a/version0.15/nasal_lexer.h b/version0.15/nasal_lexer.h new file mode 100644 index 0000000..40dd597 --- /dev/null +++ b/version0.15/nasal_lexer.h @@ -0,0 +1,408 @@ +#ifndef __NASAL_LEXER_H__ +#define __NASAL_LEXER_H__ + +#include +#include +#include +#include + +#define OPERATOR 1 // operator +#define IDENTIFIER 2 // id +#define NUMBER 3 // number +#define RESERVEWORD 4 // reserve word +#define STRING 5 // string +#define DYNAMIC_ID 6 // id... +#define FAIL -1 //fail +#define SCANEND -2 //complete scanning +#define ERRORFOUND -3 //error occurred + +std::string reserve_word[15]= +{ + "for","foreach","forindex","while", + "var","func","break","continue","return", + "if","else","elsif","nil","and","or" +}; + +int isReserveWord(std::string &p) +{ + for(int i=0;i<15;++i) + if(reserve_word[i]==p) + return i+1; + return FAIL; +} + +bool isLetter(char t) +{ + return (('a'<=t) && (t<='z') || ('A'<=t) && (t<='Z')); +} + +bool isNumber(char t) +{ + return (('0'<=t) && (t<='9')); +} + +bool isHex(char t) +{ + return ((('0'<=t) && (t<='9')) || (('a'<=t) && (t<='f'))); +} + +bool isOct(char t) +{ + return (('0'<=t) && (t<='7')); +} + +class resource_programme_process +{ + private: + char *resource; + public: + resource_programme_process() + { + resource=NULL; + resource=new char[16777216]; + } + ~resource_programme_process() + { + if(resource) + delete []resource; + } + char* use_file() + { + return resource; + } + void input_file(std::string& filename) + { + std::ifstream fin(filename); + if(fin.fail()) + { + std::cout<<">>[Error] Cannot load file: "< lexer; + public: + void scanner(int &syn,const char* source,std::string &__token,int &ptr,int &line) + { + char temp; + temp=source[ptr]; + while(temp==' ' || temp=='\n' || temp=='\t' || temp=='\r' || temp<0 || temp>127) + { + ++ptr; + if(temp=='\n') + ++line; + temp=source[ptr]; + } + __token=""; + if(isLetter(temp) || temp=='_') + { + __token+=temp; + ++ptr; + temp=source[ptr]; + while(isLetter(temp) || isNumber(temp) || temp=='_') + { + __token+=temp; + ++ptr; + temp=source[ptr]; + } + syn=isReserveWord(__token); + if(syn==FAIL) + syn=IDENTIFIER; + else + syn=RESERVEWORD; + if((syn==IDENTIFIER) && source[ptr]=='.' && source[ptr+1]=='.' && source[ptr+2]=='.') + { + __token+="..."; + syn=DYNAMIC_ID; + ptr+=3; + } + } + else if(isNumber(temp)) + { + if((source[ptr]=='0') && (source[ptr+1]=='x')) + { + __token+=source[ptr]; + __token+=source[ptr+1]; + ptr+=2; + temp=source[ptr]; + while(isNumber(temp) || isHex(temp)) + { + __token+=temp; + ++ptr; + temp=source[ptr]; + } + } + else if((source[ptr]=='0') && (source[ptr+1]=='o')) + { + __token+=source[ptr]; + __token+=source[ptr+1]; + ptr+=2; + temp=source[ptr]; + while(isNumber(temp) || isOct(temp)) + { + __token+=temp; + ++ptr; + temp=source[ptr]; + } + } + else + { + int PointCnt=0; + while(isNumber(temp)) + { + __token+=temp; + ++ptr; + temp=source[ptr]; + if(temp=='.' && !PointCnt) + { + ++PointCnt; + __token+=temp; + ++ptr; + temp=source[ptr]; + } + } + } + syn=NUMBER; + } + else if(temp=='(' || temp==')' || temp=='[' || temp==']' || temp=='{' || + temp=='}' || temp==',' || temp==';' || temp=='|' || temp==':' || + temp=='?' || temp=='.' || temp=='`' || temp=='&'|| + temp=='%' || temp=='$' || temp=='^') + { + __token+=temp; + ++ptr; + syn=OPERATOR; + } + else if(temp=='\'') + { + syn=STRING; + __token+=temp; + ++ptr; + temp=source[ptr]; + while(temp!='\'') + { + if(temp=='\\') + { + __token+=temp; + + ++ptr; + temp=source[ptr]; + __token+=temp; + + ++ptr; + temp=source[ptr]; + } + else + { + __token+=temp; + ++ptr; + temp=source[ptr]; + } + if(temp==0 || temp=='\n') + break; + } + //add the last char \" + if(temp=='\'') + { + __token+=temp; + ++ptr; + } + else + __token+=" __missing_end_of_string"; + } + else if(temp=='=' || temp=='+' || temp=='-' || temp=='*' || temp=='!' || temp=='/' || temp=='<' || temp=='>' || temp=='~') + { + syn=OPERATOR; + __token+=temp; + ++ptr; + temp=source[ptr]; + if(temp=='=') + { + __token+=temp; + ++ptr; + } + } + else if(temp=='\\') + { + syn=OPERATOR; + __token+=temp; + ++ptr; + temp=source[ptr]; + if(temp=='=' || temp=='n' || temp=='t' || temp=='r' || temp=='\\' || temp=='\'' || temp=='\"') + { + __token+=temp; + ++ptr; + } + } + else if(temp=='\"') + { + syn=STRING; + __token+=temp; + ++ptr; + temp=source[ptr]; + while(temp!='\"') + { + if(temp=='\\') + { + __token+=temp; + + ++ptr; + temp=source[ptr]; + __token+=temp; + + ++ptr; + temp=source[ptr]; + } + else + { + __token+=temp; + ++ptr; + temp=source[ptr]; + } + if(temp==0 || temp=='\n') + break; + } + //add the last char \" + if(temp=='\"') + { + __token+=temp; + ++ptr; + } + else + __token+=" __missing_end_of_string"; + } + else if(temp==0) + { + syn=SCANEND; + return; + } + else + { + syn=FAIL; + std::cout<<">>[Error] Unexpected error occurred: "<>[Error] Cannot identify "<>[Lexer] max size: "<0)//all Syn type is larger than zero + { + temp.line=line; + temp.type=syn; + temp.content=__token; + lexer.push_back(temp); + } + } + std::cout<<">>[Lexer] Complete scanning."<::iterator i=lexer.begin();i!=lexer.end();++i) + { + temp=*i; + std::cout<<"line "<& return_list() + { + return lexer; + } +}; + + + + +#endif diff --git a/version0.15/nasal_parser.h b/version0.15/nasal_parser.h new file mode 100644 index 0000000..a6dfd84 --- /dev/null +++ b/version0.15/nasal_parser.h @@ -0,0 +1,211 @@ +#ifndef __NASAL_PARSER_H__ +#define __NASAL_PARSER_H__ + +#include "ast.h" +#include +#include +#include + +class nasal_parser +{ + private: + std::stack parser; + abstract_syntax_tree ast; + public: + void print_parser_stack() + { + int line=0; + std::stack temp; + while(!parser.empty()) + { + temp.push(parser.top()); + if(line!=temp.top().line) + { + if(line+1==temp.top().line) + { + line=temp.top().line; + std::cout<& lexer) + { + while(!parser.empty()) + parser.pop(); + std::stack temp; + for(std::list::iterator i=lexer.begin();i!=lexer.end();++i) + { + parse_unit temp_parse; + temp_parse.line=i->line; + temp_parse.content=i->content; + if((*i).type==RESERVEWORD) + { + if((*i).content=="var") + temp_parse.type=__var; + else if((*i).content=="func") + temp_parse.type=__func; + else if((*i).content=="return") + temp_parse.type=__return; + else if((*i).content=="nil") + temp_parse.type=__number; + else if((*i).content=="continue") + temp_parse.type=__continue; + else if((*i).content=="break") + temp_parse.type=__break; + else if((*i).content=="and") + temp_parse.type=__and_operator; + else if((*i).content=="or") + temp_parse.type=__or_operator; + else if((*i).content=="for") + temp_parse.type=__for; + else if((*i).content=="forindex") + temp_parse.type=__forindex; + else if((*i).content=="foreach") + temp_parse.type=__foreach; + else if((*i).content=="while") + temp_parse.type=__while; + else if((*i).content=="if") + temp_parse.type=__if; + else if((*i).content=="else") + temp_parse.type=__else; + else if((*i).content=="elsif") + temp_parse.type=__elsif; + } + else if(((*i).content=="==") || ((*i).content=="!=") || ((*i).content==">") || ((*i).content==">=") || ((*i).content=="<") || ((*i).content=="<=")) + { + if((*i).content=="==") + temp_parse.type=__cmp_equal; + else if((*i).content=="!=") + temp_parse.type=__cmp_not_equal; + else if((*i).content==">") + temp_parse.type=__cmp_more; + else if((*i).content==">=") + temp_parse.type=__cmp_more_or_equal; + else if((*i).content=="<") + temp_parse.type=__cmp_less; + else if((*i).content=="<=") + temp_parse.type=__cmp_less_or_equal; + } + else if(((*i).content==";") || ((*i).content==",") || ((*i).content=="=") || ((*i).content==":") || ((*i).content==".")) + { + char c=(*i).content[0]; + switch(c) + { + case ';':temp_parse.type=__semi;break; + case ',':temp_parse.type=__comma;break; + case '=':temp_parse.type=__equal;break; + case ':':temp_parse.type=__colon;break; + case '.':temp_parse.type=__dot;break; + } + } + else if(((*i).type==NUMBER) || ((*i).type==STRING) || ((*i).type==IDENTIFIER) || ((*i).type==DYNAMIC_ID)) + { + int t=(*i).type; + switch(t) + { + case NUMBER:temp_parse.type=__number;break; + case STRING:temp_parse.type=__string;break; + case IDENTIFIER:temp_parse.type=__id;break; + case DYNAMIC_ID:temp_parse.type=__dynamic_id;break; + } + } + else if(((*i).content=="+") || ((*i).content=="-") || ((*i).content=="*") || ((*i).content=="/") || ((*i).content=="~") || ((*i).content=="!")) + { + char c=(*i).content[0]; + switch(c) + { + case '+':temp_parse.type=__add_operator;break; + case '-':temp_parse.type=__sub_operator;break; + case '*':temp_parse.type=__mul_operator;break; + case '/':temp_parse.type=__div_operator;break; + case '~':temp_parse.type=__link_operator;break; + case '!':temp_parse.type=__nor_operator;break; + } + } + else if(((*i).content=="+=") || ((*i).content=="-=") || ((*i).content=="*=") || ((*i).content=="/=") || ((*i).content=="~=")) + { + char c=(*i).content[0]; + switch(c) + { + case '+':temp_parse.type=__add_equal;break; + case '-':temp_parse.type=__sub_equal;break; + case '*':temp_parse.type=__mul_equal;break; + case '/':temp_parse.type=__div_equal;break; + case '~':temp_parse.type=__link_equal;break; + } + } + else if(((*i).content=="(") || ((*i).content==")") || ((*i).content=="[") || ((*i).content=="]") || ((*i).content=="{") || ((*i).content=="}")) + { + char c=(*i).content[0]; + switch(c) + { + case '(':temp_parse.type=__left_curve;break; + case ')':temp_parse.type=__right_curve;break; + case '[':temp_parse.type=__left_bracket;break; + case ']':temp_parse.type=__right_bracket;break; + case '{':temp_parse.type=__left_brace;break; + case '}':temp_parse.type=__right_brace;break; + } + } + temp.push(temp_parse);//push this into stack + } + if(temp.empty()) + { + std::cout<<">>[Warning] Empty lexer."<>[Parse] Parser exited."< error_list; + error_list.clear(); + ast.init_parser_stack(parser); + ast.tree_generator(error_list); + if(error_list.empty()) + { + std::cout<<">>[Parse] 0 error(s)."<>[Parse] Complete checking."<::iterator i=error_list.begin();i!=error_list.end();++i) + if(line!=*i) + { + line=*i; + std::cout<<">>[Parse] parse error in line "<>[Parse] Error occurred, stop."< + +enum token_type +{ + __stack_end=1, + __equal,// = + __cmp_equal,__cmp_not_equal,// == != + __cmp_less,__cmp_less_or_equal,// < <= + __cmp_more,__cmp_more_or_equal,// > >= + __and_operator,__or_operator,__nor_operator,// and or ! + __add_operator,__sub_operator,// + - + __mul_operator,__div_operator,__link_operator,// * / ~ + __add_equal,__sub_equal,// += -= + __mul_equal,__div_equal,__link_equal,// *= /= ~= + __left_brace,__right_brace,// {} + __left_bracket,__right_bracket,// [] + __left_curve,__right_curve,// () + __semi,__comma,__colon,__dot,// ; , : . + __var,__func,__return, + __if,__elsif,__else, + __id,__dynamic_id, + __continue,__break, + __for,__forindex,__foreach,__while, + //end of operators & reserve words + __number,__string, +}; + +void print_token(int type) +{ + std::string context=""; + switch(type) + { + case __stack_end: context="#";break; + case __equal: context="=";break; + case __cmp_equal: context="==";break; + case __cmp_not_equal: context="!=";break; + case __cmp_less: context="<";break; + case __cmp_less_or_equal: context="<=";break; + case __cmp_more: context=">";break; + case __cmp_more_or_equal: context=">=";break; + case __and_operator: context="and";break; + case __or_operator: context="or";break; + case __nor_operator: context="!";break; + case __add_operator: context="+";break; + case __sub_operator: context="-";break; + case __mul_operator: context="*";break; + case __div_operator: context="/";break; + case __link_operator: context="~";break; + case __add_equal: context="+=";break; + case __sub_equal: context="-=";break; + case __mul_equal: context="*=";break; + case __div_equal: context="/=";break; + case __link_equal: context="~=";break; + case __left_brace: context="{";break; + case __right_brace: context="}";break; + case __left_bracket: context="[";break; + case __right_bracket: context="]";break; + case __left_curve: context="(";break; + case __right_curve: context=")";break; + case __semi: context=";";break; + case __comma: context=",";break; + case __colon: context=":";break; + case __dot: context=".";break; + case __var: context="var";break; + case __func: context="func";break; + case __id: context="id";break; + case __dynamic_id: context="id...";break; + case __number: context="number";break; + case __string: context="string";break; + case __continue: context="continue";break; + case __break: context="break";break; + case __for: context="for";break; + case __forindex: context="forindex";break; + case __foreach: context="foreach";break; + case __while: context="while";break; + case __if: context="if";break; + case __elsif: context="elsif";break; + case __else: context="else";break; + case __return: context="return";break; + default: context="unknown_token";break; + } + std::cout<