diff --git a/version0.7/ebnf.cpp b/version0.7/ebnf.cpp new file mode 100644 index 0000000..394d7cd --- /dev/null +++ b/version0.7/ebnf.cpp @@ -0,0 +1,53 @@ +LR(1) + ::= + | + <=> ||| + ::= + |||| <,> ||||||| + ::= + + <[> || <]> + <[> || <:> || <]> + <.> + <(><)> + <(> ||||||| <)> + ::= + <,> | + ::= + <[><]> + <[> ||| <]> + ::= + <{><}> + <{> | <}> + ::= + <:> |||| + ::= + <,> | + ::= + <=> ||| <;> + <=> + <(> <)> <=> <(> <)> <;> + <(> <)> <=> ||| <;> + <(> <)> <=> <(> <)> <;> + <(> <)> <=> ||| <;> + ::= + ||| <;> + <(> <)> <(> <)> <;> + <(> <)> ||| <;> + ::= + <{><}> + <{> <}> + <(><)> <{><}> + <(><)> <{> <}> + <(> | <)> <{><}> + <(> | <)> <{> <}> + ::= + || <+>|<->|<*>||<~>|<<>|<<=>|<>>|<>=>|<==>||| || + <(> || <)> <+>|<->|<*>||<~>|<<>|<<=>|<>>|<>=>|<==>||| || + || <+>|<->|<*>||<~>|<<>|<<=>|<>>|<>=>|<==>||| <(> || <)> + <(> || <)> <+>|<->|<*>||<~>|<<>|<<=>|<>>|<>=>|<==>||| <(> || <)> + + + + + diff --git a/version0.7/ll(1).txt b/version0.7/ll(1).txt new file mode 100644 index 0000000..7fb9467 --- /dev/null +++ b/version0.7/ll(1).txt @@ -0,0 +1,116 @@ +LL(1) for nasal + +number -> NUMBER +id -> IDENTIFIER +string -> STRING + +scalar -> number +scalar -> id +scalar -> identifier +scalar -> calculation +scalars -> scalar scalar_end +scalar_end -> , scalar scalar_end +scalar_end -> $ + +lacked_identifier -> id = scalar +lacked_identifier -> id = string +lacked_identifier -> id = function +identifier -> id [ scalar ] +identifier -> id . 
identifier +identifier -> id [ scalar : scalar ] +identifier -> id [ scalar : ] +identifier -> id ( ) +identifier -> id ( scalar ) +identifier -> id ( scalars ) +identifiers -> identifier identifier_end +identifier_end -> , identifier identifier_end +identifier_end -> $ + +definition -> var id = scalar ; +definition -> var id = string ; +definition -> var id = function ; + +pre_assignment -> id =|+=|-=|*=|\=|~= scalar +pre_assignment -> id =|+=|-=|*=|\=|~= string +pre_assignment -> identifier =|+=|-=|*=|\=|~= scalar +pre_assignment -> identifier =|+=|-=|*=|\=|~= string +assignment -> pre_assignment ; + +parameter -> identifier +parameter -> lacked_identifier +parameters -> parameter parameter_end +parameter_end -> ,identifier parameter_end +parameter_end -> ,lacked_identifier parameter_end +parameter_end -> $ +function -> func { } +function -> func { statement } +function -> func { statements } +function -> func ( ) { } +function -> func ( ) { statement } +function -> func ( ) { statements } +function -> func ( parameter ) { } +function -> func ( parameter ) { statement } +function -> func ( parameter ) { statements } +function -> func ( parameters ) { } +function -> func ( parameters ) { statement } +function -> func ( parameters ) { statements } + +calculation -> scalar + scalar +calculation -> scalar - scalar +calculation -> scalar * scalar +calculation -> scalar \ scalar +calculation -> scalar ~ scalar +calculation -> scalar == scalar +calculation -> scalar != scalar +calculation -> scalar > scalar +calculation -> scalar >= scalar +calculation -> scalar < scalar +calculation -> scalar <= scalar +calculation -> scalar and scalar +calculation -> scalar or scalar +calculation -> ! scalar +calculation -> scalar ? 
scalar : scalar +calculation -> (calculation) +calculation -> (scalar) + +loop -> for ( definition calculation ; pre_assignment) { } +loop -> for ( definition calculation ; pre_assignment) statement +loop -> for ( definition calculation ; pre_assignment) { statement } +loop -> for ( definition calculation ; pre_assignment) { statements } +loop -> forindex ( id ; id|identifier) { } +loop -> forindex ( id ; id|identifier) statement +loop -> forindex ( id ; id|identifier) { statement } +loop -> forindex ( id ; id|identifier) { statements } +loop -> foreach ( id ; id|identifier) { } +loop -> foreach ( id ; id|identifier) statement +loop -> foreach ( id ; id|identifier) { statement } +loop -> foreach ( id ; id|identifier) { statements } +loop -> while ( scalar ) { } +loop -> while ( scalar ) statement +loop -> while ( scalar ) { statement } +loop -> while ( scalar ) { statements } + +if_choose -> if ( scalar ) { } +if_choose -> if ( scalar ) statement +if_choose -> if ( scalar ) { statement } +if_choose -> if ( scalar ) { statements } +else_if_choose -> else if { } +else_if_choose -> else if statement +else_if_choose -> else if { statement } +else_if_choose -> else if { statements } +else_if_choose -> elsif { } +else_if_choose -> elsif statement +else_if_choose -> elsif { statement } +else_if_choose -> elsif { statements } +else_choose -> else { } +else_choose -> else statement +else_choose -> else { statement } +else_choose -> else { statements } + +mul_else_if -> else_if_choose mul_else_if +mul_else_if -> $ + +choose -> if_choose +choose -> if_choose mul_else_if +choose -> if_choose else_choose +choose -> if_choose mul_else_if else_choose diff --git a/version0.7/n.cpp b/version0.7/n.cpp new file mode 100644 index 0000000..c6fa10c --- /dev/null +++ b/version0.7/n.cpp @@ -0,0 +1,515 @@ +#include "nasal_lexer.h" +#include + +enum token_type +{ + __stack_end, + __equal,// = + __cmp_equal,// == + __cmp_not_equal,// != + __cmp_less,__cmp_less_or_equal,// < <= + 
__cmp_more,__cmp_more_or_equal,// > >= + __and_operator,__or_operator,__nor_operator,// and or ! + __add_operator,__sub_operator,__mul_operator,__div_operator,__link_operator,// + - * / ~ + __add_equal,__sub_equal,__mul_equal,__div_equal,__link_equal,// += -= *= /= ~= + __left_brace,__right_brace,// {} + __left_bracket,__right_bracket,// [] + __left_curve,__right_curve,// () + __semi,// ; + __comma,// , + __colon,// : + __dot,// . + __var,// var reserve word + __func,// func reserve word + __unknown_type_id,__identifier,__identifiers, + __scalar,__scalars,__list,__hash, + __hash_member,__hash_members, + __statement,__statements, + __function,//function(){} + __definition,__assignment,__calculation, + __loop,__continue,__break,__for,__forindex,__foreach,__while,// for()while() continue; break; + __choose,__if,__elsif,__else,// if else if else + __return +}; + +struct token_seq +{ + int tokens[15]; + int res; +}par[13]= +{ + {{__var,__identifier,__equal,__scalar,__semi}, __definition}, + {{__var,__identifier,__equal,__identifier,__semi}, __definition}, + {{__var,__identifier,__equal,__list,__semi}, __definition}, + {{__var,__identifier,__equal,__hash,__semi}, __definition}, + {{__var,__identifier,__equal,__function}, __definition}, + {{__var,__left_curve,__identifiers,__right_curve,__equal,__identifier,__semi},__definition}, + {{__var,__left_curve,__identifiers,__right_curve,__equal,__list,__semi}, __definition}, + {{__identifier,__dot,__identifier}, __identifier}, + {{__identifier,__left_bracket,__scalar,__right_bracket}, __identifier}, + {{__identifier,__left_bracket,__identifier,__right_bracket}, __identifier}, + {{__identifier,__left_bracket,__calculation,__right_bracket}, __identifier}, + {{__identifier,__left_curve,__right_curve}, __identifier}, + {{__identifier,__left_curve,__scalar,__right_curve}, __identifier} +}; + +struct par_info +{ + int res; + int len; +}; +par_info isPar(int *t) +{ + par_info temp; + temp.len=0; + temp.res=0; + for(int i=0;i<13;++i) + 
{ + int cnt=0; + for(int j=0;j<15;++j) + { + if(par[i].tokens[j]) + ++cnt; + else + break; + } + for(int j=0;j parser; + public: + void parse_quiet_process(std::list& lexer) + { + while(!parser.empty()) + parser.pop(); + for(int i=0;i<15;++i) + { + parse_unit t; + t.line=0; + t.type=__stack_end; + parser.push(t); + } + for(std::list::iterator i=lexer.begin();i!=lexer.end();++i) + { + parse_unit temp_parse; + temp_parse.line=(*i).line; + if(((*i).content=="var") || ((*i).content=="func") || ((*i).content=="return") || ((*i).content=="nil") || ((*i).content=="continue") || ((*i).content=="break") || ((*i).content=="and") || ((*i).content=="or")) + { + if((*i).content=="var") + temp_parse.type=__var; + else if((*i).content=="func") + temp_parse.type=__func; + else if((*i).content=="return") + temp_parse.type=__return; + else if((*i).content=="nil") + temp_parse.type=__scalar; + else if((*i).content=="continue") + temp_parse.type=__continue; + else if((*i).content=="break") + temp_parse.type=__break; + else if((*i).content=="and") + temp_parse.type=__and_operator; + else if((*i).content=="or") + temp_parse.type=__or_operator; + } + else if((*i).type==IDENTIFIER) + { + temp_parse.type=__identifier; + } + else if(((*i).content=="for") || ((*i).content=="foreach") || ((*i).content=="while") || ((*i).content=="forindex")) + { + if((*i).content=="for") + temp_parse.type=__for; + else if((*i).content=="forindex") + temp_parse.type=__forindex; + else if((*i).content=="foreach") + temp_parse.type=__foreach; + else if((*i).content=="while") + temp_parse.type=__while; + } + else if(((*i).content=="if") || ((*i).content=="else") || ((*i).content=="elsif")) + { + if((*i).content=="if") + temp_parse.type=__if; + else if((*i).content=="else") + temp_parse.type=__else; + else if((*i).content=="elsif") + temp_parse.type=__elsif; + } + else if(((*i).content=="==") || ((*i).content=="!=") || ((*i).content==">") || ((*i).content==">=") || ((*i).content=="<") || ((*i).content=="<=")) + { 
+ if((*i).content=="==") + temp_parse.type=__cmp_equal; + else if((*i).content=="!=") + temp_parse.type=__cmp_not_equal; + else if((*i).content==">") + temp_parse.type=__cmp_more; + else if((*i).content==">=") + temp_parse.type=__cmp_more_or_equal; + else if((*i).content=="<") + temp_parse.type=__cmp_less; + else if((*i).content=="<=") + temp_parse.type=__cmp_less_or_equal; + } + else if(((*i).content==";") || ((*i).content==",") || ((*i).content=="=") || ((*i).content==":") || ((*i).content==".")) + { + if((*i).content==";") + temp_parse.type=__semi; + else if((*i).content==",") + temp_parse.type=__comma; + else if((*i).content=="=") + temp_parse.type=__equal; + else if((*i).content==":") + temp_parse.type=__colon; + else if((*i).content==".") + temp_parse.type=__dot; + } + else if(((*i).type==NUMBER) || ((*i).type==STRING)) + { + temp_parse.type=__scalar; + } + else if(((*i).content=="+") || ((*i).content=="-") || ((*i).content=="*") || ((*i).content=="/") || ((*i).content=="~") || ((*i).content=="!")) + { + if((*i).content=="+") + temp_parse.type=__add_operator; + else if((*i).content=="-") + temp_parse.type=__sub_operator; + else if((*i).content=="*") + temp_parse.type=__mul_operator; + else if((*i).content=="/") + temp_parse.type=__div_operator; + else if((*i).content=="~") + temp_parse.type=__link_operator; + else if((*i).content=="!") + temp_parse.type=__nor_operator; + } + else if(((*i).content=="+=") || ((*i).content=="-=") || ((*i).content=="*=") || ((*i).content=="/=") || ((*i).content=="~=")) + { + if((*i).content=="+=") + temp_parse.type=__add_equal; + else if((*i).content=="-=") + temp_parse.type=__sub_equal; + else if((*i).content=="*=") + temp_parse.type=__mul_equal; + else if((*i).content=="/=") + temp_parse.type=__div_equal; + else if((*i).content=="~=") + temp_parse.type=__link_equal; + } + else if(((*i).content=="(") || ((*i).content==")") || ((*i).content=="[") || ((*i).content=="]") || ((*i).content=="{") || ((*i).content=="}")) + { + char 
c=(*i).content[0]; + switch(c) + { + case '(': + temp_parse.type=__left_curve; + break; + case ')': + temp_parse.type=__right_curve; + break; + case '[': + temp_parse.type=__left_bracket; + break; + case ']': + temp_parse.type=__right_bracket; + break; + case '{': + temp_parse.type=__left_brace; + break; + case '}': + temp_parse.type=__right_brace; + break; + } + } + parser.push(temp_parse);//push this into stack + } + while(!parser.empty()) + { + int tbl[15]={0}; + std::stack temp; + for(int i=0;i<15;++i) + { + if(!parser.empty()) + { + temp.push(parser.top()); + parser.pop(); + } + else + break; + } + for(int i=0;i<15;++i) + { + if(!temp.empty()) + { + tbl[i]=temp.top().type; + parser.push(temp.top()); + temp.pop(); + } + else + break; + } + for(int i=0;i<15;++i) + { + if(tbl[i]) + break; + if(!tbl[i] && i==14) + return; + } + par_info tk=isPar(tbl); + if(tk.res) + { + parse_unit temp; + temp.line=parser.top().line; + temp.type=tk.res; + for(int i=0;i>[Parser] Complete scanning."< temp_stack; + while(!parser.empty()) + { + if((parser.top().type!=__stack_end) && (parser.top().type!=__statement) && (parser.top().type!=__statements) && (parser.top().type!=__function) && (parser.top().type!=__definition) && (parser.top().type!=__assignment) && (parser.top().type!=__loop) && (parser.top().type!=__choose)) + temp_stack.push(parser.top()); + parser.pop(); + } + if(!temp_stack.empty()) + { + std::cout<<">>[Error] Parse error."<>[Parser] No error occurred."< +#include +#include +#include + +#define OPERATOR 1 //界符 or 运算符 +#define IDENTIFIER 2 //自定义标识符 +#define NUMBER 3 //数字 +#define RESERVEWORD 4 //关键字 +#define STRING 5 //字符串类型 +#define FAIL -1 //失败 +#define SCANEND -2 //扫描完成 +#define ERRORFOUND -3 //异常错误 + +std::string reserve_word[15]= +{ + "for","foreach","forindex","while", + "var","func","break","continue","return", + "if","else","elsif","nil","and","or" +}; + +int isReserveWord(std::string &p) +{ + for(int i=0;i<15;++i) + if(reserve_word[i]==p) + return i+1; + 
return FAIL; +} + +bool isLetter(char t) +{ + return (('a'<=t) && (t<='z') || ('A'<=t) && (t<='Z')); +} + +bool isNumber(char t) +{ + return (('0'<=t) && (t<='9')); +} + +bool isHex(char t) +{ + return ((('0'<=t) && (t<='9')) || (('a'<=t) && (t<='f'))); +} + +bool isOct(char t) +{ + return (('0'<=t) && (t<='7')); +} + +class resource_programme_process +{ + private: + char *resource; + public: + resource_programme_process() + { + resource=NULL; + resource=new char[16777216]; + } + ~resource_programme_process() + { + if(resource) + delete []resource; + } + char* use_file() + { + return resource; + } + void input_file(std::string& filename) + { + std::ifstream fin(filename); + if(fin.fail()) + { + std::cout<<">> [Error] Cannot load file: "< lexer; + public: + void scanner(int &syn,const char* source,std::string &__token,int &ptr,int &line) + { + char temp; + temp=source[ptr]; + while(temp==' ' || temp=='\n' || temp=='\t' || temp=='\r' || temp<0 || temp>127) + { + ++ptr; + if(temp=='\n') + ++line; + temp=source[ptr]; + } + __token=""; + if(isLetter(temp) || temp=='_') + { + __token+=temp; + ++ptr; + temp=source[ptr]; + while(isLetter(temp) || isNumber(temp) || temp=='_') + { + __token+=temp; + ++ptr; + temp=source[ptr]; + } + syn=isReserveWord(__token); + if(syn==FAIL) + syn=IDENTIFIER; + else + syn=RESERVEWORD; + } + else if(isNumber(temp)) + { + if((source[ptr]=='0') && (source[ptr+1]=='x')) + { + __token+=source[ptr]; + __token+=source[ptr+1]; + ptr+=2; + temp=source[ptr]; + while(isNumber(temp) || isHex(temp)) + { + __token+=temp; + ++ptr; + temp=source[ptr]; + } + } + else if((source[ptr]=='0') && (source[ptr+1]=='o')) + { + __token+=source[ptr]; + __token+=source[ptr+1]; + ptr+=2; + temp=source[ptr]; + while(isNumber(temp) || isOct(temp)) + { + __token+=temp; + ++ptr; + temp=source[ptr]; + } + } + else + { + int PointCnt=0; + while(isNumber(temp)) + { + __token+=temp; + ++ptr; + temp=source[ptr]; + if(temp=='.' 
&& !PointCnt) + { + ++PointCnt; + __token+=temp; + ++ptr; + temp=source[ptr]; + } + } + } + syn=NUMBER; + } + else if(temp=='(' || temp==')' || temp=='[' || temp==']' || temp=='{' || + temp=='}' || temp==',' || temp==';' || temp=='|' || temp==':' || + temp=='?' || temp=='.' || temp=='`' || temp=='\'' || temp=='&'|| + temp=='%' || temp=='$' || temp=='^') + { + __token+=temp; + ++ptr; + syn=OPERATOR; + } + else if(temp=='=' || temp=='+' || temp=='-' || temp=='*' || temp=='!' || temp=='/' || temp=='<' || temp=='>' || temp=='~') + { + syn=OPERATOR; + __token+=temp; + ++ptr; + temp=source[ptr]; + if(temp=='=') + { + __token+=temp; + ++ptr; + } + } + else if(temp=='\\') + { + syn=OPERATOR; + __token+=temp; + ++ptr; + temp=source[ptr]; + if(temp=='=' || temp=='n' || temp=='t' || temp=='r' || temp=='\\' || temp=='\'' || temp=='\"') + { + __token+=temp; + ++ptr; + } + } + else if(temp=='\"') + { + syn=STRING; + __token+=temp; + ++ptr; + temp=source[ptr]; + while(temp!='\"') + { + if(temp=='\\') + { + __token+=temp; + + ++ptr; + temp=source[ptr]; + __token+=temp; + + ++ptr; + temp=source[ptr]; + } + else + { + __token+=temp; + ++ptr; + temp=source[ptr]; + } + if(temp==0 || temp=='\n') + break; + } + //add the last char \" + if(temp=='\"') + { + __token+=temp; + ++ptr; + } + else + __token+=" __missing_end_of_string"; + } + else if(temp==0) + { + syn=SCANEND; + return; + } + else + { + syn=FAIL; + std::cout<<">>[Error] Unexpected error occurred: "<>[Error] Cannot identify "<>[Lexer] max size: "<0)//all Syn type is larger than zero + { + temp.line=line; + temp.type=syn; + temp.content=__token; + lexer.push_back(temp); + } + } + std::cout<<">>[Lexer] Complete scanning."<::iterator i=lexer.begin();i!=lexer.end();++i) + { + temp=*i; + std::cout<<"line "<& return_list() + { + return lexer; + } +}; + + + + +#endif diff --git a/version0.7/pda.h b/version0.7/pda.h new file mode 100644 index 0000000..d4f627a --- /dev/null +++ b/version0.7/pda.h @@ -0,0 +1,74 @@ +#ifndef __PDA_H__ 
#define __PDA_H__
// NOTE(review): the `#ifndef __PDA_H__` that opens this include guard sits
// immediately before this span, and the header never closes it. A matching
// `#endif` must be added at the very end of the file.
// NOTE(review): identifiers containing a double underscore (the guard macro
// and every enumerator below) are reserved for the implementation in C++.
// They are kept unchanged here because other files refer to them.

#include <stack>

/// One entry on a parse stack: a token/non-terminal type plus the source line
/// it came from.
struct parse_unit
{
    int line;   ///< source line number of the unit
    int type;   ///< a `token_type` value
};

/// Terminal and non-terminal symbols known to the parser.
///
/// `__stack_end` is the first enumerator and therefore has the value 0; the
/// reduction table below relies on that, because unused trailing slots of a
/// pattern are zero-initialized.
enum token_type
{
    __stack_end,
    __equal,                                    // =
    __cmp_equal,                                // ==
    __cmp_not_equal,                            // !=
    __cmp_less,__cmp_less_or_equal,             // < <=
    __cmp_more,__cmp_more_or_equal,             // > >=
    __and_operator,__or_operator,__nor_operator,// and or !
    __add_operator,__sub_operator,__mul_operator,__div_operator,__link_operator,// + - * / ~
    __add_equal,__sub_equal,__mul_equal,__div_equal,__link_equal,// += -= *= /= ~=
    __left_brace,__right_brace,                 // {}
    __left_bracket,__right_bracket,             // []
    __left_curve,__right_curve,                 // ()
    __semi,                                     // ;
    __comma,                                    // ,
    __colon,                                    // :
    __dot,                                      // .
    __var,                                      // var reserve word
    __func,                                     // func reserve word
    __unknown_type_id,__identifier,__identifiers,
    __scalar,__scalars,__list,__hash,
    __hash_member,__hash_members,
    __statement,__statements,
    __function,                                 // function(){}
    __definition,__assignment,__calculation,
    __loop,__continue,__break,__for,__forindex,__foreach,__while,// for()while() continue; break;
    __choose,__if,__elsif,__else,               // if else if else
    __return
};

/// One row of the reduction table: a sequence of symbols and the symbol the
/// sequence is replaced by.
struct cmp_seq
{
    int tokens[20]; ///< pattern; slots past the listed symbols are 0 (`__stack_end`)
    int res;        ///< resulting `token_type`
};

/// Reduction table.
///
/// Declared `static` so that every translation unit including this header
/// gets its own copy instead of a second external definition, which would be
/// a multiple-definition error at link time.
static cmp_seq par[]=
{
    {{__var,__identifier,__equal,__scalar,__semi},                                 __definition},
    {{__var,__identifier,__equal,__identifier,__semi},                             __definition},
    {{__var,__identifier,__equal,__list,__semi},                                   __definition},
    {{__var,__identifier,__equal,__hash,__semi},                                   __definition},
    {{__var,__identifier,__equal,__function},                                      __definition},
    {{__var,__left_curve,__identifiers,__right_curve,__equal,__identifier,__semi}, __definition},
    {{__var,__left_curve,__identifiers,__right_curve,__equal,__list,__semi},       __definition},
    {{__identifier,__dot,__identifier},                                            __identifier},
    {{__identifier,__left_bracket,__scalar,__right_bracket},                       __identifier},
    {{__identifier,__left_bracket,__identifier,__right_bracket},                   __identifier},
    {{__identifier,__left_bracket,__calculation,__right_bracket},                  __identifier},
    {{__identifier,__left_curve,__right_curve},                                    __identifier},
    {{__identifier,__left_curve,__scalar,__right_curve},                           __identifier}
};

/// Number of rows in `par`, computed from the array so it cannot drift out of
/// sync when rows are added or removed.
static const int num_of_par=static_cast<int>(sizeof(par)/sizeof(par[0]));

/// Push-down automaton skeleton. Only the three work stacks are declared; the
/// class has no member functions yet, so their individual roles are not
/// visible here.
class pda
{
    private:
        // NOTE(review): the element type was lost from the original text;
        // `parse_unit` is assumed because it is the stack record declared
        // in this header -- confirm.
        std::stack<parse_unit> main_stack;
        std::stack<parse_unit> cmp_stack;
        std::stack<parse_unit> recog_stack;
    public:

};