This commit is contained in:
Valk Richard Li 2020-05-26 02:59:45 -07:00 committed by GitHub
parent 54c855e17e
commit c1803a455d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 235 additions and 234 deletions

View File

@ -4,9 +4,10 @@
#include <iostream>
#include <fstream>
#include <cstring>
#include <cstdlib>
#include <ctime>
#include <cmath>
// if thread is used, don't forget to compile with -std=c++11 or a higher standard
/* if thread is used, don't forget to compile with -std=c++11 or a higher standard */
// #include <thread>
#include <list>
#include <stack>
@ -20,6 +21,7 @@
including a function that prints the hex format number of an integer
*/
#include "nasal_misc.h"
/*
nasal_enum.h
including enums of: lexer token type,parse generated type,scalar type
@ -29,12 +31,14 @@
scalar type is used in nasal_runtime and nasal_gc
*/
#include "nasal_enum.h"
/*
nasal_ast.h
including a class named abstract_syntax_tree
this class is frequently used in nasal_parse nasal_runtime
*/
#include "nasal_ast.h"
/*
nasal_lexer.h
including a class named resource_file
@ -45,6 +49,7 @@
including a function named is_reserve_word, checking if an identifier is a reserve word
*/
#include "nasal_lexer.h"
/*
nasal_parse.h
including a class named nasal_parse
@ -53,6 +58,7 @@
if parse errors occur, this enum will be used
*/
#include "nasal_parse.h"
/*
nasal_gc.h(garbage collector and memory manager of nasal_runtime)
including basic classes named: nasal_number, nasal_string, nasal_vector, nasal_hash, nasal_function
@ -61,6 +67,14 @@
nasal_gc is an object of class gc_manager, and nasal_runtime uses this object as its memory manager
*/
#include "nasal_gc.h"
/*
nasal_builtinfunc.h
including built-in functions of nasal lib
all functions in this .cpp will be used in nasal_runtime::builtin_function()
*/
#include "nasal_builtinfunc.h"
/*
nasal_runtime.h
including a class named nasal_runtime

View File

@ -2,7 +2,38 @@
#define __NASAL_ENUM_H__
// lexer token type is only used in nasal_lexer
// each scanned token will be recognized as one of these below
/*
__token_reserve_word:
for,foreach,forindex,while : loop head
var,func : definition
break,continue : in loop
return : in function
if,else,elsif : conditional expr
and,or : calculation
nil : special type
__token_identifier:
must begin with '_' or 'a'~'z' or 'A'~'Z'
can include '_' or 'a'~'z' or 'A'~'Z' or '0'~'9'
__token_string:
example:
"string"
'string'
if a string does not end with " or ' then lexer will throw an error
__token_number:
example:
2147483647 (integer)
2.71828 (float)
0xdeadbeef (hex) or 0xDEADBEEF (hex)
0o170001 (oct)
1e-1234 (dec) or 10E2 (dec)
__token_operator:
! + - * / ~
= += -= *= /= ~=
== != > >= < <=
('and' 'or' are operators too but they are recognized as operator in generate_detail_token())
() [] {} ; , . : ?
others: __unknown_operator
*/
enum lexer_token_type
{
__token_reserve_word=1,
@ -29,40 +60,39 @@ enum parse_gen_type
{
/*
stack end is an important flag for parse token stack to
check if its stack is at end
check if its stack is at end/empty
if the stack is empty, the parser will access invalid memory and cause SIGSEGV
*/
__stack_end=1,
// operators == != < <= > >=
__cmp_equal,
__cmp_not_equal,
__cmp_less,__cmp_less_or_equal,
__cmp_more,__cmp_more_or_equal,
// operators and or ! + - * / ~
__cmp_more,__cmp_more_or_equal, // operators == != < <= > >=
__and_operator, __or_operator,__nor_operator,
__add_operator,__sub_operator,
__mul_operator,__div_operator,__link_operator,
// operators = += -= *= /= ~=
__mul_operator,__div_operator,__link_operator, // operators and or ! + - * / ~
__equal,
__add_equal,__sub_equal,
__mul_equal,__div_equal,__link_equal,
// operators {} [] () ; , : . ?
__mul_equal,__div_equal,__link_equal, // operators = += -= *= /= ~=
__left_brace,__right_brace, // {}
__left_bracket,__right_bracket, // []
__left_curve,__right_curve, // ()
__semi,__comma,__colon,__dot,__ques_mark, // ; , : . ?
__unknown_operator,
// reserve words
__var,
__func,__return,__nil,
__if,__elsif,__else,
__continue,__break,
__for,__forindex,__foreach,__while,
__for,__forindex,__foreach,__while, // reserve words
// basic scalar type: number string
__number,__string,
// basic identifier type: identifier dynamic_identifier
__id,__dynamic_id,
__number,__string, // basic scalar type: number string
__id,__dynamic_id, // basic identifier type: identifier dynamic_identifier
// abstract_syntax_tree type below
// abstract_syntax_tree also uses the types above, such as operators

View File

@ -1,38 +1,17 @@
#ifndef __NASAL_LEXER_H__
#define __NASAL_LEXER_H__
/*
__token_reserve_word:
for,foreach,forindex,while : loop
var,func : definition
break,continue : in loop
return : in function
if,else,elsif : conditional expr
and,or : calculation
nil : special type
__token_identifier:
must begin with '_' or 'a'~'z' or 'A'~'Z'
can include '_' or 'a'~'z' or 'A'~'Z' or '0'~'9'
__token_string:
example:
"string"
'string'
if a string does not end with " or ' then lexer will throw an error
__token_number:
example:
2147483647 (integer)
2.71828 (float)
0xdeadbeef (hex) or 0xDEADBEEF (hex)
0o170001 (oct)
1e-1234 (dec) or 10E2 (dec)
__token_operator:
! + - * / ~
= += -= *= /= ~=
== != > >= < <=
('and' 'or' are operators too but they are recognized as operator in generate_detail_token())
() [] {} ; , . : ?
others: __unknown_operator
*/
/*
    character classification macros used by the lexer
    each macro expands to ONE fully parenthesized boolean expression, and every use of
    the argument is wrapped in parentheses, so the macros stay correct when they are
    negated (!IS_X(c)), combined with && / ||, or given a compound expression as argument
    NOTE: the argument is evaluated more than once, so never pass an expression with
    side effects (for example res[ptr++])
*/
// an identifier must begin with '_' or 'a'~'z' or 'A'~'Z'
#define IS_IDENTIFIER_HEAD(c) (((c)=='_')||('a'<=(c)&&(c)<='z')||('A'<=(c)&&(c)<='Z'))
// after the first character an identifier can also include '0'~'9'
#define IS_IDENTIFIER_BODY(c) (((c)=='_')||('a'<=(c)&&(c)<='z')||('A'<=(c)&&(c)<='Z')||('0'<=(c)&&(c)<='9'))
// a number must begin with a decimal digit
#define IS_NUMBER_HEAD(c)     ('0'<=(c)&&(c)<='9')
// number body: decimal/hex digits, '.', and the 'x'/'o' prefixes
// the exponent marks 'e'/'E' are already covered by the hex digit ranges 'a'~'f' and 'A'~'F'
#define IS_NUMBER_BODY(c)     (('0'<=(c)&&(c)<='9')||('a'<=(c)&&(c)<='f')||('A'<=(c)&&(c)<='F')||(c)=='.'||(c)=='x'||(c)=='o')
// a string begins with ' or "
#define IS_STRING_HEAD(c)     ((c)=='\''||(c)=='\"')
// single operators have only one character
// (the misspelled name IS_SINGLE_OPRATOR is kept because existing code calls it)
#define IS_SINGLE_OPRATOR(c)  ((c)=='('||(c)==')'||(c)=='['||(c)==']'||(c)=='{'||(c)=='}'||(c)==','||(c)==';'||(c)=='|'||(c)==':'||\
                               (c)=='?'||(c)=='.'||(c)=='`'||(c)=='&'||(c)=='@'||(c)=='%'||(c)=='$'||(c)=='^'||(c)=='\\')
// calculation operators may have two chars, for example: += -= *= /= ~= != == >= <=
#define IS_CALC_OPERATOR(c)   ((c)=='='||(c)=='+'||(c)=='-'||(c)=='*'||(c)=='!'||(c)=='/'||(c)=='<'||(c)=='>'||(c)=='~')
// a note (comment) begins with '#'
#define IS_NOTE_HEAD(c)       ((c)=='#')
/* filenames of lib files */
#ifndef LIB_FILE_NUM
@ -52,6 +31,7 @@ const std::string lib_filename[LIB_FILE_NUM]=
"lib/utf8.nas"
};
#endif
/* reserve words */
#ifndef RESERVE_WORD_NUM
#define RESERVE_WORD_NUM 15
@ -141,6 +121,7 @@ void resource_file::delete_all_source()
std::vector<char> tmp;
source_code.clear();
source_code.swap(tmp);
// use tmp's destructor to delete the memory space that source_code used before
return;
}
void resource_file::input_file(std::string filename)
@ -156,9 +137,7 @@ void resource_file::input_file(std::string filename)
while(!fin.eof())
{
c=fin.get();
if(fin.eof())
break;
//source_code.push_back(c<0? '?':c);
if(fin.eof()) break;
source_code.push_back(c);
}
fin.close();
@ -178,9 +157,8 @@ void resource_file::load_lib_file()
while(!fin.eof())
{
c=fin.get();
if(fin.eof())
break;
source_code.push_back(c<0? '?':c);
if(fin.eof()) break;
source_code.push_back(c);
}
}
fin.close();
@ -193,26 +171,25 @@ std::vector<char>& resource_file::get_source()
}
void resource_file::print_resource()
{
int size=source_code.size();
int line=1;
std::cout<<line<<"\t";
for(int i=0;i<source_code.size();++i)
for(int i=0;i<size;++i)
{
if(32<=source_code[i])
std::cout<<source_code[i];
if(32<=source_code[i]) std::cout<<source_code[i];
else if(source_code[i]<0)
{
// print unicode
std::string tmp="";
for(;i<source_code.size();++i)
for(;i<size;++i)
{
if(source_code[i]>=0)
break;
if(source_code[i]>=0) break;
tmp.push_back(source_code[i]);
}
std::cout<<tmp;--i;
}
else
std::cout<<" ";
if(source_code[i]=='\n')
else std::cout<<" ";
if(i<size && source_code[i]=='\n')
{
++line;
std::cout<<std::endl<<line<<"\t";
@ -225,42 +202,27 @@ void resource_file::print_resource()
std::string nasal_lexer::identifier_gen(std::vector<char>& res,int& ptr,int& line)
{
std::string token_str="";
while(res[ptr]=='_' || ('a'<=res[ptr] && res[ptr]<='z') || ('A'<=res[ptr] && res[ptr]<='Z') || ('0'<=res[ptr] && res[ptr]<='9'))
while(IS_IDENTIFIER_BODY(res[ptr]))
{
token_str+=res[ptr];
++ptr;
if(ptr>=res.size())
break;
if(ptr>=res.size()) break;
}
// check dynamic identifier "..."
if(res[ptr]=='.')
{
++ptr;
if(ptr<res.size() && res[ptr]=='.')
{
++ptr;
if(ptr<res.size() && res[ptr]=='.')
if(ptr+2<res.size() && res[ptr]=='.' && res[ptr+1]=='.' && res[ptr+2]=='.')
{
token_str+="...";
++ptr;
}
else
ptr-=2;
}
else
--ptr;
ptr+=3;
}
return token_str;
// after running this process, ptr will point to the next token's beginning character
}
std::string nasal_lexer::number_gen(std::vector<char>& res,int& ptr,int& line)
{
bool scientific_notation=false;
bool scientific_notation=false;// numbers like 1e8 are scientific_notation
std::string token_str="";
while(('0'<=res[ptr] && res[ptr]<='9') ||
('a'<=res[ptr] && res[ptr]<='f') ||
('A'<=res[ptr] && res[ptr]<='F') ||
res[ptr]=='.' || res[ptr]=='x' || res[ptr]=='o' ||
res[ptr]=='e' || res[ptr]=='E')
while(IS_NUMBER_BODY(res[ptr]))
{
token_str+=res[ptr];
if(res[ptr]=='e' || res[ptr]=='E')
@ -299,14 +261,12 @@ std::string nasal_lexer::string_gen(std::vector<char>& res,int& ptr,int& line)
std::string token_str="";
char str_begin=res[ptr];
++ptr;
if(ptr>=res.size())
return token_str;
if(ptr>=res.size()) return token_str;
while(ptr<res.size() && res[ptr]!=str_begin)
{
token_str+=res[ptr];
if(res[ptr]=='\n')
++line;
if(res[ptr]=='\\')
if(res[ptr]=='\n') ++line;
if(res[ptr]=='\\' && ptr+1<res.size())
{
++ptr;
switch(res[ptr])
@ -321,8 +281,7 @@ std::string nasal_lexer::string_gen(std::vector<char>& res,int& ptr,int& line)
}
}
++ptr;
if(ptr>=res.size())
break;
if(ptr>=res.size()) break;
}
// check if this string ends with a " or '
if(ptr>=res.size())
@ -377,13 +336,12 @@ void nasal_lexer::scanner(std::vector<char>& res)
{
while(ptr<res.size() && (res[ptr]==' ' || res[ptr]=='\n' || res[ptr]=='\t' || res[ptr]=='\r' || res[ptr]<0))
{
if(res[ptr]=='\n')
++line;
// these characters will be ignored, and '\n' will cause ++line
if(res[ptr]=='\n') ++line;
++ptr;
}
if(ptr>=res.size())
break;
if(res[ptr]=='_' || ('a'<=res[ptr] && res[ptr]<='z') || ('A'<=res[ptr] && res[ptr]<='Z'))
if(ptr>=res.size()) break;
if(IS_IDENTIFIER_HEAD(res[ptr]))
{
token_str=identifier_gen(res,ptr,line);
token new_token;
@ -392,7 +350,7 @@ void nasal_lexer::scanner(std::vector<char>& res)
new_token.str=token_str;
token_list.push_back(new_token);
}
else if('0'<=res[ptr] && res[ptr]<='9')
else if(IS_NUMBER_HEAD(res[ptr]))
{
token_str=number_gen(res,ptr,line);
token new_token;
@ -401,7 +359,7 @@ void nasal_lexer::scanner(std::vector<char>& res)
new_token.str=token_str;
token_list.push_back(new_token);
}
else if(res[ptr]=='\'' || res[ptr]=='\"')
else if(IS_STRING_HEAD(res[ptr]))
{
token_str=string_gen(res,ptr,line);
token new_token;
@ -410,10 +368,7 @@ void nasal_lexer::scanner(std::vector<char>& res)
new_token.str=token_str;
token_list.push_back(new_token);
}
else if(res[ptr]=='(' || res[ptr]==')' || res[ptr]=='[' || res[ptr]==']' || res[ptr]=='{' ||
res[ptr]=='}' || res[ptr]==',' || res[ptr]==';' || res[ptr]=='|' || res[ptr]==':' ||
res[ptr]=='?' || res[ptr]=='.' || res[ptr]=='`' || res[ptr]=='&' || res[ptr]=='@' ||
res[ptr]=='%' || res[ptr]=='$' || res[ptr]=='^' || res[ptr]=='\\')
else if(IS_SINGLE_OPRATOR(res[ptr]))
{
token_str="";
token_str+=res[ptr];
@ -424,8 +379,7 @@ void nasal_lexer::scanner(std::vector<char>& res)
token_list.push_back(new_token);
++ptr;
}
else if(res[ptr]=='=' || res[ptr]=='+' || res[ptr]=='-' || res[ptr]=='*' || res[ptr]=='!' ||
res[ptr]=='/' || res[ptr]=='<' || res[ptr]=='>' || res[ptr]=='~')
else if(IS_CALC_OPERATOR(res[ptr]))
{
// get calculation operator
token_str="";
@ -442,7 +396,7 @@ void nasal_lexer::scanner(std::vector<char>& res)
new_token.str=token_str;
token_list.push_back(new_token);
}
else if(res[ptr]=='#')
else if(IS_NOTE_HEAD(res[ptr]))
{
// avoid note
while(ptr<res.size() && res[ptr]!='\n')

View File

@ -773,7 +773,8 @@ int nasal_runtime::call_function(std::list<std::map<std::string,int> >& local_s
nasal_gc.get_scalar(addr).get_function().get_parameter_list(),
nasal_gc.get_scalar(addr).get_function().get_statement_block(),
*call_node,
last_hash_addr);
last_hash_addr
);
if(addr<0)
return -1;
nasal_gc.reference_delete(tmp_addr);
@ -968,6 +969,8 @@ int nasal_runtime::function_generation(std::list<std::map<std::string,int> >& lo
void nasal_runtime::update_closure(std::list<std::map<std::string,int> >& local_scope,int local_scope_addr)
{
// update_closure
// each new function is updated only once: after its closure is updated, the function's closure_updated flag is set to true
// known bug: if the new function is a member of a vector or hash, it will not be updated
if(!local_scope.size())
return;
for(std::map<std::string,int>::iterator i=local_scope.back().begin();i!=local_scope.back().end();++i)