🐛 complete function of arg in all scopes

This commit is contained in:
ValKmjolnir 2023-07-09 16:21:09 +08:00
parent 3509655424
commit 7e72661332
12 changed files with 291 additions and 306 deletions

View File

@ -761,6 +761,8 @@ If get this, Congratulations!
<details><summary>Must use `var` to define variables</summary>
This interpreter uses stricter syntax to make programming and debugging easier for you.
FlightGear's nasal interpreter also has the same rule.
So do not use a variable without using `var` to declare it.
In Andy's interpreter:
@ -794,32 +796,6 @@ code: undefined symbol "i"
</details>
<details><summary>Default dynamic arguments not supported</summary>
In this interpreter,
a function doesn't put dynamic args into the vector `arg` by default.
So if you use `arg` without defining it,
you'll get an `undefined symbol` error.
```javascript
var f=func(){
println(arg)
}
f(1,2,3);
```
Compilation result:
```javascript
code: undefined symbol "arg"
--> test.nas:2:15
|
2 | println(arg)
| ^ undefined symbol "arg"
```
</details>
## __Trace Back Info__
![stackoverflow](./doc/gif/stackoverflow.gif)

View File

@ -735,6 +735,7 @@ dylib.dlclose(dlhandle.lib);
<details><summary>必须用 var 定义变量</summary>
这个解释器使用了更加严格的语法检查来保证你可以更轻松地debug。这是非常有必要的严格否则debug会非常痛苦。
同样的flightgear 内置的 nasal 解释器也采取了类似的措施,所以使用变量前务必用 `var` 先进行声明。
在Andy的解释器中:
@ -762,29 +763,6 @@ code: undefined symbol "i"
```
</details>
<details><summary>默认不定长参数</summary>
这个解释器在运行时,函数不会将超出参数表的那部分不定长参数放到默认的`arg`中。所以你如果不定义`arg`就使用它,那你只会得到`undefined symbol`。
```javascript
var f=func(){
println(arg)
}
f(1,2,3);
```
编译结果:
```javascript
code: undefined symbol "arg"
--> test.nas:2:15
|
2 | println(arg)
| ^ undefined symbol "arg"
```
</details>
## __堆栈追踪信息__
![stackoverflow](../doc/gif/stackoverflow.gif)

View File

@ -1210,12 +1210,6 @@ var builtin_millisec(var* local, gc& ngc) {
return var::num(res);
}
// Builtin behind "__sysargv": allocates a new vm_vec value through the gc,
// assigns ngc.env_argv to its element list, and returns that vector.
// `local` is not read by this builtin.
var builtin_sysargv(var* local, gc& ngc) {
    var argv_vec = ngc.alloc(vm_vec);
    argv_vec.vec().elems = ngc.env_argv;
    return argv_vec;
}
var builtin_gcextend(var* local, gc& ngc) {
var type = local[1];
if (type.type!=vm_str) {
@ -1356,7 +1350,6 @@ nasal_builtin_table builtin[] = {
{"__costatus", builtin_costatus},
{"__corun", builtin_corun},
{"__millisec", builtin_millisec},
{"__sysargv", builtin_sysargv},
{"__gcextd", builtin_gcextend},
{"__logtime", builtin_logtime},
{"__ghosttype", builtin_ghosttype},

View File

@ -124,7 +124,6 @@ var builtin_coyield(var*, gc&);
var builtin_costatus(var*, gc&);
var builtin_corun(var*, gc&);
var builtin_millisec(var*, gc&);
var builtin_sysargv(var*, gc&);
var builtin_gcextend(var*, gc&);
var builtin_logtime(var*, gc&);
var builtin_ghosttype(var*, gc&);

View File

@ -211,6 +211,16 @@ void codegen::func_gen(function* node) {
// search symbols first, must use after loading parameters
// or the location of symbols will change and cause fatal error
find_symbol(block);
// add special variable "arg", which is used to store overflowed args
// but if dynamic parameter is declared, this variable will be useless
// for example:
// var f = func(a) {print(arg)}
// f(1, 2, 3);
// then the arg is [2, 3], because 1 is accepted by "a"
// so in fact "f" is the same as:
// var f = func(a, arg...) {print(arg)}
add_symbol("arg");
in_iterloop.push(0);
block_gen(block);
in_iterloop.pop();
@ -1094,13 +1104,7 @@ const error& codegen::compile(parse& parse, linker& import) {
// add special symbol globals, which is a hash stores all global variables
add_symbol("globals");
// add special symbol arg here, which is used to store function arguments
// for example:
// var f = func(a) {print(arg)}
// f(1, 2, 3);
// then the arg is [2, 3], because 1 is accepted by "a"
// so in fact "f" is the same as:
// var f = func(a, arg...) {return(arg)}
// add special symbol arg here, which is used to store command line args
add_symbol("arg");
find_symbol(parse.tree()); // search symbols first

View File

@ -55,7 +55,9 @@ void lexer::skip_note() {
// Report the character at the current read position as an invalid
// character, then raise a fatal lexer error.
// `column` and `ptr` are both advanced past the character before reporting,
// so the reported span runs from column-1 to column on the current line.
void lexer::err_char() {
    ++column;
    char c = res[ptr++];
    // report the offending character exactly once, shown as hex via chrhex
    err.err("lexer",
        {line, column-1, line, column, filename},
        "invalid character 0x"+chrhex(c));
    err.fatal("lexer", "fatal error occurred, stop");
}
@ -109,7 +111,9 @@ std::string lexer::utf8_gen() {
for(u32 i = 1; i<tmp.size(); ++i) {
utf_info += " 0x"+chrhex(tmp[i]);
}
err.err("lexer", {line, column-1, line, column, filename}, "invalid utf-8 <"+utf_info+">");
err.err("lexer",
{line, column-1, line, column, filename},
"invalid utf-8 <"+utf_info+">");
err.fatal("lexer", "fatal error occurred, stop");
}
str += tmp;
@ -131,7 +135,9 @@ token lexer::id_gen() {
}
}
tok type = get_type(str);
return {{begin_line, begin_column, line, column, filename}, (type!=tok::null)?type:tok::id, str};
return {
{begin_line, begin_column, line, column, filename},
(type!=tok::null)? type:tok::id, str};
}
token lexer::num_gen() {
@ -145,8 +151,11 @@ token lexer::num_gen() {
str += res[ptr++];
}
column += str.length();
if (str.length()<3) { // "0x"
err.err("lexer", {begin_line, begin_column, line, column, filename}, "invalid number `"+str+"`");
// "0x"
if (str.length()<3) {
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"invalid number `"+str+"`");
}
return {{begin_line, begin_column, line, column, filename}, tok::num, str};
} else if (ptr+1<res.size() && res[ptr]=='0' && res[ptr+1]=='o') { // generate oct number
@ -162,7 +171,9 @@ token lexer::num_gen() {
}
column += str.length();
if (str.length()==2 || erfmt) {
err.err("lexer", {begin_line, begin_column, line, column, filename}, "invalid number `"+str+"`");
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"invalid number `"+str+"`");
}
return {{begin_line, begin_column, line, column, filename}, tok::num, str};
}
@ -180,7 +191,9 @@ token lexer::num_gen() {
// "xxxx." is not a correct number
if (str.back()=='.') {
column += str.length();
err.err("lexer",{begin_line, begin_column, line, column, filename}, "invalid number `"+str+"`");
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"invalid number `"+str+"`");
return {{begin_line, begin_column, line, column, filename}, tok::num, "0"};
}
}
@ -195,7 +208,9 @@ token lexer::num_gen() {
// "xxxe(-|+)" is not a correct number
if (str.back()=='e' || str.back()=='E' || str.back()=='-' || str.back()=='+') {
column += str.length();
err.err("lexer",{begin_line, begin_column, line, column, filename}, "invalid number `"+str+"`");
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"invalid number `"+str+"`");
return {{begin_line, begin_column, line, column, filename}, tok::num, "0"};
}
}
@ -244,12 +259,18 @@ token lexer::str_gen() {
}
// check if this string ends with a " or '
if (ptr++>=res.size()) {
err.err("lexer", {begin_line, begin_column, line, column, filename}, "get EOF when generating string");
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"get EOF when generating string");
return {{begin_line, begin_column, line, column, filename}, tok::str, str};
}
++column;
if (begin=='`' && str.length()!=1) {
err.err("lexer", {begin_line, begin_column, line, column, filename}, "\'`\' is used for string including one character");
// if is not utf8, 1+utf8_hdchk should be 1
if (begin=='`' && str.length()!=1+utf8_hdchk(str[0])) {
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"\'`\' is used for string including one character");
}
return {{begin_line, begin_column, line, column, filename}, tok::str, str};
}
@ -261,7 +282,9 @@ token lexer::single_opr() {
++column;
tok type = get_type(str);
if (type==tok::null) {
err.err("lexer", {begin_line, begin_column, line, column, filename}, "invalid operator `"+str+"`");
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"invalid operator `"+str+"`");
}
++ptr;
return {{begin_line, begin_column, line, column, filename}, type, str};

View File

@ -15,8 +15,10 @@ void vm::init(
/* set canary and program counter */
ctx.pc = 0;
ctx.localr=ctx.memr=nullptr;
ctx.funcr=ctx.upvalr=nil;
ctx.localr = nullptr;
ctx.memr = nullptr;
ctx.funcr = nil;
ctx.upvalr = nil;
ctx.canary = stack+STACK_DEPTH-1; // stack[STACK_DEPTH-1]
ctx.top = stack;
ctx.stack = stack;
@ -35,6 +37,11 @@ void vm::init(
for(const auto& i : global) {
map_instance.map().mapper[i.first] = stack+i.second;
}
/* init vm arg */
auto arg_instance = ngc.alloc(vm_vec);
stack[global.at("arg")] = arg_instance;
arg_instance.vec().elems = ngc.env_argv;
}
void vm::valinfo(var& val) {
@ -49,7 +56,8 @@ void vm::valinfo(var& val) {
case vm_nil: std::clog<<"| nil |";break;
case vm_num: std::clog<<"| num | "<<val.num();break;
case vm_str: std::clog<<"| str | <0x"<<std::hex<<(u64)p
<<"> "<<rawstr(val.str(),16)<<std::dec;break;
<<"> "<<rawstr(val.str(),16)
<<std::dec;break;
case vm_func: std::clog<<"| func | <0x"<<std::hex<<(u64)p
<<"> entry:0x"<<val.func().entry
<<std::dec;break;

View File

@ -622,9 +622,12 @@ inline void vm::o_callfv() {
return;
}
auto& func = local[-1].func();
// swap funcr with local[-1]
var tmp = local[-1];
local[-1] = ctx.funcr;
ctx.funcr = tmp;
// top-argc+lsize(local) +1(old pc) +1(old localr) +1(old upvalr)
if (ctx.top-argc+func.lsize+3>=ctx.canary) {
die("stack overflow");
@ -645,9 +648,9 @@ inline void vm::o_callfv() {
}
} else if (psize<argc) {
// load arguments to "arg", located at stack+1
stack[1] = ngc.alloc(vm_vec);
dynamic = ngc.alloc(vm_vec);
for(u32 i = psize; i<argc; ++i) {
stack[1].vec().elems.push_back(local[i]);
dynamic.vec().elems.push_back(local[i]);
}
}
// should reset stack top after allocating vector
@ -662,13 +665,12 @@ inline void vm::o_callfv() {
local[i] = local[i-1];
}
local[0] = func.local[0];// load "me"
// load local scope & default arguments
for(u32 i = min_size+1; i<func.lsize; ++i) {
local[i] = func.local[i];
}
if (func.dpara>=0) {
local[psize+1] = dynamic;
}
local[func.dpara>=0? psize+1:func.lsize-1] = dynamic;
ctx.top[0] = ctx.upvalr;
(++ctx.top)[0] = var::addr(ctx.localr);
@ -912,9 +914,6 @@ inline void vm::o_ret() {
ctx.funcr = ctx.top[0];
ctx.top[0] = ret; // rewrite func with returned value
// reset "arg"
stack[1] = nil;
if (up.type==vm_upval) { // synchronize upvalue
auto& upval = up.upval();
auto size = func.func().lsize;

View File

@ -478,7 +478,7 @@ var os = {
# runtime gives us some functions that we could manage it manually.
var runtime = {
# command line arguments
argv: func() {return __sysargv;},
argv: func() {return globals.arg;},
gc: {
extend: func(type) {return __gcextd;}
}

View File

@ -16,9 +16,12 @@ globals.test_func();
# f declares no parameters, so the arguments passed to it are expected to
# be available through its own `arg`
var f = func() {
println(arg);
# the nested anonymous function is called immediately with its own
# arguments and prints its own `arg`
func() {println(arg);}(114, 514, 1919, 810);
# printed again after the inner call returns; presumably checks that the
# inner call did not overwrite f's `arg`
println(arg);
}
f(1, 2, 3);
# command line arguments
println(arg);
println(globals.arg);

View File

@ -1,16 +1,16 @@
# This file is written by Andy Ross, and is protected by GPLv2.0
# A no-op function used below to get this file to run. Ignore and read on...
dummyFunc = func { 1 }
var dummyFunc = func { 1 }
#
# Literal numbers can be decimal, exponential, or hex constants. All
# numbers are stored internally as IEEE double-precision values.
#
n1 = 3;
n2 = 3.14;
n3 = 6.023e23;
n3 = 0x123456;
var n1 = 3;
var n2 = 3.14;
var n3 = 6.023e23;
var n3 = 0x123456;
#
# Two identical string literals with different quotes. Double quotes
@ -19,14 +19,14 @@ n3 = 0x123456;
# whitespace like newlines). Double quotes handle the following
# C-like escapes: \n \r \t \xnn \"
#
s1 = 'Andy\'s "computer" has a C:\righteous\newstuff directory.';
s2 = "Andy's \"computer\" has a C:\\righteous\\newstuff directory.";
var s1 = 'Andy\'s "computer" has a C:\righteous\newstuff directory.';
var s2 = "Andy's \"computer\" has a C:\\righteous\\newstuff directory.";
#
# Literal lists use square brackets with a comma-separated expression
# list.
#
list1 = ["a", "b", 1, 2];
var list1 = ["a", "b", 1, 2];
#
# Literal hashes (or objects -- same thing) use curlies and colons to
@ -35,8 +35,8 @@ list1 = ["a", "b", 1, 2];
# to use symbols, lookup tables of other types will be more
# comfortable with literals.
#
hash1 = { name : "Andy", job : "Hacker" };
EnglishEspanol = { "one" : "uno", "two": "dos", "blue" : "azul" };
var hash1 = { name : "Andy", job : "Hacker" };
var EnglishEspanol = { "one" : "uno", "two": "dos", "blue" : "azul" };
#
# Both vectors and hashes use square brackets for the lookup operation:
@ -50,7 +50,7 @@ hash1["name"] == "Andy";
# (anonymous) function argument to the local "log_message" variable.
# There is no function declaration syntax in Nasal.
#
log_message = func {
var log_message = func {
print(arg[0]);
}
@ -58,10 +58,10 @@ log_message = func {
# You can also pass named arguments to a function, thus saving the
# typing and performance costs of extracting them from the arg array.
#
sqrt = dummyFunc;
dist = func(x1, y1, x2, y2) {
dx = x2-x1;
dy = y2-y1;
var sqrt = dummyFunc;
var dist = func(x1, y1, x2, y2) {
var dx = x2-x1;
var dy = y2-y1;
return sqrt(dx*dx + dy*dy);
}
dist(0,0,1,1); # == sqrt(2)
@ -71,14 +71,14 @@ dist(0,0,1,1); # == sqrt(2)
# default value must be a scalar (number, string, function, nil) and
# not a mutable composite object (list, hash).
#
read = func(bytes, flags=0) { }
var read = func(bytes, flags=0) { }
#
# Any extra arguments after the named list are placed in the "arg"
# vector as above. You can rename this to something other than "arg"
# by specifying a final argument name with an ellipsis:
#
listify = func(elements...) { return elements; }
var listify = func(elements...) { return elements; }
listify(1, 2, 3, 4); # returns a list: [1, 2, 3, 4]
#
@ -87,7 +87,7 @@ listify(1, 2, 3, 4); # returns a list: [1, 2, 3, 4]
# good practice in general, although it is not required. Note that
# this is not a "declaration", just a qualifier on the "=" operator.
#
innerFunc = func {
var innerFunc = func {
for(var dist=0; dist<100; dist += 1) {
# Does not interfere with the "dist" symbol defined above
}
@ -99,26 +99,26 @@ innerFunc = func {
# what the ?: does in C. The last semicolon in a code block is
# optional, to make this prettier.
#
abs = func(n) { if(n<0) { -n } else { n } }
var abs = func(n) { if(n<0) { -n } else { n } }
#
# But for those who don't like typing, the ternary operator works like
# you expect:
#
abs = func(n) { n < 0 ? -n : n }
var abs = func(n) { n < 0 ? -n : n }
#
# Nasal supports a "nil" value for use as a null pointer equivalent.
# It can be tested for equality, matching only other nils.
#
listNode = { data : ["what", "ever"], next : nil };
var listNode = { data : ["what", "ever"], next : nil };
#
# Nasal's binary boolean operators are "and" and "or", unlike C.
# unary not is still "!" however. They short-circuit like you expect
#
toggle = 0;
a = nil;
var toggle = 0;
var a = nil;
if(a and a.field == 42) {
toggle = !toggle; # doesn't crash when a is nil
}
@ -129,24 +129,24 @@ if(a and a.field == 42) {
# takes a local variable name as its first argument and a vector as
# its second.
#
doSomething = dummyFunc;
var doSomething = dummyFunc;
stillGoing = 0;
var stillGoing = 0;
while(stillGoing) { doSomething(); }
for(i=0; i < 3; i = i+1) {
for(var i=0; i < 3; i = i+1) {
elem = list1[i];
doSomething(elem);
}
foreach(elem; list1) { doSomething(elem) } # Shorthand for above
foreach(var elem; list1) { doSomething(elem) } # Shorthand for above
#
# There is also a "forindex", which is like foreach except that it
# assigns the index of each element, instead of the value, to the loop
# variable.
#
forindex(i; list1) { doSomething(list1[i]); }
forindex(var i; list1) { doSomething(list1[i]); }
#
# Define a class object with one method, one field and one "new"
@ -155,10 +155,10 @@ forindex(i; list1) { doSomething(list1[i]); }
# appropriately. Member functions can get their local object (the
# equivalent of the "this" pointer in C++) as the "me" variable.
#
Class1 = {};
var Class1 = {};
Class1.new = func {
obj = { parents : [Class1],
var obj = { parents : [Class1],
count : 0 };
return obj;
}
@ -168,7 +168,7 @@ Class1.getcount = func {
return me.count;
}
c = Class1.new();
var c = Class1.new();
print(c.getcount(), "\n"); # prints 1
print(c.getcount(), "\n"); # prints 2
print(c.getcount(), "\n"); # prints 3
@ -177,18 +177,18 @@ print(c.getcount(), "\n"); # prints 3
# But *set* operations always go to the local object. You can't
# corrupt a parent class via OOP operations on its instances (but you
# *can* get to it via hand-inspection of the parents arrays).
c2 = Class1.new();
c2.getcount() = func { 12345 }; # custom "derived" function!
var c2 = Class1.new();
c2.getcount = func { return 12345 }; # custom "derived" function!
print(c2.getcount(), "\n"); # prints 12345
print(c1.getcount(), "\n"); # prints 4, Class1.getcount is unchanged
print(c.getcount(), "\n"); # prints 4, Class1.getcount is unchanged
#
# This creates an identical class using alternative syntax.
#
Class2 = {
var Class2 = {
new : func {
obj = {};
var obj = {};
obj.parents = [Class2];
obj.count = 0;
return obj;
@ -206,7 +206,7 @@ Class2 = {
# C (although note that there is no nul termination -- get the length
# with size()):
#
string = "abcdefghijklmnopqrstuvwxyz";
var string = "abcdefghijklmnopqrstuvwxyz";
var ascii_sum = 0;
for(var i=0; i<size(string); i+=1) { ascii_sum += string[i]; }
@ -222,7 +222,7 @@ if(`©` != 169) { print("Unicode violation bug!\n"); }
# can make a mutable string either with the append operator or the
# bits.buf() function.
#
ascii_lc = func(string) {
var ascii_lc = func(string) {
var mutable = string ~ "";
for(var i=0; i<size(mutable); i+=1) {
if(mutable[i] >= `A` and mutable[i] <= `Z`) {
@ -237,14 +237,14 @@ print(ascii_lc("ABCDEFG"), "\n"); # prints "abcdefg"
# Advanced vectors: The lookup index can be negative, where -1
# indicates the last element in the vector (or string).
#
next_to_last = list1[-2];
var next_to_last = list1[-2];
#
# Remember that strings look syntactically like vectors of bytes; so
# conversely, the "~" concatenation operator works equally well to
# concatenate vectors:
#
joined_list = [1, 2, 3] ~ [4, 5, 6];
var joined_list = [1, 2, 3] ~ [4, 5, 6];
###
### Now some fun examples:
@ -254,9 +254,9 @@ joined_list = [1, 2, 3] ~ [4, 5, 6];
# Make a "inverted index" hash out of a vector that returns the index
# for each element.
#
invert = func(vec) {
hash = {};
for(i=0; i<size(vec); i = i+1) {
var invert = func(vec) {
var hash = {};
for(var i=0; i<size(vec); i = i+1) {
hash[vec[i]] = i;
}
return hash;
@ -266,16 +266,16 @@ invert = func(vec) {
# Use the return value of the above function to do an "index of"
# lookup on a vector
#
vecfind = func(vec, elem) { return invert(vec)[elem]; }
var vecfind = func(vec, elem) { return invert(vec)[elem]; }
#
# Joins its arguments with the empty string and returns a scalar.
# Note use of "~" operator to do string concatenation (Nasal's only
# funny syntax).
#
join = func {
s = "";
foreach(elem; arg) { s = s ~ elem; }
var join = func {
var s = "";
foreach(var elem; arg) { s = s ~ elem; }
return s;
}
@ -283,14 +283,16 @@ join = func {
# Labeled break/continue syntax puts the label in as an extra first
# argument to the for/while/foreach.
#
doneWithInnerLoopEarly = dummyFunc;
completelyDone = dummyFunc;
for(OUTER; i=0; i<100; i = i+1) {
for(j=0; j<100; j = j+1) {
var doneWithInnerLoopEarly = dummyFunc;
var completelyDone = dummyFunc;
# not supported now
for(#OUTER;
var i=0; i<100; i = i+1) {
for(var j=0; j<100; j = j+1) {
if(doneWithInnerLoopEarly()) {
break;
} elsif(completelyDone()) {
break OUTER;
break #OUTER;
}
}
}
@ -303,10 +305,10 @@ for(OUTER; i=0; i<100; i = i+1) {
## also makes no attempt to escape special characters in strings, which
## can break re-parsing in strange (and possibly insecure!) ways.
##
dump = func(o) {
result = "";
var dump = func(o) {
var result = "";
if(typeof(o) == "scalar") {
n = num(o);
var n = num(o);
if(n == nil) { result = result ~ '"' ~ o ~ '"'; }
else { result = result ~ o; }
} elsif(typeof(o) == "vector") {
@ -317,14 +319,14 @@ dump = func(o) {
}
result = result ~ " ]";
} elsif(typeof(o) == "hash") {
ks = keys(o);
var ks = keys(o);
result = result ~ "{ ";
if(size(o) > 0) {
k = ks[0];
var k = ks[0];
result = result ~ k ~ ":" ~ dump(o[k]);
}
for(i=1; i<size(o); i=i+1) {
k = ks[i];
var k = ks[i];
result = result ~ ", " ~ k ~ " : " ~ dump(o[k]);
}
result = result ~ " }";
@ -345,7 +347,7 @@ dump = func(o) {
# normal function definition. Oh well, every language has a syntactic
# quirk or two...)
#
a = (func(n){ n + 1 })(232); # "a" now equals 233
var a = (func(n){ n + 1 })(232); # "a" now equals 233
#
# Functional programming B. All expressions have a value, the last
@ -354,7 +356,7 @@ a = (func(n){ n + 1 })(232); # "a" now equals 233
# (assignment, duh) have side effects. e.g. The "if" expression works
# both for code flow and as the ?: expression in C/C++.
#
factorial = func(n) { if(n == 0) { 1 }
var factorial = func(n) { if(n == 0) { 1 }
else { n * factorial(n-1) } }
print(factorial(10), "\n");
@ -364,8 +366,8 @@ print(factorial(10), "\n");
# local variables in the outer scope even after their creator has
# returned.
#
getcounter = func { count = 0; return func { count = count + 1 } }
mycounter = getcounter();
var getcounter = func { var count = 0; return func { count = count + 1 } }
var mycounter = getcounter();
print(mycounter(), "\n"); # prints 1
print(mycounter(), "\n"); # prints 2
print(mycounter(), "\n"); # prints 3