diff --git a/CMakeLists.txt b/CMakeLists.txt index c45022b..d2f7497 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,7 +39,6 @@ set(NASAL_OBJECT_SOURCE_FILE ${CMAKE_SOURCE_DIR}/src/nasal_gc.cpp ${CMAKE_SOURCE_DIR}/src/nasal_import.cpp ${CMAKE_SOURCE_DIR}/src/nasal_lexer.cpp - ${CMAKE_SOURCE_DIR}/src/nasal_misc.cpp ${CMAKE_SOURCE_DIR}/src/nasal_opcode.cpp ${CMAKE_SOURCE_DIR}/src/nasal_parse.cpp ${CMAKE_SOURCE_DIR}/src/nasal_type.cpp diff --git a/makefile b/makefile index ffbf9ad..698e9c2 100644 --- a/makefile +++ b/makefile @@ -54,7 +54,6 @@ NASAL_OBJECT = \ build/nasal_opcode.o\ build/symbol_finder.o\ build/nasal_codegen.o\ - build/nasal_misc.o\ build/nasal_gc.o\ build/builtin.o\ build/fg_props.o\ @@ -92,9 +91,6 @@ build: build/main.o: $(NASAL_HEADER) src/main.cpp | build $(CXX) $(CXXFLAGS) src/main.cpp -o build/main.o -build/nasal_misc.o: src/nasal.h src/util/util.h src/nasal_misc.cpp | build - $(CXX) $(CXXFLAGS) src/nasal_misc.cpp -o build/nasal_misc.o - build/cli.o: src/cli/cli.h src/cli/cli.cpp | build $(CXX) $(CXXFLAGS) src/cli/cli.cpp -o build/cli.o @@ -110,7 +106,10 @@ build/repl.o: $(NASAL_HEADER) src/repl/repl.h src/repl/repl.cpp | build build/nasal_err.o: src/nasal.h src/repl/repl.h src/nasal_err.h src/nasal_err.cpp | build $(CXX) $(CXXFLAGS) src/nasal_err.cpp -o build/nasal_err.o -build/nasal_type.o: src/nasal.h src/nasal_type.h src/nasal_type.cpp | build +build/nasal_type.o:\ + src/nasal.h\ + src/util/util.h\ + src/nasal_type.h src/nasal_type.cpp | build $(CXX) $(CXXFLAGS) src/nasal_type.cpp -o build/nasal_type.o build/nasal_gc.o: src/nasal.h src/nasal_type.h src/nasal_gc.h src/nasal_gc.cpp | build @@ -129,6 +128,7 @@ build/nasal_import.o: \ build/nasal_lexer.o: \ src/nasal.h\ src/repl/repl.h\ + src/util/util.h\ src/util/fs.h\ src/nasal_err.h\ src/nasal_lexer.h src/nasal_lexer.cpp | build @@ -163,7 +163,6 @@ build/bits_lib.o: \ src/natives/bits_lib.h src/natives/bits_lib.cpp | build $(CXX) $(CXXFLAGS) src/natives/bits_lib.cpp -o build/bits_lib.o - build/math_lib.o: \ src/nasal.h\ src/nasal_type.h\ @@ -190,6 +189,7 @@ build/json_lib.o: \ src/nasal.h\ src/nasal_type.h\ src/nasal_gc.h\ + src/util/util.h\ src/natives/json_lib.h src/natives/json_lib.cpp | build $(CXX) $(CXXFLAGS) src/natives/json_lib.cpp -o build/json_lib.o @@ -220,6 +220,7 @@ build/nasal_codegen.o: $(NASAL_HEADER) src/nasal_codegen.h src/nasal_codegen.cpp build/nasal_opcode.o: \ src/nasal.h\ src/natives/builtin.h\ + src/util/util.h\ src/nasal_opcode.h src/nasal_opcode.cpp | build $(CXX) $(CXXFLAGS) src/nasal_opcode.cpp -o build/nasal_opcode.o @@ -228,6 +229,7 @@ build/nasal_parse.o: \ src/nasal_ast.h\ src/nasal_lexer.h\ src/nasal_err.h\ + src/util/util.h\ src/nasal_parse.h src/nasal_parse.cpp src/nasal_ast.h | build $(CXX) $(CXXFLAGS) src/nasal_parse.cpp -o build/nasal_parse.o @@ -259,6 +261,7 @@ build/ast_dumper.o: \ src/nasal_err.h\ src/nasal_ast.h\ src/ast_visitor.h\ + src/util/util.h\ src/ast_dumper.h src/ast_dumper.cpp | build $(CXX) $(CXXFLAGS) src/ast_dumper.cpp -o build/ast_dumper.o diff --git a/src/ast_dumper.cpp b/src/ast_dumper.cpp index a12a0e7..03fbb71 100644 --- a/src/ast_dumper.cpp +++ b/src/ast_dumper.cpp @@ -1,4 +1,5 @@ #include "ast_dumper.h" +#include "util/util.h" #include @@ -39,7 +40,7 @@ bool ast_dumper::visit_number_literal(number_literal* node) { bool ast_dumper::visit_string_literal(string_literal* node) { dump_indent(); - std::cout << "string \"" << rawstr(node->get_content()) << "\""; + std::cout << "string \"" << util::rawstr(node->get_content()) << "\""; std::cout << format_location(node); return true; } diff --git a/src/nasal.h b/src/nasal.h index 7d0c256..a324ad5 100644 --- a/src/nasal.h +++ b/src/nasal.h @@ -5,11 +5,6 @@ #endif #include -#include -#include -#include -#include -#include // abbreviation of some useful basic type using i32 = std::int32_t; @@ -21,24 +16,5 @@ using u64 = std::uint64_t; using usize = std::size_t; using f64 = double; -namespace nasal { - // virtual machine stack depth, both global depth and value stack depth const u32 VM_STACK_DEPTH = UINT16_MAX; - -f64 hex_to_f64(const char*); -f64 oct_to_f64(const char*); -// we have the same reason not using atof here -// just as andy's interpreter does. -// it is not platform independent, and may have strange output. -// so we write a new function here to convert str to number manually. -// but this also makes 0.1+0.2==0.3, -// not another result that you may get in other languages. -f64 dec_to_f64(const char*); - -f64 str_to_num(const char*); -i32 utf8_hdchk(const char); -std::string char_to_hex(const char); -std::string rawstr(const std::string&, const usize maxlen = 0); - -} \ No newline at end of file diff --git a/src/nasal_codegen.cpp b/src/nasal_codegen.cpp index 06f7ea9..2a0b3cc 100644 --- a/src/nasal_codegen.cpp +++ b/src/nasal_codegen.cpp @@ -1,4 +1,5 @@ #include "nasal_codegen.h" +#include "util/util.h" namespace nasal { @@ -1390,7 +1391,7 @@ void codegen::print(std::ostream& out) { // print const strings for(const auto& str : const_string_table) { - out << " .symbol \"" << rawstr(str) << "\"\n"; + out << " .symbol \"" << util::rawstr(str) << "\"\n"; } // print blank line diff --git a/src/nasal_gc.h b/src/nasal_gc.h index 7acfa57..399773e 100644 --- a/src/nasal_gc.h +++ b/src/nasal_gc.h @@ -13,6 +13,7 @@ #include #include #include +#include #include "nasal.h" #include "nasal_type.h" diff --git a/src/nasal_lexer.cpp b/src/nasal_lexer.cpp index f98be38..7ff1f21 100644 --- a/src/nasal_lexer.cpp +++ b/src/nasal_lexer.cpp @@ -6,6 +6,7 @@ #include "nasal_lexer.h" #include "repl/repl.h" +#include "util/util.h" #include "util/fs.h" namespace nasal { @@ -62,7 +63,7 @@ void lexer::err_char() { char c = res[ptr++]; err.err("lexer", {line, column-1, line, column, filename}, - "invalid character 0x" + char_to_hex(c) + "invalid character 0x" + util::char_to_hex(c) ); ++invalid_char; } @@ -109,7 +110,7 @@ std::string lexer::utf8_gen() { std::string str = ""; while(ptr(head); - if ((c>>5)==0x06) { // 110x xxxx (10xx xxxx)^1 - return 1; - } - if ((c>>4)==0x0e) { // 1110 xxxx (10xx xxxx)^2 - return 2; - } - if ((c>>3)==0x1e) { // 1111 0xxx (10xx xxxx)^3 - return 3; - } - return 0; -} - -std::string char_to_hex(const char c) { - const char hextbl[] = "0123456789abcdef"; - return {hextbl[(c&0xf0)>>4], hextbl[c&0x0f]}; -} - -std::string rawstr(const std::string& str, const usize maxlen) { - std::string ret(""); - for(auto i : str) { - // windows doesn't output unicode normally, so we output the hex - if (util::is_windows() && i<=0) { - ret += "\\x" + char_to_hex(i); - continue; - } - switch(i) { - case '\0': ret += "\\0"; break; - case '\a': ret += "\\a"; break; - case '\b': ret += "\\b"; break; - case '\t': ret += "\\t"; break; - case '\n': ret += "\\n"; break; - case '\v': ret += "\\v"; break; - case '\f': ret += "\\f"; break; - case '\r': ret += "\\r"; break; - case '\033':ret += "\\e"; break; - case '\"': ret += "\\\""; break; - case '\'': ret += "\\\'"; break; - case '\\': ret += "\\\\"; break; - default: ret += i; break; - } - } - if (maxlen && ret.length()>maxlen) { - ret = ret.substr(0, maxlen)+"..."; - } - return ret; -} - -} diff --git a/src/nasal_opcode.cpp b/src/nasal_opcode.cpp index 80a3439..b8ed77d 100644 --- a/src/nasal_opcode.cpp +++ b/src/nasal_opcode.cpp @@ -1,4 +1,5 @@ #include "nasal_opcode.h" +#include "util/util.h" namespace nasal { @@ -80,7 +81,7 @@ void codestream::dump(std::ostream& out) const { break; case op_lnkeqc: out << hex << "0x" << num << dec; - out << " (" << rawstr(const_string[num], 16) << ")"; + out << " (" << util::rawstr(const_string[num], 16) << ")"; break; case op_addecp: case op_subecp: @@ -91,7 +92,7 @@ void codestream::dump(std::ostream& out) const { break; case op_lnkecp: out << hex << "0x" << num << dec; - out << " (" << rawstr(const_string[num], 16) << ") sp-1"; + out << " (" << util::rawstr(const_string[num], 16) << ") sp-1"; break; case op_addc: case op_subc: @@ -141,7 +142,7 @@ void codestream::dump(std::ostream& out) const { case op_deft: case op_dyn: out << hex << "0x" << num << dec; - out << " (" << rawstr(const_string[num], 16) << ")"; + out << " (" << util::rawstr(const_string[num], 16) << ")"; break; default: if (files) { diff --git a/src/nasal_parse.cpp b/src/nasal_parse.cpp index e3628e6..811c6ce 100644 --- a/src/nasal_parse.cpp +++ b/src/nasal_parse.cpp @@ -1,5 +1,6 @@ #include "nasal_ast.h" #include "nasal_parse.h" +#include "util/util.h" namespace nasal { @@ -227,8 +228,10 @@ nil_expr* parse::nil() { } number_literal* parse::num() { - auto node = new number_literal(toks[ptr].loc, - str_to_num(toks[ptr].str.c_str())); + auto node = new number_literal( + toks[ptr].loc, + util::str_to_num(toks[ptr].str.c_str()) + ); match(tok::tk_num); return node; } diff --git a/src/nasal_type.cpp b/src/nasal_type.cpp index cdb6b38..c163541 100644 --- a/src/nasal_type.cpp +++ b/src/nasal_type.cpp @@ -1,4 +1,5 @@ #include "nasal_type.h" +#include "util/util.h" #include #include @@ -268,7 +269,7 @@ void nas_val::clear() { } f64 var::to_num() { - return type!=vm_type::vm_str? val.num:str_to_num(str().c_str()); + return type!=vm_type::vm_str? val.num:util::str_to_num(str().c_str()); } std::string var::to_str() { diff --git a/src/nasal_type.h b/src/nasal_type.h index 499befc..208babb 100644 --- a/src/nasal_type.h +++ b/src/nasal_type.h @@ -2,6 +2,9 @@ #include "nasal.h" +#include +#include +#include #include #include diff --git a/src/nasal_vm.cpp b/src/nasal_vm.cpp index abf9454..9515c1a 100644 --- a/src/nasal_vm.cpp +++ b/src/nasal_vm.cpp @@ -1,4 +1,5 @@ #include "nasal_vm.h" +#include "util/util.h" namespace nasal { @@ -75,7 +76,7 @@ void vm::value_info(var& val) { case vm_type::vm_nil: std::clog << "| nil |"; break; case vm_type::vm_num: std::clog << "| num | " << val.num(); break; case vm_type::vm_str: std::clog << "| str | <0x" << std::hex << p - << "> \"" << rawstr(val.str(), 16) + << "> \"" << util::rawstr(val.str(), 16) << "\"" << std::dec; break; case vm_type::vm_func: std::clog << "| func | <0x" << std::hex << p << std::dec << "> " << val.func(); diff --git a/src/nasal_vm.h b/src/nasal_vm.h index 2553481..8ed1109 100644 --- a/src/nasal_vm.h +++ b/src/nasal_vm.h @@ -8,6 +8,7 @@ #include "nasal_import.h" #include "nasal_gc.h" #include "nasal_codegen.h" +#include "util/util.h" #ifdef _MSC_VER #pragma warning (disable:4244) @@ -216,7 +217,7 @@ inline bool vm::cond(var& val) { if (val.is_num()) { return val.num(); } else if (val.is_str()) { - const f64 num = str_to_num(val.str().c_str()); + const f64 num = util::str_to_num(val.str().c_str()); return std::isnan(num)? !val.str().empty():num; } return false; @@ -333,7 +334,7 @@ inline void vm::o_lnot() { case vm_type::vm_nil: ctx.top[0] = one; break; case vm_type::vm_num: ctx.top[0] = val.num()? zero:one; break; case vm_type::vm_str: { - const f64 num = str_to_num(val.str().c_str()); + const f64 num = util::str_to_num(val.str().c_str()); if (std::isnan(num)) { ctx.top[0] = var::num(static_cast(val.str().empty())); } else { diff --git a/src/natives/io_lib.cpp b/src/natives/io_lib.cpp index 389a6a5..c5efb0b 100644 --- a/src/natives/io_lib.cpp +++ b/src/natives/io_lib.cpp @@ -1,6 +1,7 @@ #include "natives/io_lib.h" #include "util/fs.h" +#include #include namespace nasal { diff --git a/src/natives/json_lib.cpp b/src/natives/json_lib.cpp index e32cbfc..719120c 100644 --- a/src/natives/json_lib.cpp +++ b/src/natives/json_lib.cpp @@ -1,4 +1,5 @@ #include "natives/json_lib.h" +#include "util/util.h" #include #include @@ -170,7 +171,7 @@ void json::next() { } else if (text[ptr]!=' ' && text[ptr]!='\t' && text[ptr]!='\r') { error_info() += "json::parse: line " + std::to_string(line); error_info() += ": invalid character `0x"; - error_info() += char_to_hex(text[ptr]); + error_info() += util::char_to_hex(text[ptr]); error_info() += "`\n"; } ++ptr; @@ -253,7 +254,7 @@ void json::vector_member(nas_vec& vec, gc* ngc) { vec.elems.push_back(ngc->newstr(this_token.content)); next(); } else if (this_token.type==json_token_type::tok_num) { - vec.elems.push_back(var::num(str_to_num(this_token.content.c_str()))); + vec.elems.push_back(var::num(util::str_to_num(this_token.content.c_str()))); next(); } } @@ -292,7 +293,7 @@ void json::hash_member(nas_hash& hash, gc* ngc) { hash.elems.insert({name, ngc->newstr(this_token.content)}); next(); } else if (this_token.type==json_token_type::tok_num) { - hash.elems.insert({name, var::num(str_to_num(this_token.content.c_str()))}); + hash.elems.insert({name, var::num(util::str_to_num(this_token.content.c_str()))}); next(); } } diff --git a/src/util/util.cpp b/src/util/util.cpp index cf027fc..95dc052 100644 --- a/src/util/util.cpp +++ b/src/util/util.cpp @@ -1,5 +1,7 @@ #include "util/util.h" +#include + namespace nasal::util { bool is_windows() { @@ -118,4 +120,152 @@ const char* get_arch() { return "unknown"; } +u32 utf8_hdchk(const char head) { + // RFC-2279 but now we use RFC-3629 so nbytes is less than 4 + const auto c = static_cast(head); + if ((c>>5)==0x06) { // 110x xxxx (10xx xxxx)^1 + return 1; + } + if ((c>>4)==0x0e) { // 1110 xxxx (10xx xxxx)^2 + return 2; + } + if ((c>>3)==0x1e) { // 1111 0xxx (10xx xxxx)^3 + return 3; + } + return 0; +} + +std::string char_to_hex(const char c) { + const char hextbl[] = "0123456789abcdef"; + return {hextbl[(c&0xf0)>>4], hextbl[c&0x0f]}; +} + +std::string rawstr(const std::string& str, const usize maxlen) { + std::string ret(""); + for(auto i : str) { + // windows doesn't output unicode normally, so we output the hex + if (util::is_windows() && i<=0) { + ret += "\\x" + char_to_hex(i); + continue; + } + switch(i) { + case '\0': ret += "\\0"; break; + case '\a': ret += "\\a"; break; + case '\b': ret += "\\b"; break; + case '\t': ret += "\\t"; break; + case '\n': ret += "\\n"; break; + case '\v': ret += "\\v"; break; + case '\f': ret += "\\f"; break; + case '\r': ret += "\\r"; break; + case '\033':ret += "\\e"; break; + case '\"': ret += "\\\""; break; + case '\'': ret += "\\\'"; break; + case '\\': ret += "\\\\"; break; + default: ret += i; break; + } + } + if (maxlen && ret.length()>maxlen) { + ret = ret.substr(0, maxlen)+"..."; + } + return ret; +} + +f64 hex_to_f64(const char* str) { + f64 ret = 0; + for(; *str; ++str) { + if ('0'<=*str && *str<='9') { + ret = ret*16+(*str-'0'); + } else if ('a'<=*str && *str<='f') { + ret = ret*16+(*str-'a'+10); + } else if ('A'<=*str && *str<='F') { + ret = ret*16+(*str-'A'+10); + } else { + return nan(""); + } + } + return ret; +} + +f64 oct_to_f64(const char* str) { + f64 ret = 0; + while('0'<=*str && *str<'8') { + ret = ret*8+(*str++-'0'); + } + if (*str) { + return nan(""); + } + return ret; +} + +// we have the same reason not using atof here +// just as andy's interpreter does. +// it is not platform independent, and may have strange output. +// so we write a new function here to convert str to number manually. +// but this also makes 0.1+0.2==0.3, +// not another result that you may get in other languages. +f64 dec_to_f64(const char* str) { + f64 ret = 0, num_pow = 0; + bool negative = false; + while('0'<=*str && *str<='9') { + ret = ret*10+(*str++-'0'); + } + if (!*str) { + return ret; + } + if (*str=='.') { + if (!*++str) { + return nan(""); + } + num_pow = 0.1; + while('0'<=*str && *str<='9') { + ret += num_pow*(*str++-'0'); + num_pow *= 0.1; + } + if (!*str) { + return ret; + } + } + if (*str!='e' && *str!='E') { + return nan(""); + } + if (!*++str) { + return nan(""); + } + if (*str=='-' || *str=='+') { + negative = (*str++=='-'); + } + if (!*str) { + return nan(""); + } + num_pow = 0; + while('0'<=*str && *str<='9') { + num_pow = num_pow*10+(*str++-'0'); + } + if (*str) { + return nan(""); + } + return negative? + ret*std::pow(10, 1-num_pow)*0.1: + ret*std::pow(10, num_pow-1)*10; +} + +f64 str_to_num(const char* str) { + bool negative = false; + f64 res = 0; + if (*str=='-' || *str=='+') { + negative = (*str++=='-'); + } + if (!*str) { + return nan(""); + } + if (str[0]=='0' && str[1]=='x') { + res = hex_to_f64(str+2); + } else if (str[0]=='0' && str[1]=='o') { + res = oct_to_f64(str+2); + } else { + res = dec_to_f64(str); + } + return negative? -res:res; +} + } \ No newline at end of file diff --git a/src/util/util.h b/src/util/util.h index 551b264..4ab673e 100644 --- a/src/util/util.h +++ b/src/util/util.h @@ -1,4 +1,9 @@ -#pragma +#pragma once + +#include "nasal.h" + +#include +#include namespace nasal::util { @@ -16,4 +21,21 @@ bool is_superh(); const char* get_platform(); const char* get_arch(); +u32 utf8_hdchk(const char); + +std::string char_to_hex(const char); +std::string rawstr(const std::string&, const usize maxlen = 0); + +f64 hex_to_f64(const char*); +f64 oct_to_f64(const char*); +// we have the same reason not using atof here +// just as andy's interpreter does. +// it is not platform independent, and may have strange output. +// so we write a new function here to convert str to number manually. +// but this also makes 0.1+0.2==0.3, +// not another result that you may get in other languages. +f64 dec_to_f64(const char*); + +f64 str_to_num(const char*); + } \ No newline at end of file