From d42e4a58970b627f2bf65759d993c1003e98c641 Mon Sep 17 00:00:00 2001 From: ValKmjolnir Date: Sat, 2 Dec 2023 19:42:21 +0800 Subject: [PATCH] :memo: change CRLF to LF --- src/io_lib.cpp | 496 +++++++++++++------------- src/io_lib.h | 76 ++-- src/nasal_import.cpp | 802 ++++++++++++++++++++++--------------------- src/nasal_import.h | 114 +++--- src/nasal_lexer.cpp | 789 +++++++++++++++++++++--------------------- src/nasal_lexer.h | 4 - 6 files changed, 1140 insertions(+), 1141 deletions(-) diff --git a/src/io_lib.cpp b/src/io_lib.cpp index fdda983..bbe57d8 100644 --- a/src/io_lib.cpp +++ b/src/io_lib.cpp @@ -1,245 +1,251 @@ -#include "io_lib.h" - -namespace nasal { - -const auto file_type_name = "file"; - -void filehandle_destructor(void* ptr) { - fclose(static_cast(ptr)); -} - -var builtin_readfile(context* ctx, gc* ngc) { - auto filename = ctx->localr[1]; - if (!filename.is_str()) { - return nas_err("io::readfile", "\"filename\" must be string"); - } - std::ifstream in(filename.str(), std::ios::binary); - std::stringstream rd; - if (!in.fail()) { - rd << in.rdbuf(); - } - return ngc->newstr(rd.str()); -} - -var builtin_fout(context* ctx, gc* ngc) { - auto local = ctx->localr; - auto filename = local[1]; - auto source = local[2]; - if (!filename.is_str()) { - return nas_err("io::fout", "\"filename\" must be string"); - } - std::ofstream out(filename.str()); - if (out.fail()) { - return nas_err("io::fout", "cannot open <" + filename.str() + ">"); - } - out << source; - return nil; -} - -var builtin_exists(context* ctx, gc* ngc) { - auto filename = ctx->localr[1]; - if (!filename.is_str()) { - return zero; - } - return access(filename.str().c_str(), F_OK)!=-1? one:zero; -} - -var builtin_open(context* ctx, gc* ngc) { - auto local = ctx->localr; - auto name = local[1]; - auto mode = local[2]; - if (!name.is_str()) { - return nas_err("io::open", "\"filename\" must be string"); - } - if (!mode.is_str()) { - return nas_err("io::open", "\"mode\" must be string"); - } - auto file_descriptor = fopen(name.str().c_str(), mode.str().c_str()); - if (!file_descriptor) { - return nas_err("io::open", "failed to open file <" + name.str() + ">"); - } - var return_object = ngc->alloc(vm_type::vm_ghost); - return_object.ghost().set( - file_type_name, filehandle_destructor, nullptr, file_descriptor - ); - return return_object; -} - -var builtin_close(context* ctx, gc* ngc) { - var file_descriptor = ctx->localr[1]; - if (!file_descriptor.object_check(file_type_name)) { - return nas_err("io::close", "not a valid filehandle"); - } - file_descriptor.ghost().clear(); - return nil; -} - -var builtin_read(context* ctx, gc* ngc) { - auto local = ctx->localr; - auto file_descriptor = local[1]; - auto buffer = local[2]; - auto length = local[3]; - if (!file_descriptor.object_check(file_type_name)) { - return nas_err("io::read", "not a valid filehandle"); - } - if (!buffer.is_str() || buffer.val.gcobj->unmutable) { - return nas_err("io::read", "\"buf\" must be mutable string"); - } - if (!length.is_num()) { - return nas_err("io::read", "\"len\" must be number"); - } - if (length.num()<=0 || length.num()>=(1<<30)) { - return nas_err("io::read", "\"len\" less than 1 or too large"); - } - auto temp_buffer = new char[static_cast(length.num())+1]; - if (!temp_buffer) { - return nas_err("io::read", "malloc failed"); - } - auto read_size = fread( - temp_buffer, 1, length.num(), - static_cast(file_descriptor.ghost().pointer) - ); - buffer.str() = temp_buffer; - buffer.val.gcobj->unmutable = true; - delete []temp_buffer; - return var::num(read_size); -} - -var builtin_write(context* ctx, gc* ngc) { - auto local = ctx->localr; - auto file_descriptor = local[1]; - auto source = local[2]; - if (!file_descriptor.object_check(file_type_name)) { - return nas_err("io::write", "not a valid filehandle"); - } - if (!source.is_str()) { - return nas_err("io::write", "\"str\" must be string"); - } - return var::num(static_cast(fwrite( - source.str().c_str(), 1, source.str().length(), - static_cast(file_descriptor.ghost().pointer) - ))); -} - -var builtin_seek(context* ctx, gc* ngc) { - auto local = ctx->localr; - auto file_descriptor = local[1]; - auto position = local[2]; - auto whence = local[3]; - if (!file_descriptor.object_check(file_type_name)) { - return nas_err("io::seek", "not a valid filehandle"); - } - return var::num(static_cast(fseek( - static_cast(file_descriptor.ghost().pointer), - position.num(), - whence.num() - ))); -} - -var builtin_tell(context* ctx, gc* ngc) { - auto file_descriptor = ctx->localr[1]; - if (!file_descriptor.object_check(file_type_name)) { - return nas_err("io::tell", "not a valid filehandle"); - } - return var::num(static_cast( - ftell(static_cast(file_descriptor.ghost().pointer)) - )); -} - -var builtin_readln(context* ctx, gc* ngc) { - auto file_descriptor = ctx->localr[1]; - if (!file_descriptor.object_check(file_type_name)) { - return nas_err("io::readln", "not a valid filehandle"); - } - auto result = ngc->alloc(vm_type::vm_str); - char c; - while((c = fgetc(static_cast(file_descriptor.ghost().pointer)))!=EOF) { - if (c=='\r') { - continue; - } - if (c=='\n') { - return result; - } - result.str().push_back(c); - } - if (result.str().length()) { - return result; - } - return nil; -} - -var builtin_stat(context* ctx, gc* ngc) { - auto name = ctx->localr[1]; - if (!name.is_str()) { - return nas_err("io::stat", "\"filename\" must be string"); - } - struct stat buffer; - if (stat(name.str().c_str(), &buffer)<0) { - return nas_err("io::stat", "failed to open file <" + name.str() + ">"); - } - auto result = ngc->alloc(vm_type::vm_vec); - result.vec().elems = { - var::num(static_cast(buffer.st_dev)), - var::num(static_cast(buffer.st_ino)), - var::num(static_cast(buffer.st_mode)), - var::num(static_cast(buffer.st_nlink)), - var::num(static_cast(buffer.st_uid)), - var::num(static_cast(buffer.st_gid)), - var::num(static_cast(buffer.st_rdev)), - var::num(static_cast(buffer.st_size)), - var::num(static_cast(buffer.st_atime)), - var::num(static_cast(buffer.st_mtime)), - var::num(static_cast(buffer.st_ctime)) - }; - return result; -} - -var builtin_eof(context* ctx, gc* ngc) { - auto file_descriptor = ctx->localr[1]; - if (!file_descriptor.object_check(file_type_name)) { - return nas_err("io::readln", "not a valid filehandle"); - } - return var::num(static_cast( - feof(static_cast(file_descriptor.ghost().pointer)) - )); -} - -var builtin_stdin(context* ctx, gc* ngc) { - auto file_descriptor = ngc->alloc(vm_type::vm_ghost); - file_descriptor.ghost().set(file_type_name, nullptr, nullptr, stdin); - return file_descriptor; -} - -var builtin_stdout(context* ctx, gc* ngc) { - auto file_descriptor = ngc->alloc(vm_type::vm_ghost); - file_descriptor.ghost().set(file_type_name, nullptr, nullptr, stdout); - return file_descriptor; -} - -var builtin_stderr(context* ctx, gc* ngc) { - auto file_descriptor = ngc->alloc(vm_type::vm_ghost); - file_descriptor.ghost().set(file_type_name, nullptr, nullptr, stderr); - return file_descriptor; -} - - -nasal_builtin_table io_lib_native[] = { - {"__readfile", builtin_readfile}, - {"__fout", builtin_fout}, - {"__exists", builtin_exists}, - {"__open", builtin_open}, - {"__close", builtin_close}, - {"__read", builtin_read}, - {"__write", builtin_write}, - {"__seek", builtin_seek}, - {"__tell", builtin_tell}, - {"__readln", builtin_readln}, - {"__stat", builtin_stat}, - {"__eof", builtin_eof}, - {"__stdin", builtin_stdin}, - {"__stdout", builtin_stdout}, - {"__stderr", builtin_stderr}, - {nullptr, nullptr} -}; - -} +#include "io_lib.h" + +#ifdef _MSC_VER +#define F_OK 0 // fuck msc +#endif + +#include + +namespace nasal { + +const auto file_type_name = "file"; + +void filehandle_destructor(void* ptr) { + fclose(static_cast(ptr)); +} + +var builtin_readfile(context* ctx, gc* ngc) { + auto filename = ctx->localr[1]; + if (!filename.is_str()) { + return nas_err("io::readfile", "\"filename\" must be string"); + } + std::ifstream in(filename.str(), std::ios::binary); + std::stringstream rd; + if (!in.fail()) { + rd << in.rdbuf(); + } + return ngc->newstr(rd.str()); +} + +var builtin_fout(context* ctx, gc* ngc) { + auto local = ctx->localr; + auto filename = local[1]; + auto source = local[2]; + if (!filename.is_str()) { + return nas_err("io::fout", "\"filename\" must be string"); + } + std::ofstream out(filename.str()); + if (out.fail()) { + return nas_err("io::fout", "cannot open <" + filename.str() + ">"); + } + out << source; + return nil; +} + +var builtin_exists(context* ctx, gc* ngc) { + auto filename = ctx->localr[1]; + if (!filename.is_str()) { + return zero; + } + return access(filename.str().c_str(), F_OK)!=-1? one:zero; +} + +var builtin_open(context* ctx, gc* ngc) { + auto local = ctx->localr; + auto name = local[1]; + auto mode = local[2]; + if (!name.is_str()) { + return nas_err("io::open", "\"filename\" must be string"); + } + if (!mode.is_str()) { + return nas_err("io::open", "\"mode\" must be string"); + } + auto file_descriptor = fopen(name.str().c_str(), mode.str().c_str()); + if (!file_descriptor) { + return nas_err("io::open", "failed to open file <" + name.str() + ">"); + } + var return_object = ngc->alloc(vm_type::vm_ghost); + return_object.ghost().set( + file_type_name, filehandle_destructor, nullptr, file_descriptor + ); + return return_object; +} + +var builtin_close(context* ctx, gc* ngc) { + var file_descriptor = ctx->localr[1]; + if (!file_descriptor.object_check(file_type_name)) { + return nas_err("io::close", "not a valid filehandle"); + } + file_descriptor.ghost().clear(); + return nil; +} + +var builtin_read(context* ctx, gc* ngc) { + auto local = ctx->localr; + auto file_descriptor = local[1]; + auto buffer = local[2]; + auto length = local[3]; + if (!file_descriptor.object_check(file_type_name)) { + return nas_err("io::read", "not a valid filehandle"); + } + if (!buffer.is_str() || buffer.val.gcobj->unmutable) { + return nas_err("io::read", "\"buf\" must be mutable string"); + } + if (!length.is_num()) { + return nas_err("io::read", "\"len\" must be number"); + } + if (length.num()<=0 || length.num()>=(1<<30)) { + return nas_err("io::read", "\"len\" less than 1 or too large"); + } + auto temp_buffer = new char[static_cast(length.num())+1]; + if (!temp_buffer) { + return nas_err("io::read", "malloc failed"); + } + auto read_size = fread( + temp_buffer, 1, length.num(), + static_cast(file_descriptor.ghost().pointer) + ); + buffer.str() = temp_buffer; + buffer.val.gcobj->unmutable = true; + delete []temp_buffer; + return var::num(read_size); +} + +var builtin_write(context* ctx, gc* ngc) { + auto local = ctx->localr; + auto file_descriptor = local[1]; + auto source = local[2]; + if (!file_descriptor.object_check(file_type_name)) { + return nas_err("io::write", "not a valid filehandle"); + } + if (!source.is_str()) { + return nas_err("io::write", "\"str\" must be string"); + } + return var::num(static_cast(fwrite( + source.str().c_str(), 1, source.str().length(), + static_cast(file_descriptor.ghost().pointer) + ))); +} + +var builtin_seek(context* ctx, gc* ngc) { + auto local = ctx->localr; + auto file_descriptor = local[1]; + auto position = local[2]; + auto whence = local[3]; + if (!file_descriptor.object_check(file_type_name)) { + return nas_err("io::seek", "not a valid filehandle"); + } + return var::num(static_cast(fseek( + static_cast(file_descriptor.ghost().pointer), + position.num(), + whence.num() + ))); +} + +var builtin_tell(context* ctx, gc* ngc) { + auto file_descriptor = ctx->localr[1]; + if (!file_descriptor.object_check(file_type_name)) { + return nas_err("io::tell", "not a valid filehandle"); + } + return var::num(static_cast( + ftell(static_cast(file_descriptor.ghost().pointer)) + )); +} + +var builtin_readln(context* ctx, gc* ngc) { + auto file_descriptor = ctx->localr[1]; + if (!file_descriptor.object_check(file_type_name)) { + return nas_err("io::readln", "not a valid filehandle"); + } + auto result = ngc->alloc(vm_type::vm_str); + char c; + while((c = fgetc(static_cast(file_descriptor.ghost().pointer)))!=EOF) { + if (c=='\r') { + continue; + } + if (c=='\n') { + return result; + } + result.str().push_back(c); + } + if (result.str().length()) { + return result; + } + return nil; +} + +var builtin_stat(context* ctx, gc* ngc) { + auto name = ctx->localr[1]; + if (!name.is_str()) { + return nas_err("io::stat", "\"filename\" must be string"); + } + struct stat buffer; + if (stat(name.str().c_str(), &buffer)<0) { + return nas_err("io::stat", "failed to open file <" + name.str() + ">"); + } + auto result = ngc->alloc(vm_type::vm_vec); + result.vec().elems = { + var::num(static_cast(buffer.st_dev)), + var::num(static_cast(buffer.st_ino)), + var::num(static_cast(buffer.st_mode)), + var::num(static_cast(buffer.st_nlink)), + var::num(static_cast(buffer.st_uid)), + var::num(static_cast(buffer.st_gid)), + var::num(static_cast(buffer.st_rdev)), + var::num(static_cast(buffer.st_size)), + var::num(static_cast(buffer.st_atime)), + var::num(static_cast(buffer.st_mtime)), + var::num(static_cast(buffer.st_ctime)) + }; + return result; +} + +var builtin_eof(context* ctx, gc* ngc) { + auto file_descriptor = ctx->localr[1]; + if (!file_descriptor.object_check(file_type_name)) { + return nas_err("io::readln", "not a valid filehandle"); + } + return var::num(static_cast( + feof(static_cast(file_descriptor.ghost().pointer)) + )); +} + +var builtin_stdin(context* ctx, gc* ngc) { + auto file_descriptor = ngc->alloc(vm_type::vm_ghost); + file_descriptor.ghost().set(file_type_name, nullptr, nullptr, stdin); + return file_descriptor; +} + +var builtin_stdout(context* ctx, gc* ngc) { + auto file_descriptor = ngc->alloc(vm_type::vm_ghost); + file_descriptor.ghost().set(file_type_name, nullptr, nullptr, stdout); + return file_descriptor; +} + +var builtin_stderr(context* ctx, gc* ngc) { + auto file_descriptor = ngc->alloc(vm_type::vm_ghost); + file_descriptor.ghost().set(file_type_name, nullptr, nullptr, stderr); + return file_descriptor; +} + + +nasal_builtin_table io_lib_native[] = { + {"__readfile", builtin_readfile}, + {"__fout", builtin_fout}, + {"__exists", builtin_exists}, + {"__open", builtin_open}, + {"__close", builtin_close}, + {"__read", builtin_read}, + {"__write", builtin_write}, + {"__seek", builtin_seek}, + {"__tell", builtin_tell}, + {"__readln", builtin_readln}, + {"__stat", builtin_stat}, + {"__eof", builtin_eof}, + {"__stdin", builtin_stdin}, + {"__stdout", builtin_stdout}, + {"__stderr", builtin_stderr}, + {nullptr, nullptr} +}; + +} diff --git a/src/io_lib.h b/src/io_lib.h index e1f4e97..0940867 100644 --- a/src/io_lib.h +++ b/src/io_lib.h @@ -1,41 +1,35 @@ -#pragma once - -#include "nasal.h" -#include "nasal_gc.h" -#include "nasal_builtin.h" - -#include - -#ifndef _MSC_VER -#include -#else -#include -#endif - -#ifdef _MSC_VER -#define F_OK 0 // fuck msc -#endif - -namespace nasal { - -void filehandle_destructor(void*); - -var builtin_readfile(context*, gc*); -var builtin_fout(context*, gc*); -var builtin_exists(context*, gc*); -var builtin_open(context*, gc*); -var builtin_close(context*, gc*); -var builtin_read(context*, gc*); -var builtin_write(context*, gc*); -var builtin_seek(context*, gc*); -var builtin_tell(context*, gc*); -var builtin_readln(context*, gc*); -var builtin_stat(context*, gc*); -var builtin_eof(context*, gc*); -var builtin_stdin(context*, gc*); -var builtin_stdout(context*, gc*); -var builtin_stderr(context*, gc*); - -extern nasal_builtin_table io_lib_native[]; - -} +#pragma once + +#include "nasal.h" +#include "nasal_gc.h" +#include "nasal_builtin.h" + +#ifndef _MSC_VER +#include +#else +#include +#endif + +namespace nasal { + +void filehandle_destructor(void*); + +var builtin_readfile(context*, gc*); +var builtin_fout(context*, gc*); +var builtin_exists(context*, gc*); +var builtin_open(context*, gc*); +var builtin_close(context*, gc*); +var builtin_read(context*, gc*); +var builtin_write(context*, gc*); +var builtin_seek(context*, gc*); +var builtin_tell(context*, gc*); +var builtin_readln(context*, gc*); +var builtin_stat(context*, gc*); +var builtin_eof(context*, gc*); +var builtin_stdin(context*, gc*); +var builtin_stdout(context*, gc*); +var builtin_stderr(context*, gc*); + +extern nasal_builtin_table io_lib_native[]; + +} diff --git a/src/nasal_import.cpp b/src/nasal_import.cpp index 2904b82..b0fe98f 100644 --- a/src/nasal_import.cpp +++ b/src/nasal_import.cpp @@ -1,399 +1,403 @@ -#include "nasal_import.h" -#include "symbol_finder.h" - -#include -#include - -namespace nasal { - -linker::linker(): show_path_flag(false), library_loaded(false), this_file("") { - const auto seperator= is_windows()? ';':':'; - const auto PATH = std::string(getenv("PATH")); - usize last = 0, position = PATH.find(seperator, 0); - while(position!=std::string::npos) { - std::string dirpath = PATH.substr(last, position-last); - if (dirpath.length()) { - envpath.push_back(dirpath); - } - last = position+1; - position = PATH.find(seperator, last); - } - if (last!=PATH.length()) { - envpath.push_back(PATH.substr(last)); - } -} - -std::string linker::get_path(expr* node) { - if (node->get_type()==expr_type::ast_use) { - auto file_relative_path = std::string(""); - const auto& path = reinterpret_cast(node)->get_path(); - for(auto i : path) { - file_relative_path += i->get_name(); - if (i!=path.back()) { - file_relative_path += (is_windows()? "\\":"/"); - } - } - return file_relative_path + ".nas"; - } - auto call_node = reinterpret_cast(node); - auto arguments = reinterpret_cast(call_node->get_calls()[0]); - auto content = reinterpret_cast(arguments->get_argument()[0]); - return content->get_content(); -} - -std::string linker::find_real_file_path( - const std::string& filename, const span& location) { - // first add file name itself into the file path - std::vector path_list = {filename}; - - // generate search path from environ path - for(const auto& p : envpath) { - path_list.push_back(p + (is_windows()? "\\":"/") + filename); - } - - // search file - for(const auto& path : path_list) { - if (access(path.c_str(), F_OK)!=-1) { - return path; - } - } - - // we will find lib.nas in nasal std directory - if (filename=="lib.nas") { - return is_windows()? - find_real_file_path("std\\lib.nas", location): - find_real_file_path("std/lib.nas", location); - } - if (!show_path_flag) { - err.err("link", - "in <" + location.file + ">: " + - "cannot find file <" + filename + ">, " + - "use <-d> to get detail search path" - ); - return ""; - } - auto path_list_info = std::string(""); - for(const auto& path : path_list) { - path_list_info += " -> " + path + "\n"; - } - err.err("link", - "in <" + location.file + ">: " + - "cannot find file <" + filename + - "> in these paths:\n" + path_list_info - ); - return ""; -} - -bool linker::import_check(expr* node) { - if (node->get_type()==expr_type::ast_use) { - return true; - } -/* - call - |_id:import - |_call_func - |_string:'filename' -*/ - if (node->get_type()!=expr_type::ast_call) { - return false; - } - auto call_node = reinterpret_cast(node); - auto first_expr = call_node->get_first(); - if (first_expr->get_type()!=expr_type::ast_id) { - return false; - } - if (reinterpret_cast(first_expr)->get_name()!="import") { - return false; - } - if (!call_node->get_calls().size()) { - return false; - } - - // import("xxx"); - if (call_node->get_calls().size()!=1) { - return false; - } - auto maybe_func_call = call_node->get_calls()[0]; - if (maybe_func_call->get_type()!=expr_type::ast_callf) { - return false; - } - auto func_call = reinterpret_cast(maybe_func_call); - if (func_call->get_argument().size()!=1) { - return false; - } - if (func_call->get_argument()[0]->get_type()!=expr_type::ast_str) { - return false; - } - return true; -} - -bool linker::check_exist_or_record_file(const std::string& file) { - // avoid importing the same file - for(const auto& name : imported_files) { - if (file==name) { - return true; - } - } - imported_files.push_back(file); - return false; -} - -bool linker::check_self_import(const std::string& file) { - for(const auto& name : module_load_stack) { - if (file==name) { - return true; - } - } - return false; -} - -std::string linker::generate_self_import_path(const std::string& filename) { - std::string res = ""; - for(const auto& i : module_load_stack) { - res += "[" + i + "] -> "; - } - return res + "[" + filename + "]"; -} - -void linker::link(code_block* new_tree_root, code_block* old_tree_root) { - // add children of add_root to the back of root - for(auto& i : old_tree_root->get_expressions()) { - new_tree_root->add_expression(i); - } - // clean old root - old_tree_root->get_expressions().clear(); -} - -code_block* linker::import_regular_file( - expr* node, std::unordered_set& used_modules) { - // get filename - auto filename = get_path(node); - - // avoid infinite loading loop - filename = find_real_file_path(filename, node->get_location()); - // if get empty string(error) or this file is used before, do not parse - if (!filename.length() || used_modules.count(filename)) { - return new code_block({0, 0, 0, 0, filename}); - } - - // check self import, avoid infinite loading loop - if (check_self_import(filename)) { - err.err("link", - "self-referenced module <" + filename + ">:\n" + - " reference path: " + generate_self_import_path(filename) - ); - return new code_block({0, 0, 0, 0, filename}); - } - check_exist_or_record_file(filename); - - module_load_stack.push_back(filename); - // start importing... - lexer nasal_lexer; - parse nasal_parser; - if (nasal_lexer.scan(filename).geterr()) { - err.err("link", "error occurred when analysing <" + filename + ">"); - return new code_block({0, 0, 0, 0, filename}); - } - if (nasal_parser.compile(nasal_lexer).geterr()) { - err.err("link", "error occurred when analysing <" + filename + ">"); - return new code_block({0, 0, 0, 0, filename}); - } - // swap result out - auto parse_result = nasal_parser.swap(nullptr); - - // check if parse result has 'import' - auto result = load(parse_result, filename); - module_load_stack.pop_back(); - return result; -} - -code_block* linker::import_nasal_lib() { - auto path = find_real_file_path( - "lib.nas", {0, 0, 0, 0, this_file} - ); - if (!path.length()) { - return new code_block({0, 0, 0, 0, path}); - } - - // avoid infinite loading library - if (check_exist_or_record_file(path)) { - return new code_block({0, 0, 0, 0, path}); - } - - // start importing... - lexer nasal_lexer; - parse nasal_parser; - if (nasal_lexer.scan(path).geterr()) { - err.err("link", - "error occurred when analysing library <" + path + ">" - ); - return new code_block({0, 0, 0, 0, path}); - } - if (nasal_parser.compile(nasal_lexer).geterr()) { - err.err("link", - "error occurred when analysing library <" + path + ">" - ); - return new code_block({0, 0, 0, 0, path}); - } - // swap result out - auto parse_result = nasal_parser.swap(nullptr); - // check if library has 'import' (in fact it should not) - return load(parse_result, path); -} - -std::string linker::generate_module_name(const std::string& file_path) { - auto error_name = "module@[" + file_path + "]"; - if (!file_path.length()) { - return error_name; - } - - // check file suffix and get file suffix position - auto suffix_position = file_path.find(".nas"); - if (suffix_position==std::string::npos) { - err.warn("link", - "get invalid module name from <" + file_path + ">, " + - "will not be easily accessed. " + - "\".nas\" suffix is required." - ); - return error_name; - } - if (suffix_position+4!=file_path.length()) { - err.warn("link", - "get invalid module name from <" + file_path + ">, " + - "will not be easily accessed. " + - "only one \".nas\" suffix is required in the path." - ); - return error_name; - } - - // only get the file name as module name, directory path is not included - auto split_position = file_path.find_last_of("/"); - // find "\\" in windows platform - if (split_position==std::string::npos) { - split_position = file_path.find_last_of("\\"); - } - - // split file path to get module name - auto module_name = split_position==std::string::npos? - file_path.substr(0, suffix_position): - file_path.substr(split_position+1, suffix_position-split_position-1); - - // check validation of module name - if (!module_name.length()) { - err.warn("link", - "get empty module name from <" + file_path + ">, " + - "will not be easily accessed." - ); - return module_name; - } - if (std::isdigit(module_name[0]) || - module_name.find(".")!=std::string::npos || - module_name.find("-")!=std::string::npos) { - err.warn("link", - "get module <" + module_name + "> from <" + file_path + ">, " + - "will not be easily accessed." - ); - } - return module_name; -} - -return_expr* linker::generate_module_return(code_block* block) { - auto finder = std::unique_ptr(new symbol_finder); - auto result = new return_expr(block->get_location()); - auto value = new hash_expr(block->get_location()); - result->set_value(value); - for(const auto& i : finder->do_find(block)) { - auto pair = new hash_pair(block->get_location()); - // do not export symbol begins with '_' - if (i.name.length() && i.name[0]=='_') { - continue; - } - pair->set_name(i.name); - pair->set_value(new identifier(block->get_location(), i.name)); - value->add_member(pair); - } - return result; -} - -definition_expr* linker::generate_module_definition(code_block* block) { - auto def = new definition_expr(block->get_location()); - def->set_identifier(new identifier( - block->get_location(), - generate_module_name(block->get_location().file) - )); - - auto call = new call_expr(block->get_location()); - auto func = new function(block->get_location()); - func->set_code_block(block); - func->get_code_block()->add_expression(generate_module_return(block)); - call->set_first(func); - call->add_call(new call_function(block->get_location())); - - def->set_value(call); - return def; -} - -code_block* linker::load(code_block* program_root, const std::string& filename) { - auto tree = new code_block({0, 0, 0, 0, filename}); - // load library, this ast will be linked with root directly - // so no extra namespace is generated - if (!library_loaded) { - auto nasal_lib_code_block = import_nasal_lib(); - // insert nasal lib code to the back of tree - link(tree, nasal_lib_code_block); - delete nasal_lib_code_block; - library_loaded = true; - } - - // load imported modules - std::unordered_set used_modules = {}; - for(auto& import_node : program_root->get_expressions()) { - if (!import_check(import_node)) { - break; - } - // parse file and get ast - auto module_code_block = import_regular_file(import_node, used_modules); - auto replace_node = new null_expr(import_node->get_location()); - // after importing the regular file as module, delete this node - delete import_node; - // and replace the node with null_expr node - import_node = replace_node; - - // avoid repeatedly importing the same module - const auto& module_path = module_code_block->get_location().file; - if (used_modules.count(module_path)) { - delete module_code_block; - continue; - } - - // then we generate a function warping the code block, - // and export the necessary global symbols in this code block - // by generate a return statement, with a hashmap return value - used_modules.insert(module_path); - tree->add_expression(generate_module_definition(module_code_block)); - } - - // insert program root to the back of tree - link(tree, program_root); - return tree; -} - -const error& linker::link( - parse& parse, const std::string& self, bool spath = false) { - // switch for showing path when errors occur - show_path_flag = spath; - - // initializing file map - this_file = self; - imported_files = {self}; - module_load_stack = {self}; - - // scan root and import files - // then generate a new ast and return to import_ast - auto new_tree_root = load(parse.tree(), self); - auto old_tree_root = parse.swap(new_tree_root); - delete old_tree_root; - return err; -} - -} +#include "nasal_import.h" +#include "symbol_finder.h" + +#include +#include + +#ifdef _MSC_VER +#define F_OK 0 // fuck msc +#endif + +namespace nasal { + +linker::linker(): show_path_flag(false), library_loaded(false), this_file("") { + const auto seperator= is_windows()? ';':':'; + const auto PATH = std::string(getenv("PATH")); + usize last = 0, position = PATH.find(seperator, 0); + while(position!=std::string::npos) { + std::string dirpath = PATH.substr(last, position-last); + if (dirpath.length()) { + envpath.push_back(dirpath); + } + last = position+1; + position = PATH.find(seperator, last); + } + if (last!=PATH.length()) { + envpath.push_back(PATH.substr(last)); + } +} + +std::string linker::get_path(expr* node) { + if (node->get_type()==expr_type::ast_use) { + auto file_relative_path = std::string(""); + const auto& path = reinterpret_cast(node)->get_path(); + for(auto i : path) { + file_relative_path += i->get_name(); + if (i!=path.back()) { + file_relative_path += (is_windows()? "\\":"/"); + } + } + return file_relative_path + ".nas"; + } + auto call_node = reinterpret_cast(node); + auto arguments = reinterpret_cast(call_node->get_calls()[0]); + auto content = reinterpret_cast(arguments->get_argument()[0]); + return content->get_content(); +} + +std::string linker::find_real_file_path( + const std::string& filename, const span& location) { + // first add file name itself into the file path + std::vector path_list = {filename}; + + // generate search path from environ path + for(const auto& p : envpath) { + path_list.push_back(p + (is_windows()? "\\":"/") + filename); + } + + // search file + for(const auto& path : path_list) { + if (access(path.c_str(), F_OK)!=-1) { + return path; + } + } + + // we will find lib.nas in nasal std directory + if (filename=="lib.nas") { + return is_windows()? + find_real_file_path("std\\lib.nas", location): + find_real_file_path("std/lib.nas", location); + } + if (!show_path_flag) { + err.err("link", + "in <" + location.file + ">: " + + "cannot find file <" + filename + ">, " + + "use <-d> to get detail search path" + ); + return ""; + } + auto path_list_info = std::string(""); + for(const auto& path : path_list) { + path_list_info += " -> " + path + "\n"; + } + err.err("link", + "in <" + location.file + ">: " + + "cannot find file <" + filename + + "> in these paths:\n" + path_list_info + ); + return ""; +} + +bool linker::import_check(expr* node) { + if (node->get_type()==expr_type::ast_use) { + return true; + } +/* + call + |_id:import + |_call_func + |_string:'filename' +*/ + if (node->get_type()!=expr_type::ast_call) { + return false; + } + auto call_node = reinterpret_cast(node); + auto first_expr = call_node->get_first(); + if (first_expr->get_type()!=expr_type::ast_id) { + return false; + } + if (reinterpret_cast(first_expr)->get_name()!="import") { + return false; + } + if (!call_node->get_calls().size()) { + return false; + } + + // import("xxx"); + if (call_node->get_calls().size()!=1) { + return false; + } + auto maybe_func_call = call_node->get_calls()[0]; + if (maybe_func_call->get_type()!=expr_type::ast_callf) { + return false; + } + auto func_call = reinterpret_cast(maybe_func_call); + if (func_call->get_argument().size()!=1) { + return false; + } + if (func_call->get_argument()[0]->get_type()!=expr_type::ast_str) { + return false; + } + return true; +} + +bool linker::check_exist_or_record_file(const std::string& file) { + // avoid importing the same file + for(const auto& name : imported_files) { + if (file==name) { + return true; + } + } + imported_files.push_back(file); + return false; +} + +bool linker::check_self_import(const std::string& file) { + for(const auto& name : module_load_stack) { + if (file==name) { + return true; + } + } + return false; +} + +std::string linker::generate_self_import_path(const std::string& filename) { + std::string res = ""; + for(const auto& i : module_load_stack) { + res += "[" + i + "] -> "; + } + return res + "[" + filename + "]"; +} + +void linker::link(code_block* new_tree_root, code_block* old_tree_root) { + // add children of add_root to the back of root + for(auto& i : old_tree_root->get_expressions()) { + new_tree_root->add_expression(i); + } + // clean old root + old_tree_root->get_expressions().clear(); +} + +code_block* linker::import_regular_file( + expr* node, std::unordered_set& used_modules) { + // get filename + auto filename = get_path(node); + + // avoid infinite loading loop + filename = find_real_file_path(filename, node->get_location()); + // if get empty string(error) or this file is used before, do not parse + if (!filename.length() || used_modules.count(filename)) { + return new code_block({0, 0, 0, 0, filename}); + } + + // check self import, avoid infinite loading loop + if (check_self_import(filename)) { + err.err("link", + "self-referenced module <" + filename + ">:\n" + + " reference path: " + generate_self_import_path(filename) + ); + return new code_block({0, 0, 0, 0, filename}); + } + check_exist_or_record_file(filename); + + module_load_stack.push_back(filename); + // start importing... + lexer nasal_lexer; + parse nasal_parser; + if (nasal_lexer.scan(filename).geterr()) { + err.err("link", "error occurred when analysing <" + filename + ">"); + return new code_block({0, 0, 0, 0, filename}); + } + if (nasal_parser.compile(nasal_lexer).geterr()) { + err.err("link", "error occurred when analysing <" + filename + ">"); + return new code_block({0, 0, 0, 0, filename}); + } + // swap result out + auto parse_result = nasal_parser.swap(nullptr); + + // check if parse result has 'import' + auto result = load(parse_result, filename); + module_load_stack.pop_back(); + return result; +} + +code_block* linker::import_nasal_lib() { + auto path = find_real_file_path( + "lib.nas", {0, 0, 0, 0, this_file} + ); + if (!path.length()) { + return new code_block({0, 0, 0, 0, path}); + } + + // avoid infinite loading library + if (check_exist_or_record_file(path)) { + return new code_block({0, 0, 0, 0, path}); + } + + // start importing... + lexer nasal_lexer; + parse nasal_parser; + if (nasal_lexer.scan(path).geterr()) { + err.err("link", + "error occurred when analysing library <" + path + ">" + ); + return new code_block({0, 0, 0, 0, path}); + } + if (nasal_parser.compile(nasal_lexer).geterr()) { + err.err("link", + "error occurred when analysing library <" + path + ">" + ); + return new code_block({0, 0, 0, 0, path}); + } + // swap result out + auto parse_result = nasal_parser.swap(nullptr); + // check if library has 'import' (in fact it should not) + return load(parse_result, path); +} + +std::string linker::generate_module_name(const std::string& file_path) { + auto error_name = "module@[" + file_path + "]"; + if (!file_path.length()) { + return error_name; + } + + // check file suffix and get file suffix position + auto suffix_position = file_path.find(".nas"); + if (suffix_position==std::string::npos) { + err.warn("link", + "get invalid module name from <" + file_path + ">, " + + "will not be easily accessed. " + + "\".nas\" suffix is required." + ); + return error_name; + } + if (suffix_position+4!=file_path.length()) { + err.warn("link", + "get invalid module name from <" + file_path + ">, " + + "will not be easily accessed. " + + "only one \".nas\" suffix is required in the path." + ); + return error_name; + } + + // only get the file name as module name, directory path is not included + auto split_position = file_path.find_last_of("/"); + // find "\\" in windows platform + if (split_position==std::string::npos) { + split_position = file_path.find_last_of("\\"); + } + + // split file path to get module name + auto module_name = split_position==std::string::npos? + file_path.substr(0, suffix_position): + file_path.substr(split_position+1, suffix_position-split_position-1); + + // check validation of module name + if (!module_name.length()) { + err.warn("link", + "get empty module name from <" + file_path + ">, " + + "will not be easily accessed." + ); + return module_name; + } + if (std::isdigit(module_name[0]) || + module_name.find(".")!=std::string::npos || + module_name.find("-")!=std::string::npos) { + err.warn("link", + "get module <" + module_name + "> from <" + file_path + ">, " + + "will not be easily accessed." + ); + } + return module_name; +} + +return_expr* linker::generate_module_return(code_block* block) { + auto finder = std::unique_ptr(new symbol_finder); + auto result = new return_expr(block->get_location()); + auto value = new hash_expr(block->get_location()); + result->set_value(value); + for(const auto& i : finder->do_find(block)) { + auto pair = new hash_pair(block->get_location()); + // do not export symbol begins with '_' + if (i.name.length() && i.name[0]=='_') { + continue; + } + pair->set_name(i.name); + pair->set_value(new identifier(block->get_location(), i.name)); + value->add_member(pair); + } + return result; +} + +definition_expr* linker::generate_module_definition(code_block* block) { + auto def = new definition_expr(block->get_location()); + def->set_identifier(new identifier( + block->get_location(), + generate_module_name(block->get_location().file) + )); + + auto call = new call_expr(block->get_location()); + auto func = new function(block->get_location()); + func->set_code_block(block); + func->get_code_block()->add_expression(generate_module_return(block)); + call->set_first(func); + call->add_call(new call_function(block->get_location())); + + def->set_value(call); + return def; +} + +code_block* linker::load(code_block* program_root, const std::string& filename) { + auto tree = new code_block({0, 0, 0, 0, filename}); + // load library, this ast will be linked with root directly + // so no extra namespace is generated + if (!library_loaded) { + auto nasal_lib_code_block = import_nasal_lib(); + // insert nasal lib code to the back of tree + link(tree, nasal_lib_code_block); + delete nasal_lib_code_block; + library_loaded = true; + } + + // load imported modules + std::unordered_set used_modules = {}; + for(auto& import_node : program_root->get_expressions()) { + if (!import_check(import_node)) { + break; + } + // parse file and get ast + auto module_code_block = import_regular_file(import_node, used_modules); + auto replace_node = new null_expr(import_node->get_location()); + // after importing the regular file as module, delete this node + delete import_node; + // and replace the node with null_expr node + import_node = replace_node; + + // avoid repeatedly importing the same module + const auto& module_path = module_code_block->get_location().file; + if (used_modules.count(module_path)) { + delete module_code_block; + continue; + } + + // then we generate a function warping the code block, + // and export the necessary global symbols in this code block + // by generate a return statement, with a hashmap return value + used_modules.insert(module_path); + tree->add_expression(generate_module_definition(module_code_block)); + } + + // insert program root to the back of tree + link(tree, program_root); + return tree; +} + +const error& linker::link( + parse& parse, const std::string& self, bool spath = false) { + // switch for showing path when errors occur + show_path_flag = spath; + + // initializing file map + this_file = self; + imported_files = {self}; + module_load_stack = {self}; + + // scan root and import files + // then generate a new ast and return to import_ast + auto new_tree_root = load(parse.tree(), self); + auto old_tree_root = parse.swap(new_tree_root); + delete old_tree_root; + return err; +} + +} diff --git a/src/nasal_import.h b/src/nasal_import.h index 7fa0df3..6dd1e2a 100644 --- a/src/nasal_import.h +++ b/src/nasal_import.h @@ -1,59 +1,55 @@ -#pragma once - -#ifndef _MSC_VER -#include -#else -#define _CRT_SECURE_NO_DEPRECATE 1 -#define _CRT_NONSTDC_NO_DEPRECATE 1 -#include -#endif - -#ifdef _MSC_VER -#define F_OK 0 -#endif - -#include "nasal.h" -#include "nasal_ast.h" -#include "nasal_lexer.h" -#include "nasal_parse.h" -#include "symbol_finder.h" - -#include -#include -#include -#include - -namespace nasal { - -class linker { -private: - bool show_path_flag; - bool library_loaded; - std::string this_file; - error err; - std::vector imported_files; - std::vector module_load_stack; - std::vector envpath; - -private: - bool import_check(expr*); - bool check_exist_or_record_file(const std::string&); - bool check_self_import(const std::string&); - std::string generate_self_import_path(const std::string&); - void link(code_block*, code_block*); - std::string get_path(expr*); - std::string find_real_file_path(const std::string&, const span&); - code_block* import_regular_file(expr*, std::unordered_set&); - code_block* import_nasal_lib(); - std::string generate_module_name(const std::string&); - return_expr* generate_module_return(code_block*); - definition_expr* generate_module_definition(code_block*); - code_block* load(code_block*, const std::string&); - -public: - linker(); - const error& link(parse&, const std::string&, bool); - const auto& get_file_list() const {return imported_files;} -}; - -} +#pragma once + +#ifndef _MSC_VER +#include +#else +#define _CRT_SECURE_NO_DEPRECATE 1 +#define _CRT_NONSTDC_NO_DEPRECATE 1 +#include +#endif + +#include "nasal.h" +#include "nasal_ast.h" +#include "nasal_lexer.h" +#include "nasal_parse.h" +#include "symbol_finder.h" + +#include +#include +#include +#include + +namespace nasal { + +class linker { +private: + bool show_path_flag; + bool library_loaded; + std::string this_file; + error err; + std::vector imported_files; + std::vector module_load_stack; + std::vector envpath; + +private: + bool import_check(expr*); + bool check_exist_or_record_file(const std::string&); + bool check_self_import(const std::string&); + std::string generate_self_import_path(const std::string&); + void link(code_block*, code_block*); + std::string get_path(expr*); + std::string find_real_file_path(const std::string&, const span&); + code_block* import_regular_file(expr*, std::unordered_set&); + code_block* import_nasal_lib(); + std::string generate_module_name(const std::string&); + return_expr* generate_module_return(code_block*); + definition_expr* generate_module_definition(code_block*); + code_block* load(code_block*, const std::string&); + +public: + linker(); + const error& link(parse&, const std::string&, bool); + const auto& get_file_list() const {return imported_files;} +}; + +} diff --git a/src/nasal_lexer.cpp b/src/nasal_lexer.cpp index cd8cce3..e6a12f7 100644 --- a/src/nasal_lexer.cpp +++ b/src/nasal_lexer.cpp @@ -1,393 +1,396 @@ -#ifdef _MSC_VER -#pragma warning (disable:4244) -#pragma warning (disable:4267) -#pragma warning (disable:4102) -#endif - -#include "nasal_lexer.h" -#include "repl.h" - -namespace nasal { - -bool lexer::skip(char c) { - return c==' ' || c=='\n' || c=='\t' || c=='\r' || c==0; -} - -bool lexer::is_id(char c) { - return (c=='_') || std::isalpha(c) || (c<0); -} - -bool lexer::is_hex(char c) { - return std::isxdigit(c); -} - -bool lexer::is_oct(char c) { - return '0'<=c && c<='7'; -} - -bool lexer::is_dec(char c) { - return std::isdigit(c); -} - -bool lexer::is_str(char c) { - return c=='\'' || c=='\"' || c=='`'; -} - -bool lexer::is_single_opr(char c) { - return ( - c=='(' || c==')' || c=='[' || c==']' || - c=='{' || c=='}' || c==',' || c==';' || - c==':' || c=='?' || c=='`' || c=='@' || - c=='%' || c=='$' || c=='\\' - ); -} - -bool lexer::is_calc_opr(char c) { - return ( - c=='=' || c=='+' || c=='-' || c=='*' || - c=='!' || c=='/' || c=='<' || c=='>' || - c=='~' || c=='|' || c=='&' || c=='^' - ); -} - -void lexer::skip_note() { - // avoid note, after this process ptr will point to '\n' - // so next loop line counter+1 - while(++ptrin_repl_mode && - repl::info::instance()->repl_file_name==file) { - err.load(file); - filename = file; - res = repl::info::instance()->repl_file_source; - return; - } - - // check file exsits and it is a regular file - struct stat buffer; - if (stat(file.c_str(), &buffer)==0 && !S_ISREG(buffer.st_mode)) { - err.err("lexer", "<"+file+"> is not a regular file"); - err.chkerr(); - } - - // load - filename = file; - std::ifstream in(file, std::ios::binary); - if (in.fail()) { - err.err("lexer", "failed to open <" + file + ">"); - res = ""; - return; - } - err.load(file); - std::stringstream ss; - ss << in.rdbuf(); - res = ss.str(); -} - -tok lexer::get_type(const std::string& str) { - return typetbl.count(str)? typetbl.at(str):tok::null; -} - -std::string lexer::utf8_gen() { - std::string str = ""; - while(ptr" - ); - ++invalid_char; - } - str += tmp; - // may have some problems because not all the unicode takes 2 space - column += 2; - } - return str; -} - -token lexer::id_gen() { - u32 begin_line = line; - u32 begin_column = column; - std::string str = ""; - while(ptr [0~9][0~9]*(.[0~9]*)(e|E(+|-)0|[1~9][0~9]*) - std::string str = ""; - while(ptr=res.size()) { - err.err("lexer", - {begin_line, begin_column, line, column, filename}, - "get EOF when generating string" - ); - return {{begin_line, begin_column, line, column, filename}, tok::str, str}; - } - ++column; - - // if is not utf8, 1+utf8_hdchk should be 1 - if (begin=='`' && str.length()!=1+utf8_hdchk(str[0])) { - err.err("lexer", - {begin_line, begin_column, line, column, filename}, - "\'`\' is used for string including one character" - ); - } - return {{begin_line, begin_column, line, column, filename}, tok::str, str}; -} - -token lexer::single_opr() { - u32 begin_line = line; - u32 begin_column = column; - std::string str(1, res[ptr]); - ++column; - tok type = get_type(str); - if (type==tok::null) { - err.err("lexer", - {begin_line, begin_column, line, column, filename}, - "invalid operator `"+str+"`" - ); - } - ++ptr; - return {{begin_line, begin_column, line, column, filename}, type, str}; -} - -token lexer::dots() { - u32 begin_line = line; - u32 begin_column = column; - std::string str = "."; - if (ptr+2=res.size()) { - break; - } - if (is_id(res[ptr])) { - toks.push_back(id_gen()); - } else if (is_dec(res[ptr])) { - toks.push_back(num_gen()); - } else if (is_str(res[ptr])) { - toks.push_back(str_gen()); - } else if (is_single_opr(res[ptr])) { - toks.push_back(single_opr()); - } else if (res[ptr]=='.') { - toks.push_back(dots()); - } else if (is_calc_opr(res[ptr])) { - toks.push_back(calc_opr()); - } else if (res[ptr]=='#') { - skip_note(); - } else { - err_char(); - } - if (invalid_char>10) { - err.err("lexer", "too many invalid characters, stop"); - break; - } - } - if (toks.size()) { - // eof token's location is the last token's location - toks.push_back({toks.back().loc, tok::eof, ""}); - } else { - // if token sequence is empty, generate a default location - toks.push_back({{line, column, line, column, filename}, tok::eof, ""}); - } - res = ""; - return err; -} - -} +#ifdef _MSC_VER +#pragma warning (disable:4244) +#pragma warning (disable:4267) +#pragma warning (disable:4102) +#endif + +#include "nasal_lexer.h" +#include "repl.h" + +namespace nasal { + +bool lexer::skip(char c) { + return c==' ' || c=='\n' || c=='\t' || c=='\r' || c==0; +} + +bool lexer::is_id(char c) { + return (c=='_') || std::isalpha(c) || (c<0); +} + +bool lexer::is_hex(char c) { + return std::isxdigit(c); +} + +bool lexer::is_oct(char c) { + return '0'<=c && c<='7'; +} + +bool lexer::is_dec(char c) { + return std::isdigit(c); +} + +bool lexer::is_str(char c) { + return c=='\'' || c=='\"' || c=='`'; +} + +bool lexer::is_single_opr(char c) { + return ( + c=='(' || c==')' || c=='[' || c==']' || + c=='{' || c=='}' || c==',' || c==';' || + c==':' || c=='?' || c=='`' || c=='@' || + c=='%' || c=='$' || c=='\\' + ); +} + +bool lexer::is_calc_opr(char c) { + return ( + c=='=' || c=='+' || c=='-' || c=='*' || + c=='!' || c=='/' || c=='<' || c=='>' || + c=='~' || c=='|' || c=='&' || c=='^' + ); +} + +void lexer::skip_note() { + // avoid note, after this process ptr will point to '\n' + // so next loop line counter+1 + while(++ptrin_repl_mode && + repl::info::instance()->repl_file_name==file) { + err.load(file); + filename = file; + res = repl::info::instance()->repl_file_source; + return; + } + + // check file exsits and it is a regular file +#ifdef _MSC_VER + #define S_ISREG(m) (((m)&0xF000)==0x8000) +#endif + struct stat buffer; + if (stat(file.c_str(), &buffer)==0 && !S_ISREG(buffer.st_mode)) { + err.err("lexer", "<"+file+"> is not a regular file"); + err.chkerr(); + } + + // load + filename = file; + std::ifstream in(file, std::ios::binary); + if (in.fail()) { + err.err("lexer", "failed to open <" + file + ">"); + res = ""; + return; + } + err.load(file); + std::stringstream ss; + ss << in.rdbuf(); + res = ss.str(); +} + +tok lexer::get_type(const std::string& str) { + return typetbl.count(str)? typetbl.at(str):tok::null; +} + +std::string lexer::utf8_gen() { + std::string str = ""; + while(ptr" + ); + ++invalid_char; + } + str += tmp; + // may have some problems because not all the unicode takes 2 space + column += 2; + } + return str; +} + +token lexer::id_gen() { + u32 begin_line = line; + u32 begin_column = column; + std::string str = ""; + while(ptr [0~9][0~9]*(.[0~9]*)(e|E(+|-)0|[1~9][0~9]*) + std::string str = ""; + while(ptr=res.size()) { + err.err("lexer", + {begin_line, begin_column, line, column, filename}, + "get EOF when generating string" + ); + return {{begin_line, begin_column, line, column, filename}, tok::str, str}; + } + ++column; + + // if is not utf8, 1+utf8_hdchk should be 1 + if (begin=='`' && str.length()!=1+utf8_hdchk(str[0])) { + err.err("lexer", + {begin_line, begin_column, line, column, filename}, + "\'`\' is used for string including one character" + ); + } + return {{begin_line, begin_column, line, column, filename}, tok::str, str}; +} + +token lexer::single_opr() { + u32 begin_line = line; + u32 begin_column = column; + std::string str(1, res[ptr]); + ++column; + tok type = get_type(str); + if (type==tok::null) { + err.err("lexer", + {begin_line, begin_column, line, column, filename}, + "invalid operator `"+str+"`" + ); + } + ++ptr; + return {{begin_line, begin_column, line, column, filename}, type, str}; +} + +token lexer::dots() { + u32 begin_line = line; + u32 begin_column = column; + std::string str = "."; + if (ptr+2=res.size()) { + break; + } + if (is_id(res[ptr])) { + toks.push_back(id_gen()); + } else if (is_dec(res[ptr])) { + toks.push_back(num_gen()); + } else if (is_str(res[ptr])) { + toks.push_back(str_gen()); + } else if (is_single_opr(res[ptr])) { + toks.push_back(single_opr()); + } else if (res[ptr]=='.') { + toks.push_back(dots()); + } else if (is_calc_opr(res[ptr])) { + toks.push_back(calc_opr()); + } else if (res[ptr]=='#') { + skip_note(); + } else { + err_char(); + } + if (invalid_char>10) { + err.err("lexer", "too many invalid characters, stop"); + break; + } + } + if (toks.size()) { + // eof token's location is the last token's location + toks.push_back({toks.back().loc, tok::eof, ""}); + } else { + // if token sequence is empty, generate a default location + toks.push_back({{line, column, line, column, filename}, tok::eof, ""}); + } + res = ""; + return err; +} + +} diff --git a/src/nasal_lexer.h b/src/nasal_lexer.h index 99c200d..2b5bd38 100644 --- a/src/nasal_lexer.h +++ b/src/nasal_lexer.h @@ -15,10 +15,6 @@ #include "nasal.h" #include "nasal_err.h" -#ifdef _MSC_VER -#define S_ISREG(m) (((m)&0xF000)==0x8000) -#endif - namespace nasal { enum class tok:u32 {