📝 change CRLF to LF

This commit is contained in:
ValKmjolnir 2023-12-02 19:42:21 +08:00
parent 476fbdb859
commit d42e4a5897
6 changed files with 1140 additions and 1141 deletions

View File

@ -1,245 +1,251 @@
#include "io_lib.h"
namespace nasal {
const auto file_type_name = "file";
void filehandle_destructor(void* ptr) {
fclose(static_cast<FILE*>(ptr));
}
var builtin_readfile(context* ctx, gc* ngc) {
auto filename = ctx->localr[1];
if (!filename.is_str()) {
return nas_err("io::readfile", "\"filename\" must be string");
}
std::ifstream in(filename.str(), std::ios::binary);
std::stringstream rd;
if (!in.fail()) {
rd << in.rdbuf();
}
return ngc->newstr(rd.str());
}
var builtin_fout(context* ctx, gc* ngc) {
auto local = ctx->localr;
auto filename = local[1];
auto source = local[2];
if (!filename.is_str()) {
return nas_err("io::fout", "\"filename\" must be string");
}
std::ofstream out(filename.str());
if (out.fail()) {
return nas_err("io::fout", "cannot open <" + filename.str() + ">");
}
out << source;
return nil;
}
var builtin_exists(context* ctx, gc* ngc) {
auto filename = ctx->localr[1];
if (!filename.is_str()) {
return zero;
}
return access(filename.str().c_str(), F_OK)!=-1? one:zero;
}
var builtin_open(context* ctx, gc* ngc) {
auto local = ctx->localr;
auto name = local[1];
auto mode = local[2];
if (!name.is_str()) {
return nas_err("io::open", "\"filename\" must be string");
}
if (!mode.is_str()) {
return nas_err("io::open", "\"mode\" must be string");
}
auto file_descriptor = fopen(name.str().c_str(), mode.str().c_str());
if (!file_descriptor) {
return nas_err("io::open", "failed to open file <" + name.str() + ">");
}
var return_object = ngc->alloc(vm_type::vm_ghost);
return_object.ghost().set(
file_type_name, filehandle_destructor, nullptr, file_descriptor
);
return return_object;
}
var builtin_close(context* ctx, gc* ngc) {
var file_descriptor = ctx->localr[1];
if (!file_descriptor.object_check(file_type_name)) {
return nas_err("io::close", "not a valid filehandle");
}
file_descriptor.ghost().clear();
return nil;
}
var builtin_read(context* ctx, gc* ngc) {
auto local = ctx->localr;
auto file_descriptor = local[1];
auto buffer = local[2];
auto length = local[3];
if (!file_descriptor.object_check(file_type_name)) {
return nas_err("io::read", "not a valid filehandle");
}
if (!buffer.is_str() || buffer.val.gcobj->unmutable) {
return nas_err("io::read", "\"buf\" must be mutable string");
}
if (!length.is_num()) {
return nas_err("io::read", "\"len\" must be number");
}
if (length.num()<=0 || length.num()>=(1<<30)) {
return nas_err("io::read", "\"len\" less than 1 or too large");
}
auto temp_buffer = new char[static_cast<usize>(length.num())+1];
if (!temp_buffer) {
return nas_err("io::read", "malloc failed");
}
auto read_size = fread(
temp_buffer, 1, length.num(),
static_cast<FILE*>(file_descriptor.ghost().pointer)
);
buffer.str() = temp_buffer;
buffer.val.gcobj->unmutable = true;
delete []temp_buffer;
return var::num(read_size);
}
var builtin_write(context* ctx, gc* ngc) {
auto local = ctx->localr;
auto file_descriptor = local[1];
auto source = local[2];
if (!file_descriptor.object_check(file_type_name)) {
return nas_err("io::write", "not a valid filehandle");
}
if (!source.is_str()) {
return nas_err("io::write", "\"str\" must be string");
}
return var::num(static_cast<f64>(fwrite(
source.str().c_str(), 1, source.str().length(),
static_cast<FILE*>(file_descriptor.ghost().pointer)
)));
}
var builtin_seek(context* ctx, gc* ngc) {
auto local = ctx->localr;
auto file_descriptor = local[1];
auto position = local[2];
auto whence = local[3];
if (!file_descriptor.object_check(file_type_name)) {
return nas_err("io::seek", "not a valid filehandle");
}
return var::num(static_cast<f64>(fseek(
static_cast<FILE*>(file_descriptor.ghost().pointer),
position.num(),
whence.num()
)));
}
var builtin_tell(context* ctx, gc* ngc) {
auto file_descriptor = ctx->localr[1];
if (!file_descriptor.object_check(file_type_name)) {
return nas_err("io::tell", "not a valid filehandle");
}
return var::num(static_cast<f64>(
ftell(static_cast<FILE*>(file_descriptor.ghost().pointer))
));
}
var builtin_readln(context* ctx, gc* ngc) {
auto file_descriptor = ctx->localr[1];
if (!file_descriptor.object_check(file_type_name)) {
return nas_err("io::readln", "not a valid filehandle");
}
auto result = ngc->alloc(vm_type::vm_str);
char c;
while((c = fgetc(static_cast<FILE*>(file_descriptor.ghost().pointer)))!=EOF) {
if (c=='\r') {
continue;
}
if (c=='\n') {
return result;
}
result.str().push_back(c);
}
if (result.str().length()) {
return result;
}
return nil;
}
var builtin_stat(context* ctx, gc* ngc) {
auto name = ctx->localr[1];
if (!name.is_str()) {
return nas_err("io::stat", "\"filename\" must be string");
}
struct stat buffer;
if (stat(name.str().c_str(), &buffer)<0) {
return nas_err("io::stat", "failed to open file <" + name.str() + ">");
}
auto result = ngc->alloc(vm_type::vm_vec);
result.vec().elems = {
var::num(static_cast<f64>(buffer.st_dev)),
var::num(static_cast<f64>(buffer.st_ino)),
var::num(static_cast<f64>(buffer.st_mode)),
var::num(static_cast<f64>(buffer.st_nlink)),
var::num(static_cast<f64>(buffer.st_uid)),
var::num(static_cast<f64>(buffer.st_gid)),
var::num(static_cast<f64>(buffer.st_rdev)),
var::num(static_cast<f64>(buffer.st_size)),
var::num(static_cast<f64>(buffer.st_atime)),
var::num(static_cast<f64>(buffer.st_mtime)),
var::num(static_cast<f64>(buffer.st_ctime))
};
return result;
}
var builtin_eof(context* ctx, gc* ngc) {
auto file_descriptor = ctx->localr[1];
if (!file_descriptor.object_check(file_type_name)) {
return nas_err("io::readln", "not a valid filehandle");
}
return var::num(static_cast<f64>(
feof(static_cast<FILE*>(file_descriptor.ghost().pointer))
));
}
var builtin_stdin(context* ctx, gc* ngc) {
auto file_descriptor = ngc->alloc(vm_type::vm_ghost);
file_descriptor.ghost().set(file_type_name, nullptr, nullptr, stdin);
return file_descriptor;
}
var builtin_stdout(context* ctx, gc* ngc) {
auto file_descriptor = ngc->alloc(vm_type::vm_ghost);
file_descriptor.ghost().set(file_type_name, nullptr, nullptr, stdout);
return file_descriptor;
}
var builtin_stderr(context* ctx, gc* ngc) {
auto file_descriptor = ngc->alloc(vm_type::vm_ghost);
file_descriptor.ghost().set(file_type_name, nullptr, nullptr, stderr);
return file_descriptor;
}
nasal_builtin_table io_lib_native[] = {
{"__readfile", builtin_readfile},
{"__fout", builtin_fout},
{"__exists", builtin_exists},
{"__open", builtin_open},
{"__close", builtin_close},
{"__read", builtin_read},
{"__write", builtin_write},
{"__seek", builtin_seek},
{"__tell", builtin_tell},
{"__readln", builtin_readln},
{"__stat", builtin_stat},
{"__eof", builtin_eof},
{"__stdin", builtin_stdin},
{"__stdout", builtin_stdout},
{"__stderr", builtin_stderr},
{nullptr, nullptr}
};
}
#include "io_lib.h"
#ifdef _MSC_VER
#define F_OK 0 // fuck msc
#endif
#include <sys/stat.h>
namespace nasal {
const auto file_type_name = "file";
void filehandle_destructor(void* ptr) {
fclose(static_cast<FILE*>(ptr));
}
var builtin_readfile(context* ctx, gc* ngc) {
auto filename = ctx->localr[1];
if (!filename.is_str()) {
return nas_err("io::readfile", "\"filename\" must be string");
}
std::ifstream in(filename.str(), std::ios::binary);
std::stringstream rd;
if (!in.fail()) {
rd << in.rdbuf();
}
return ngc->newstr(rd.str());
}
var builtin_fout(context* ctx, gc* ngc) {
auto local = ctx->localr;
auto filename = local[1];
auto source = local[2];
if (!filename.is_str()) {
return nas_err("io::fout", "\"filename\" must be string");
}
std::ofstream out(filename.str());
if (out.fail()) {
return nas_err("io::fout", "cannot open <" + filename.str() + ">");
}
out << source;
return nil;
}
var builtin_exists(context* ctx, gc* ngc) {
auto filename = ctx->localr[1];
if (!filename.is_str()) {
return zero;
}
return access(filename.str().c_str(), F_OK)!=-1? one:zero;
}
var builtin_open(context* ctx, gc* ngc) {
auto local = ctx->localr;
auto name = local[1];
auto mode = local[2];
if (!name.is_str()) {
return nas_err("io::open", "\"filename\" must be string");
}
if (!mode.is_str()) {
return nas_err("io::open", "\"mode\" must be string");
}
auto file_descriptor = fopen(name.str().c_str(), mode.str().c_str());
if (!file_descriptor) {
return nas_err("io::open", "failed to open file <" + name.str() + ">");
}
var return_object = ngc->alloc(vm_type::vm_ghost);
return_object.ghost().set(
file_type_name, filehandle_destructor, nullptr, file_descriptor
);
return return_object;
}
var builtin_close(context* ctx, gc* ngc) {
var file_descriptor = ctx->localr[1];
if (!file_descriptor.object_check(file_type_name)) {
return nas_err("io::close", "not a valid filehandle");
}
file_descriptor.ghost().clear();
return nil;
}
var builtin_read(context* ctx, gc* ngc) {
auto local = ctx->localr;
auto file_descriptor = local[1];
auto buffer = local[2];
auto length = local[3];
if (!file_descriptor.object_check(file_type_name)) {
return nas_err("io::read", "not a valid filehandle");
}
if (!buffer.is_str() || buffer.val.gcobj->unmutable) {
return nas_err("io::read", "\"buf\" must be mutable string");
}
if (!length.is_num()) {
return nas_err("io::read", "\"len\" must be number");
}
if (length.num()<=0 || length.num()>=(1<<30)) {
return nas_err("io::read", "\"len\" less than 1 or too large");
}
auto temp_buffer = new char[static_cast<usize>(length.num())+1];
if (!temp_buffer) {
return nas_err("io::read", "malloc failed");
}
auto read_size = fread(
temp_buffer, 1, length.num(),
static_cast<FILE*>(file_descriptor.ghost().pointer)
);
buffer.str() = temp_buffer;
buffer.val.gcobj->unmutable = true;
delete []temp_buffer;
return var::num(read_size);
}
var builtin_write(context* ctx, gc* ngc) {
auto local = ctx->localr;
auto file_descriptor = local[1];
auto source = local[2];
if (!file_descriptor.object_check(file_type_name)) {
return nas_err("io::write", "not a valid filehandle");
}
if (!source.is_str()) {
return nas_err("io::write", "\"str\" must be string");
}
return var::num(static_cast<f64>(fwrite(
source.str().c_str(), 1, source.str().length(),
static_cast<FILE*>(file_descriptor.ghost().pointer)
)));
}
var builtin_seek(context* ctx, gc* ngc) {
auto local = ctx->localr;
auto file_descriptor = local[1];
auto position = local[2];
auto whence = local[3];
if (!file_descriptor.object_check(file_type_name)) {
return nas_err("io::seek", "not a valid filehandle");
}
return var::num(static_cast<f64>(fseek(
static_cast<FILE*>(file_descriptor.ghost().pointer),
position.num(),
whence.num()
)));
}
var builtin_tell(context* ctx, gc* ngc) {
auto file_descriptor = ctx->localr[1];
if (!file_descriptor.object_check(file_type_name)) {
return nas_err("io::tell", "not a valid filehandle");
}
return var::num(static_cast<f64>(
ftell(static_cast<FILE*>(file_descriptor.ghost().pointer))
));
}
var builtin_readln(context* ctx, gc* ngc) {
auto file_descriptor = ctx->localr[1];
if (!file_descriptor.object_check(file_type_name)) {
return nas_err("io::readln", "not a valid filehandle");
}
auto result = ngc->alloc(vm_type::vm_str);
char c;
while((c = fgetc(static_cast<FILE*>(file_descriptor.ghost().pointer)))!=EOF) {
if (c=='\r') {
continue;
}
if (c=='\n') {
return result;
}
result.str().push_back(c);
}
if (result.str().length()) {
return result;
}
return nil;
}
var builtin_stat(context* ctx, gc* ngc) {
auto name = ctx->localr[1];
if (!name.is_str()) {
return nas_err("io::stat", "\"filename\" must be string");
}
struct stat buffer;
if (stat(name.str().c_str(), &buffer)<0) {
return nas_err("io::stat", "failed to open file <" + name.str() + ">");
}
auto result = ngc->alloc(vm_type::vm_vec);
result.vec().elems = {
var::num(static_cast<f64>(buffer.st_dev)),
var::num(static_cast<f64>(buffer.st_ino)),
var::num(static_cast<f64>(buffer.st_mode)),
var::num(static_cast<f64>(buffer.st_nlink)),
var::num(static_cast<f64>(buffer.st_uid)),
var::num(static_cast<f64>(buffer.st_gid)),
var::num(static_cast<f64>(buffer.st_rdev)),
var::num(static_cast<f64>(buffer.st_size)),
var::num(static_cast<f64>(buffer.st_atime)),
var::num(static_cast<f64>(buffer.st_mtime)),
var::num(static_cast<f64>(buffer.st_ctime))
};
return result;
}
var builtin_eof(context* ctx, gc* ngc) {
auto file_descriptor = ctx->localr[1];
if (!file_descriptor.object_check(file_type_name)) {
return nas_err("io::readln", "not a valid filehandle");
}
return var::num(static_cast<f64>(
feof(static_cast<FILE*>(file_descriptor.ghost().pointer))
));
}
var builtin_stdin(context* ctx, gc* ngc) {
auto file_descriptor = ngc->alloc(vm_type::vm_ghost);
file_descriptor.ghost().set(file_type_name, nullptr, nullptr, stdin);
return file_descriptor;
}
var builtin_stdout(context* ctx, gc* ngc) {
auto file_descriptor = ngc->alloc(vm_type::vm_ghost);
file_descriptor.ghost().set(file_type_name, nullptr, nullptr, stdout);
return file_descriptor;
}
var builtin_stderr(context* ctx, gc* ngc) {
auto file_descriptor = ngc->alloc(vm_type::vm_ghost);
file_descriptor.ghost().set(file_type_name, nullptr, nullptr, stderr);
return file_descriptor;
}
nasal_builtin_table io_lib_native[] = {
{"__readfile", builtin_readfile},
{"__fout", builtin_fout},
{"__exists", builtin_exists},
{"__open", builtin_open},
{"__close", builtin_close},
{"__read", builtin_read},
{"__write", builtin_write},
{"__seek", builtin_seek},
{"__tell", builtin_tell},
{"__readln", builtin_readln},
{"__stat", builtin_stat},
{"__eof", builtin_eof},
{"__stdin", builtin_stdin},
{"__stdout", builtin_stdout},
{"__stderr", builtin_stderr},
{nullptr, nullptr}
};
}

View File

@ -1,41 +1,35 @@
#pragma once
#include "nasal.h"
#include "nasal_gc.h"
#include "nasal_builtin.h"
#include <sys/stat.h>
#ifndef _MSC_VER
#include <unistd.h>
#else
#include <io.h>
#endif
#ifdef _MSC_VER
#define F_OK 0 // fuck msc
#endif
namespace nasal {
void filehandle_destructor(void*);
var builtin_readfile(context*, gc*);
var builtin_fout(context*, gc*);
var builtin_exists(context*, gc*);
var builtin_open(context*, gc*);
var builtin_close(context*, gc*);
var builtin_read(context*, gc*);
var builtin_write(context*, gc*);
var builtin_seek(context*, gc*);
var builtin_tell(context*, gc*);
var builtin_readln(context*, gc*);
var builtin_stat(context*, gc*);
var builtin_eof(context*, gc*);
var builtin_stdin(context*, gc*);
var builtin_stdout(context*, gc*);
var builtin_stderr(context*, gc*);
extern nasal_builtin_table io_lib_native[];
}
#pragma once
#include "nasal.h"
#include "nasal_gc.h"
#include "nasal_builtin.h"
#ifndef _MSC_VER
#include <unistd.h>
#else
#include <io.h>
#endif
namespace nasal {
void filehandle_destructor(void*);
var builtin_readfile(context*, gc*);
var builtin_fout(context*, gc*);
var builtin_exists(context*, gc*);
var builtin_open(context*, gc*);
var builtin_close(context*, gc*);
var builtin_read(context*, gc*);
var builtin_write(context*, gc*);
var builtin_seek(context*, gc*);
var builtin_tell(context*, gc*);
var builtin_readln(context*, gc*);
var builtin_stat(context*, gc*);
var builtin_eof(context*, gc*);
var builtin_stdin(context*, gc*);
var builtin_stdout(context*, gc*);
var builtin_stderr(context*, gc*);
extern nasal_builtin_table io_lib_native[];
}

View File

@ -1,399 +1,403 @@
#include "nasal_import.h"
#include "symbol_finder.h"
#include <memory>
#include <unordered_set>
namespace nasal {
linker::linker(): show_path_flag(false), library_loaded(false), this_file("") {
const auto seperator= is_windows()? ';':':';
const auto PATH = std::string(getenv("PATH"));
usize last = 0, position = PATH.find(seperator, 0);
while(position!=std::string::npos) {
std::string dirpath = PATH.substr(last, position-last);
if (dirpath.length()) {
envpath.push_back(dirpath);
}
last = position+1;
position = PATH.find(seperator, last);
}
if (last!=PATH.length()) {
envpath.push_back(PATH.substr(last));
}
}
std::string linker::get_path(expr* node) {
if (node->get_type()==expr_type::ast_use) {
auto file_relative_path = std::string("");
const auto& path = reinterpret_cast<use_stmt*>(node)->get_path();
for(auto i : path) {
file_relative_path += i->get_name();
if (i!=path.back()) {
file_relative_path += (is_windows()? "\\":"/");
}
}
return file_relative_path + ".nas";
}
auto call_node = reinterpret_cast<call_expr*>(node);
auto arguments = reinterpret_cast<call_function*>(call_node->get_calls()[0]);
auto content = reinterpret_cast<string_literal*>(arguments->get_argument()[0]);
return content->get_content();
}
std::string linker::find_real_file_path(
const std::string& filename, const span& location) {
// first add file name itself into the file path
std::vector<std::string> path_list = {filename};
// generate search path from environ path
for(const auto& p : envpath) {
path_list.push_back(p + (is_windows()? "\\":"/") + filename);
}
// search file
for(const auto& path : path_list) {
if (access(path.c_str(), F_OK)!=-1) {
return path;
}
}
// we will find lib.nas in nasal std directory
if (filename=="lib.nas") {
return is_windows()?
find_real_file_path("std\\lib.nas", location):
find_real_file_path("std/lib.nas", location);
}
if (!show_path_flag) {
err.err("link",
"in <" + location.file + ">: " +
"cannot find file <" + filename + ">, " +
"use <-d> to get detail search path"
);
return "";
}
auto path_list_info = std::string("");
for(const auto& path : path_list) {
path_list_info += " -> " + path + "\n";
}
err.err("link",
"in <" + location.file + ">: " +
"cannot find file <" + filename +
"> in these paths:\n" + path_list_info
);
return "";
}
bool linker::import_check(expr* node) {
if (node->get_type()==expr_type::ast_use) {
return true;
}
/*
call
|_id:import
|_call_func
|_string:'filename'
*/
if (node->get_type()!=expr_type::ast_call) {
return false;
}
auto call_node = reinterpret_cast<call_expr*>(node);
auto first_expr = call_node->get_first();
if (first_expr->get_type()!=expr_type::ast_id) {
return false;
}
if (reinterpret_cast<identifier*>(first_expr)->get_name()!="import") {
return false;
}
if (!call_node->get_calls().size()) {
return false;
}
// import("xxx");
if (call_node->get_calls().size()!=1) {
return false;
}
auto maybe_func_call = call_node->get_calls()[0];
if (maybe_func_call->get_type()!=expr_type::ast_callf) {
return false;
}
auto func_call = reinterpret_cast<call_function*>(maybe_func_call);
if (func_call->get_argument().size()!=1) {
return false;
}
if (func_call->get_argument()[0]->get_type()!=expr_type::ast_str) {
return false;
}
return true;
}
bool linker::check_exist_or_record_file(const std::string& file) {
// avoid importing the same file
for(const auto& name : imported_files) {
if (file==name) {
return true;
}
}
imported_files.push_back(file);
return false;
}
bool linker::check_self_import(const std::string& file) {
for(const auto& name : module_load_stack) {
if (file==name) {
return true;
}
}
return false;
}
std::string linker::generate_self_import_path(const std::string& filename) {
std::string res = "";
for(const auto& i : module_load_stack) {
res += "[" + i + "] -> ";
}
return res + "[" + filename + "]";
}
void linker::link(code_block* new_tree_root, code_block* old_tree_root) {
// add children of add_root to the back of root
for(auto& i : old_tree_root->get_expressions()) {
new_tree_root->add_expression(i);
}
// clean old root
old_tree_root->get_expressions().clear();
}
code_block* linker::import_regular_file(
expr* node, std::unordered_set<std::string>& used_modules) {
// get filename
auto filename = get_path(node);
// avoid infinite loading loop
filename = find_real_file_path(filename, node->get_location());
// if get empty string(error) or this file is used before, do not parse
if (!filename.length() || used_modules.count(filename)) {
return new code_block({0, 0, 0, 0, filename});
}
// check self import, avoid infinite loading loop
if (check_self_import(filename)) {
err.err("link",
"self-referenced module <" + filename + ">:\n" +
" reference path: " + generate_self_import_path(filename)
);
return new code_block({0, 0, 0, 0, filename});
}
check_exist_or_record_file(filename);
module_load_stack.push_back(filename);
// start importing...
lexer nasal_lexer;
parse nasal_parser;
if (nasal_lexer.scan(filename).geterr()) {
err.err("link", "error occurred when analysing <" + filename + ">");
return new code_block({0, 0, 0, 0, filename});
}
if (nasal_parser.compile(nasal_lexer).geterr()) {
err.err("link", "error occurred when analysing <" + filename + ">");
return new code_block({0, 0, 0, 0, filename});
}
// swap result out
auto parse_result = nasal_parser.swap(nullptr);
// check if parse result has 'import'
auto result = load(parse_result, filename);
module_load_stack.pop_back();
return result;
}
code_block* linker::import_nasal_lib() {
auto path = find_real_file_path(
"lib.nas", {0, 0, 0, 0, this_file}
);
if (!path.length()) {
return new code_block({0, 0, 0, 0, path});
}
// avoid infinite loading library
if (check_exist_or_record_file(path)) {
return new code_block({0, 0, 0, 0, path});
}
// start importing...
lexer nasal_lexer;
parse nasal_parser;
if (nasal_lexer.scan(path).geterr()) {
err.err("link",
"error occurred when analysing library <" + path + ">"
);
return new code_block({0, 0, 0, 0, path});
}
if (nasal_parser.compile(nasal_lexer).geterr()) {
err.err("link",
"error occurred when analysing library <" + path + ">"
);
return new code_block({0, 0, 0, 0, path});
}
// swap result out
auto parse_result = nasal_parser.swap(nullptr);
// check if library has 'import' (in fact it should not)
return load(parse_result, path);
}
std::string linker::generate_module_name(const std::string& file_path) {
auto error_name = "module@[" + file_path + "]";
if (!file_path.length()) {
return error_name;
}
// check file suffix and get file suffix position
auto suffix_position = file_path.find(".nas");
if (suffix_position==std::string::npos) {
err.warn("link",
"get invalid module name from <" + file_path + ">, " +
"will not be easily accessed. " +
"\".nas\" suffix is required."
);
return error_name;
}
if (suffix_position+4!=file_path.length()) {
err.warn("link",
"get invalid module name from <" + file_path + ">, " +
"will not be easily accessed. " +
"only one \".nas\" suffix is required in the path."
);
return error_name;
}
// only get the file name as module name, directory path is not included
auto split_position = file_path.find_last_of("/");
// find "\\" in windows platform
if (split_position==std::string::npos) {
split_position = file_path.find_last_of("\\");
}
// split file path to get module name
auto module_name = split_position==std::string::npos?
file_path.substr(0, suffix_position):
file_path.substr(split_position+1, suffix_position-split_position-1);
// check validation of module name
if (!module_name.length()) {
err.warn("link",
"get empty module name from <" + file_path + ">, " +
"will not be easily accessed."
);
return module_name;
}
if (std::isdigit(module_name[0]) ||
module_name.find(".")!=std::string::npos ||
module_name.find("-")!=std::string::npos) {
err.warn("link",
"get module <" + module_name + "> from <" + file_path + ">, " +
"will not be easily accessed."
);
}
return module_name;
}
return_expr* linker::generate_module_return(code_block* block) {
auto finder = std::unique_ptr<symbol_finder>(new symbol_finder);
auto result = new return_expr(block->get_location());
auto value = new hash_expr(block->get_location());
result->set_value(value);
for(const auto& i : finder->do_find(block)) {
auto pair = new hash_pair(block->get_location());
// do not export symbol begins with '_'
if (i.name.length() && i.name[0]=='_') {
continue;
}
pair->set_name(i.name);
pair->set_value(new identifier(block->get_location(), i.name));
value->add_member(pair);
}
return result;
}
definition_expr* linker::generate_module_definition(code_block* block) {
auto def = new definition_expr(block->get_location());
def->set_identifier(new identifier(
block->get_location(),
generate_module_name(block->get_location().file)
));
auto call = new call_expr(block->get_location());
auto func = new function(block->get_location());
func->set_code_block(block);
func->get_code_block()->add_expression(generate_module_return(block));
call->set_first(func);
call->add_call(new call_function(block->get_location()));
def->set_value(call);
return def;
}
code_block* linker::load(code_block* program_root, const std::string& filename) {
auto tree = new code_block({0, 0, 0, 0, filename});
// load library, this ast will be linked with root directly
// so no extra namespace is generated
if (!library_loaded) {
auto nasal_lib_code_block = import_nasal_lib();
// insert nasal lib code to the back of tree
link(tree, nasal_lib_code_block);
delete nasal_lib_code_block;
library_loaded = true;
}
// load imported modules
std::unordered_set<std::string> used_modules = {};
for(auto& import_node : program_root->get_expressions()) {
if (!import_check(import_node)) {
break;
}
// parse file and get ast
auto module_code_block = import_regular_file(import_node, used_modules);
auto replace_node = new null_expr(import_node->get_location());
// after importing the regular file as module, delete this node
delete import_node;
// and replace the node with null_expr node
import_node = replace_node;
// avoid repeatedly importing the same module
const auto& module_path = module_code_block->get_location().file;
if (used_modules.count(module_path)) {
delete module_code_block;
continue;
}
// then we generate a function warping the code block,
// and export the necessary global symbols in this code block
// by generate a return statement, with a hashmap return value
used_modules.insert(module_path);
tree->add_expression(generate_module_definition(module_code_block));
}
// insert program root to the back of tree
link(tree, program_root);
return tree;
}
const error& linker::link(
parse& parse, const std::string& self, bool spath = false) {
// switch for showing path when errors occur
show_path_flag = spath;
// initializing file map
this_file = self;
imported_files = {self};
module_load_stack = {self};
// scan root and import files
// then generate a new ast and return to import_ast
auto new_tree_root = load(parse.tree(), self);
auto old_tree_root = parse.swap(new_tree_root);
delete old_tree_root;
return err;
}
}
#include "nasal_import.h"
#include "symbol_finder.h"
#include <memory>
#include <unordered_set>
#ifdef _MSC_VER
#define F_OK 0 // fuck msc
#endif
namespace nasal {
linker::linker(): show_path_flag(false), library_loaded(false), this_file("") {
const auto seperator= is_windows()? ';':':';
const auto PATH = std::string(getenv("PATH"));
usize last = 0, position = PATH.find(seperator, 0);
while(position!=std::string::npos) {
std::string dirpath = PATH.substr(last, position-last);
if (dirpath.length()) {
envpath.push_back(dirpath);
}
last = position+1;
position = PATH.find(seperator, last);
}
if (last!=PATH.length()) {
envpath.push_back(PATH.substr(last));
}
}
std::string linker::get_path(expr* node) {
if (node->get_type()==expr_type::ast_use) {
auto file_relative_path = std::string("");
const auto& path = reinterpret_cast<use_stmt*>(node)->get_path();
for(auto i : path) {
file_relative_path += i->get_name();
if (i!=path.back()) {
file_relative_path += (is_windows()? "\\":"/");
}
}
return file_relative_path + ".nas";
}
auto call_node = reinterpret_cast<call_expr*>(node);
auto arguments = reinterpret_cast<call_function*>(call_node->get_calls()[0]);
auto content = reinterpret_cast<string_literal*>(arguments->get_argument()[0]);
return content->get_content();
}
std::string linker::find_real_file_path(
const std::string& filename, const span& location) {
// first add file name itself into the file path
std::vector<std::string> path_list = {filename};
// generate search path from environ path
for(const auto& p : envpath) {
path_list.push_back(p + (is_windows()? "\\":"/") + filename);
}
// search file
for(const auto& path : path_list) {
if (access(path.c_str(), F_OK)!=-1) {
return path;
}
}
// we will find lib.nas in nasal std directory
if (filename=="lib.nas") {
return is_windows()?
find_real_file_path("std\\lib.nas", location):
find_real_file_path("std/lib.nas", location);
}
if (!show_path_flag) {
err.err("link",
"in <" + location.file + ">: " +
"cannot find file <" + filename + ">, " +
"use <-d> to get detail search path"
);
return "";
}
auto path_list_info = std::string("");
for(const auto& path : path_list) {
path_list_info += " -> " + path + "\n";
}
err.err("link",
"in <" + location.file + ">: " +
"cannot find file <" + filename +
"> in these paths:\n" + path_list_info
);
return "";
}
bool linker::import_check(expr* node) {
if (node->get_type()==expr_type::ast_use) {
return true;
}
/*
call
|_id:import
|_call_func
|_string:'filename'
*/
if (node->get_type()!=expr_type::ast_call) {
return false;
}
auto call_node = reinterpret_cast<call_expr*>(node);
auto first_expr = call_node->get_first();
if (first_expr->get_type()!=expr_type::ast_id) {
return false;
}
if (reinterpret_cast<identifier*>(first_expr)->get_name()!="import") {
return false;
}
if (!call_node->get_calls().size()) {
return false;
}
// import("xxx");
if (call_node->get_calls().size()!=1) {
return false;
}
auto maybe_func_call = call_node->get_calls()[0];
if (maybe_func_call->get_type()!=expr_type::ast_callf) {
return false;
}
auto func_call = reinterpret_cast<call_function*>(maybe_func_call);
if (func_call->get_argument().size()!=1) {
return false;
}
if (func_call->get_argument()[0]->get_type()!=expr_type::ast_str) {
return false;
}
return true;
}
bool linker::check_exist_or_record_file(const std::string& file) {
// avoid importing the same file
for(const auto& name : imported_files) {
if (file==name) {
return true;
}
}
imported_files.push_back(file);
return false;
}
bool linker::check_self_import(const std::string& file) {
for(const auto& name : module_load_stack) {
if (file==name) {
return true;
}
}
return false;
}
std::string linker::generate_self_import_path(const std::string& filename) {
std::string res = "";
for(const auto& i : module_load_stack) {
res += "[" + i + "] -> ";
}
return res + "[" + filename + "]";
}
void linker::link(code_block* new_tree_root, code_block* old_tree_root) {
// add children of add_root to the back of root
for(auto& i : old_tree_root->get_expressions()) {
new_tree_root->add_expression(i);
}
// clean old root
old_tree_root->get_expressions().clear();
}
code_block* linker::import_regular_file(
expr* node, std::unordered_set<std::string>& used_modules) {
// get filename
auto filename = get_path(node);
// avoid infinite loading loop
filename = find_real_file_path(filename, node->get_location());
// if get empty string(error) or this file is used before, do not parse
if (!filename.length() || used_modules.count(filename)) {
return new code_block({0, 0, 0, 0, filename});
}
// check self import, avoid infinite loading loop
if (check_self_import(filename)) {
err.err("link",
"self-referenced module <" + filename + ">:\n" +
" reference path: " + generate_self_import_path(filename)
);
return new code_block({0, 0, 0, 0, filename});
}
check_exist_or_record_file(filename);
module_load_stack.push_back(filename);
// start importing...
lexer nasal_lexer;
parse nasal_parser;
if (nasal_lexer.scan(filename).geterr()) {
err.err("link", "error occurred when analysing <" + filename + ">");
return new code_block({0, 0, 0, 0, filename});
}
if (nasal_parser.compile(nasal_lexer).geterr()) {
err.err("link", "error occurred when analysing <" + filename + ">");
return new code_block({0, 0, 0, 0, filename});
}
// swap result out
auto parse_result = nasal_parser.swap(nullptr);
// check if parse result has 'import'
auto result = load(parse_result, filename);
module_load_stack.pop_back();
return result;
}
code_block* linker::import_nasal_lib() {
auto path = find_real_file_path(
"lib.nas", {0, 0, 0, 0, this_file}
);
if (!path.length()) {
return new code_block({0, 0, 0, 0, path});
}
// avoid infinite loading library
if (check_exist_or_record_file(path)) {
return new code_block({0, 0, 0, 0, path});
}
// start importing...
lexer nasal_lexer;
parse nasal_parser;
if (nasal_lexer.scan(path).geterr()) {
err.err("link",
"error occurred when analysing library <" + path + ">"
);
return new code_block({0, 0, 0, 0, path});
}
if (nasal_parser.compile(nasal_lexer).geterr()) {
err.err("link",
"error occurred when analysing library <" + path + ">"
);
return new code_block({0, 0, 0, 0, path});
}
// swap result out
auto parse_result = nasal_parser.swap(nullptr);
// check if library has 'import' (in fact it should not)
return load(parse_result, path);
}
std::string linker::generate_module_name(const std::string& file_path) {
auto error_name = "module@[" + file_path + "]";
if (!file_path.length()) {
return error_name;
}
// check file suffix and get file suffix position
auto suffix_position = file_path.find(".nas");
if (suffix_position==std::string::npos) {
err.warn("link",
"get invalid module name from <" + file_path + ">, " +
"will not be easily accessed. " +
"\".nas\" suffix is required."
);
return error_name;
}
if (suffix_position+4!=file_path.length()) {
err.warn("link",
"get invalid module name from <" + file_path + ">, " +
"will not be easily accessed. " +
"only one \".nas\" suffix is required in the path."
);
return error_name;
}
// only get the file name as module name, directory path is not included
auto split_position = file_path.find_last_of("/");
// find "\\" in windows platform
if (split_position==std::string::npos) {
split_position = file_path.find_last_of("\\");
}
// split file path to get module name
auto module_name = split_position==std::string::npos?
file_path.substr(0, suffix_position):
file_path.substr(split_position+1, suffix_position-split_position-1);
// check validation of module name
if (!module_name.length()) {
err.warn("link",
"get empty module name from <" + file_path + ">, " +
"will not be easily accessed."
);
return module_name;
}
if (std::isdigit(module_name[0]) ||
module_name.find(".")!=std::string::npos ||
module_name.find("-")!=std::string::npos) {
err.warn("link",
"get module <" + module_name + "> from <" + file_path + ">, " +
"will not be easily accessed."
);
}
return module_name;
}
return_expr* linker::generate_module_return(code_block* block) {
auto finder = std::unique_ptr<symbol_finder>(new symbol_finder);
auto result = new return_expr(block->get_location());
auto value = new hash_expr(block->get_location());
result->set_value(value);
for(const auto& i : finder->do_find(block)) {
auto pair = new hash_pair(block->get_location());
// do not export symbol begins with '_'
if (i.name.length() && i.name[0]=='_') {
continue;
}
pair->set_name(i.name);
pair->set_value(new identifier(block->get_location(), i.name));
value->add_member(pair);
}
return result;
}
definition_expr* linker::generate_module_definition(code_block* block) {
auto def = new definition_expr(block->get_location());
def->set_identifier(new identifier(
block->get_location(),
generate_module_name(block->get_location().file)
));
auto call = new call_expr(block->get_location());
auto func = new function(block->get_location());
func->set_code_block(block);
func->get_code_block()->add_expression(generate_module_return(block));
call->set_first(func);
call->add_call(new call_function(block->get_location()));
def->set_value(call);
return def;
}
code_block* linker::load(code_block* program_root, const std::string& filename) {
auto tree = new code_block({0, 0, 0, 0, filename});
// load library, this ast will be linked with root directly
// so no extra namespace is generated
if (!library_loaded) {
auto nasal_lib_code_block = import_nasal_lib();
// insert nasal lib code to the back of tree
link(tree, nasal_lib_code_block);
delete nasal_lib_code_block;
library_loaded = true;
}
// load imported modules
std::unordered_set<std::string> used_modules = {};
for(auto& import_node : program_root->get_expressions()) {
if (!import_check(import_node)) {
break;
}
// parse file and get ast
auto module_code_block = import_regular_file(import_node, used_modules);
auto replace_node = new null_expr(import_node->get_location());
// after importing the regular file as module, delete this node
delete import_node;
// and replace the node with null_expr node
import_node = replace_node;
// avoid repeatedly importing the same module
const auto& module_path = module_code_block->get_location().file;
if (used_modules.count(module_path)) {
delete module_code_block;
continue;
}
// then we generate a function warping the code block,
// and export the necessary global symbols in this code block
// by generate a return statement, with a hashmap return value
used_modules.insert(module_path);
tree->add_expression(generate_module_definition(module_code_block));
}
// insert program root to the back of tree
link(tree, program_root);
return tree;
}
const error& linker::link(
parse& parse, const std::string& self, bool spath = false) {
// switch for showing path when errors occur
show_path_flag = spath;
// initializing file map
this_file = self;
imported_files = {self};
module_load_stack = {self};
// scan root and import files
// then generate a new ast and return to import_ast
auto new_tree_root = load(parse.tree(), self);
auto old_tree_root = parse.swap(new_tree_root);
delete old_tree_root;
return err;
}
}

View File

@ -1,59 +1,55 @@
#pragma once
#ifndef _MSC_VER
#include <unistd.h>
#else
#define _CRT_SECURE_NO_DEPRECATE 1
#define _CRT_NONSTDC_NO_DEPRECATE 1
#include <io.h>
#endif
#ifdef _MSC_VER
#define F_OK 0
#endif
#include "nasal.h"
#include "nasal_ast.h"
#include "nasal_lexer.h"
#include "nasal_parse.h"
#include "symbol_finder.h"
#include <cstring>
#include <sstream>
#include <vector>
#include <unordered_set>
namespace nasal {
class linker {
private:
bool show_path_flag;
bool library_loaded;
std::string this_file;
error err;
std::vector<std::string> imported_files;
std::vector<std::string> module_load_stack;
std::vector<std::string> envpath;
private:
bool import_check(expr*);
bool check_exist_or_record_file(const std::string&);
bool check_self_import(const std::string&);
std::string generate_self_import_path(const std::string&);
void link(code_block*, code_block*);
std::string get_path(expr*);
std::string find_real_file_path(const std::string&, const span&);
code_block* import_regular_file(expr*, std::unordered_set<std::string>&);
code_block* import_nasal_lib();
std::string generate_module_name(const std::string&);
return_expr* generate_module_return(code_block*);
definition_expr* generate_module_definition(code_block*);
code_block* load(code_block*, const std::string&);
public:
linker();
const error& link(parse&, const std::string&, bool);
const auto& get_file_list() const {return imported_files;}
};
}
#pragma once
#ifndef _MSC_VER
#include <unistd.h>
#else
#define _CRT_SECURE_NO_DEPRECATE 1
#define _CRT_NONSTDC_NO_DEPRECATE 1
#include <io.h>
#endif
#include "nasal.h"
#include "nasal_ast.h"
#include "nasal_lexer.h"
#include "nasal_parse.h"
#include "symbol_finder.h"
#include <cstring>
#include <sstream>
#include <vector>
#include <unordered_set>
namespace nasal {
class linker {
private:
bool show_path_flag;
bool library_loaded;
std::string this_file;
error err;
std::vector<std::string> imported_files;
std::vector<std::string> module_load_stack;
std::vector<std::string> envpath;
private:
bool import_check(expr*);
bool check_exist_or_record_file(const std::string&);
bool check_self_import(const std::string&);
std::string generate_self_import_path(const std::string&);
void link(code_block*, code_block*);
std::string get_path(expr*);
std::string find_real_file_path(const std::string&, const span&);
code_block* import_regular_file(expr*, std::unordered_set<std::string>&);
code_block* import_nasal_lib();
std::string generate_module_name(const std::string&);
return_expr* generate_module_return(code_block*);
definition_expr* generate_module_definition(code_block*);
code_block* load(code_block*, const std::string&);
public:
linker();
const error& link(parse&, const std::string&, bool);
const auto& get_file_list() const {return imported_files;}
};
}

View File

@ -1,393 +1,396 @@
#ifdef _MSC_VER
#pragma warning (disable:4244)
#pragma warning (disable:4267)
#pragma warning (disable:4102)
#endif
#include "nasal_lexer.h"
#include "repl.h"
namespace nasal {
bool lexer::skip(char c) {
return c==' ' || c=='\n' || c=='\t' || c=='\r' || c==0;
}
bool lexer::is_id(char c) {
return (c=='_') || std::isalpha(c) || (c<0);
}
bool lexer::is_hex(char c) {
return std::isxdigit(c);
}
bool lexer::is_oct(char c) {
return '0'<=c && c<='7';
}
bool lexer::is_dec(char c) {
return std::isdigit(c);
}
bool lexer::is_str(char c) {
return c=='\'' || c=='\"' || c=='`';
}
bool lexer::is_single_opr(char c) {
return (
c=='(' || c==')' || c=='[' || c==']' ||
c=='{' || c=='}' || c==',' || c==';' ||
c==':' || c=='?' || c=='`' || c=='@' ||
c=='%' || c=='$' || c=='\\'
);
}
bool lexer::is_calc_opr(char c) {
return (
c=='=' || c=='+' || c=='-' || c=='*' ||
c=='!' || c=='/' || c=='<' || c=='>' ||
c=='~' || c=='|' || c=='&' || c=='^'
);
}
void lexer::skip_note() {
// avoid note, after this process ptr will point to '\n'
// so next loop line counter+1
while(++ptr<res.size() && res[ptr]!='\n') {}
}
void lexer::err_char() {
++column;
char c = res[ptr++];
err.err("lexer",
{line, column-1, line, column, filename},
"invalid character 0x"+chrhex(c)
);
++invalid_char;
}
void lexer::open(const std::string& file) {
if (repl::info::instance()->in_repl_mode &&
repl::info::instance()->repl_file_name==file) {
err.load(file);
filename = file;
res = repl::info::instance()->repl_file_source;
return;
}
// check file exsits and it is a regular file
struct stat buffer;
if (stat(file.c_str(), &buffer)==0 && !S_ISREG(buffer.st_mode)) {
err.err("lexer", "<"+file+"> is not a regular file");
err.chkerr();
}
// load
filename = file;
std::ifstream in(file, std::ios::binary);
if (in.fail()) {
err.err("lexer", "failed to open <" + file + ">");
res = "";
return;
}
err.load(file);
std::stringstream ss;
ss << in.rdbuf();
res = ss.str();
}
tok lexer::get_type(const std::string& str) {
return typetbl.count(str)? typetbl.at(str):tok::null;
}
std::string lexer::utf8_gen() {
std::string str = "";
while(ptr<res.size() && res[ptr]<0) {
std::string tmp = "";
u32 nbytes = utf8_hdchk(res[ptr]);
if (!nbytes) {
++ptr;
++column;
continue;
}
tmp += res[ptr++];
for(u32 i = 0; i<nbytes; ++i, ++ptr) {
if (ptr<res.size() && (res[ptr]&0xc0)==0x80) {
tmp += res[ptr];
}
}
// utf8 character's total length is 1+nbytes
if (tmp.length()!=1+nbytes) {
++column;
std::string utf_info = "0x"+chrhex(tmp[0]);
for(u32 i = 1; i<tmp.size(); ++i) {
utf_info += " 0x"+chrhex(tmp[i]);
}
err.err("lexer",
{line, column-1, line, column, filename},
"invalid utf-8 <"+utf_info+">"
);
++invalid_char;
}
str += tmp;
// may have some problems because not all the unicode takes 2 space
column += 2;
}
return str;
}
token lexer::id_gen() {
u32 begin_line = line;
u32 begin_column = column;
std::string str = "";
while(ptr<res.size() && (is_id(res[ptr]) || is_dec(res[ptr]))) {
if (res[ptr]<0) { // utf-8
str += utf8_gen();
} else { // ascii
str += res[ptr++];
++column;
}
}
tok type = get_type(str);
return {
{begin_line, begin_column, line, column, filename},
(type!=tok::null)? type:tok::id, str
};
}
token lexer::num_gen() {
u32 begin_line = line;
u32 begin_column = column;
// generate hex number
if (ptr+1<res.size() && res[ptr]=='0' && res[ptr+1]=='x') {
std::string str = "0x";
ptr += 2;
while(ptr<res.size() && is_hex(res[ptr])) {
str += res[ptr++];
}
column += str.length();
// "0x"
if (str.length()<3) {
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"invalid number `"+str+"`"
);
}
return {{begin_line, begin_column, line, column, filename}, tok::num, str};
} else if (ptr+1<res.size() && res[ptr]=='0' && res[ptr+1]=='o') { // generate oct number
std::string str = "0o";
ptr += 2;
while(ptr<res.size() && is_oct(res[ptr])) {
str += res[ptr++];
}
bool erfmt = false;
while(ptr<res.size() && (is_dec(res[ptr]) || is_hex(res[ptr]))) {
erfmt = true;
str += res[ptr++];
}
column += str.length();
if (str.length()==2 || erfmt) {
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"invalid number `"+str+"`"
);
}
return {{begin_line, begin_column, line, column, filename}, tok::num, str};
}
// generate dec number
// dec number -> [0~9][0~9]*(.[0~9]*)(e|E(+|-)0|[1~9][0~9]*)
std::string str = "";
while(ptr<res.size() && is_dec(res[ptr])) {
str += res[ptr++];
}
if (ptr<res.size() && res[ptr]=='.') {
str += res[ptr++];
while(ptr<res.size() && is_dec(res[ptr])) {
str += res[ptr++];
}
// "xxxx." is not a correct number
if (str.back()=='.') {
column += str.length();
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"invalid number `"+str+"`"
);
return {{begin_line, begin_column, line, column, filename}, tok::num, "0"};
}
}
if (ptr<res.size() && (res[ptr]=='e' || res[ptr]=='E')) {
str += res[ptr++];
if (ptr<res.size() && (res[ptr]=='-' || res[ptr]=='+')) {
str += res[ptr++];
}
while(ptr<res.size() && is_dec(res[ptr])) {
str += res[ptr++];
}
// "xxxe(-|+)" is not a correct number
if (str.back()=='e' || str.back()=='E' || str.back()=='-' || str.back()=='+') {
column += str.length();
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"invalid number `"+str+"`"
);
return {{begin_line, begin_column, line, column, filename}, tok::num, "0"};
}
}
column += str.length();
return {{begin_line, begin_column, line, column, filename}, tok::num, str};
}
token lexer::str_gen() {
u32 begin_line = line;
u32 begin_column = column;
std::string str = "";
const char begin = res[ptr];
++column;
while(++ptr<res.size() && res[ptr]!=begin) {
++column;
if (res[ptr]=='\n') {
column = 0;
++line;
}
if (res[ptr]=='\\' && ptr+1<res.size()) {
++column;
++ptr;
switch(res[ptr]) {
case '0': str += '\0'; break;
case 'a': str += '\a'; break;
case 'b': str += '\b'; break;
case 'e': str += '\033'; break;
case 't': str += '\t'; break;
case 'n': str += '\n'; break;
case 'v': str += '\v'; break;
case 'f': str += '\f'; break;
case 'r': str += '\r'; break;
case '?': str += '\?'; break;
case '\\':str += '\\'; break;
case '\'':str += '\''; break;
case '\"':str += '\"'; break;
default: str += res[ptr];break;
}
if (res[ptr]=='\n') {
column = 0;
++line;
}
continue;
}
str += res[ptr];
}
// check if this string ends with a " or '
if (ptr++>=res.size()) {
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"get EOF when generating string"
);
return {{begin_line, begin_column, line, column, filename}, tok::str, str};
}
++column;
// if is not utf8, 1+utf8_hdchk should be 1
if (begin=='`' && str.length()!=1+utf8_hdchk(str[0])) {
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"\'`\' is used for string including one character"
);
}
return {{begin_line, begin_column, line, column, filename}, tok::str, str};
}
token lexer::single_opr() {
u32 begin_line = line;
u32 begin_column = column;
std::string str(1, res[ptr]);
++column;
tok type = get_type(str);
if (type==tok::null) {
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"invalid operator `"+str+"`"
);
}
++ptr;
return {{begin_line, begin_column, line, column, filename}, type, str};
}
token lexer::dots() {
u32 begin_line = line;
u32 begin_column = column;
std::string str = ".";
if (ptr+2<res.size() && res[ptr+1]=='.' && res[ptr+2]=='.') {
str += "..";
}
ptr += str.length();
column += str.length();
return {{begin_line, begin_column, line, column, filename}, get_type(str), str};
}
token lexer::calc_opr() {
u32 begin_line = line;
u32 begin_column = column;
// get calculation operator
std::string str(1, res[ptr++]);
if (ptr<res.size() && res[ptr]=='=') {
str += res[ptr++];
}
column += str.length();
return {{begin_line, begin_column, line, column, filename}, get_type(str), str};
}
const error& lexer::scan(const std::string& file) {
line = 1;
column = 0;
ptr = 0;
toks = {};
open(file);
while(ptr<res.size()) {
while(ptr<res.size() && skip(res[ptr])) {
// these characters will be ignored, and '\n' will cause ++line
++column;
if (res[ptr++]=='\n') {
++line;
column = 0;
}
}
if (ptr>=res.size()) {
break;
}
if (is_id(res[ptr])) {
toks.push_back(id_gen());
} else if (is_dec(res[ptr])) {
toks.push_back(num_gen());
} else if (is_str(res[ptr])) {
toks.push_back(str_gen());
} else if (is_single_opr(res[ptr])) {
toks.push_back(single_opr());
} else if (res[ptr]=='.') {
toks.push_back(dots());
} else if (is_calc_opr(res[ptr])) {
toks.push_back(calc_opr());
} else if (res[ptr]=='#') {
skip_note();
} else {
err_char();
}
if (invalid_char>10) {
err.err("lexer", "too many invalid characters, stop");
break;
}
}
if (toks.size()) {
// eof token's location is the last token's location
toks.push_back({toks.back().loc, tok::eof, "<eof>"});
} else {
// if token sequence is empty, generate a default location
toks.push_back({{line, column, line, column, filename}, tok::eof, "<eof>"});
}
res = "";
return err;
}
}
#ifdef _MSC_VER
#pragma warning (disable:4244)
#pragma warning (disable:4267)
#pragma warning (disable:4102)
#endif
#include "nasal_lexer.h"
#include "repl.h"
namespace nasal {
bool lexer::skip(char c) {
return c==' ' || c=='\n' || c=='\t' || c=='\r' || c==0;
}
bool lexer::is_id(char c) {
return (c=='_') || std::isalpha(c) || (c<0);
}
bool lexer::is_hex(char c) {
return std::isxdigit(c);
}
bool lexer::is_oct(char c) {
return '0'<=c && c<='7';
}
bool lexer::is_dec(char c) {
return std::isdigit(c);
}
bool lexer::is_str(char c) {
return c=='\'' || c=='\"' || c=='`';
}
bool lexer::is_single_opr(char c) {
return (
c=='(' || c==')' || c=='[' || c==']' ||
c=='{' || c=='}' || c==',' || c==';' ||
c==':' || c=='?' || c=='`' || c=='@' ||
c=='%' || c=='$' || c=='\\'
);
}
bool lexer::is_calc_opr(char c) {
return (
c=='=' || c=='+' || c=='-' || c=='*' ||
c=='!' || c=='/' || c=='<' || c=='>' ||
c=='~' || c=='|' || c=='&' || c=='^'
);
}
void lexer::skip_note() {
// avoid note, after this process ptr will point to '\n'
// so next loop line counter+1
while(++ptr<res.size() && res[ptr]!='\n') {}
}
void lexer::err_char() {
++column;
char c = res[ptr++];
err.err("lexer",
{line, column-1, line, column, filename},
"invalid character 0x"+chrhex(c)
);
++invalid_char;
}
void lexer::open(const std::string& file) {
if (repl::info::instance()->in_repl_mode &&
repl::info::instance()->repl_file_name==file) {
err.load(file);
filename = file;
res = repl::info::instance()->repl_file_source;
return;
}
// check file exsits and it is a regular file
#ifdef _MSC_VER
#define S_ISREG(m) (((m)&0xF000)==0x8000)
#endif
struct stat buffer;
if (stat(file.c_str(), &buffer)==0 && !S_ISREG(buffer.st_mode)) {
err.err("lexer", "<"+file+"> is not a regular file");
err.chkerr();
}
// load
filename = file;
std::ifstream in(file, std::ios::binary);
if (in.fail()) {
err.err("lexer", "failed to open <" + file + ">");
res = "";
return;
}
err.load(file);
std::stringstream ss;
ss << in.rdbuf();
res = ss.str();
}
tok lexer::get_type(const std::string& str) {
return typetbl.count(str)? typetbl.at(str):tok::null;
}
std::string lexer::utf8_gen() {
std::string str = "";
while(ptr<res.size() && res[ptr]<0) {
std::string tmp = "";
u32 nbytes = utf8_hdchk(res[ptr]);
if (!nbytes) {
++ptr;
++column;
continue;
}
tmp += res[ptr++];
for(u32 i = 0; i<nbytes; ++i, ++ptr) {
if (ptr<res.size() && (res[ptr]&0xc0)==0x80) {
tmp += res[ptr];
}
}
// utf8 character's total length is 1+nbytes
if (tmp.length()!=1+nbytes) {
++column;
std::string utf_info = "0x"+chrhex(tmp[0]);
for(u32 i = 1; i<tmp.size(); ++i) {
utf_info += " 0x"+chrhex(tmp[i]);
}
err.err("lexer",
{line, column-1, line, column, filename},
"invalid utf-8 <"+utf_info+">"
);
++invalid_char;
}
str += tmp;
// may have some problems because not all the unicode takes 2 space
column += 2;
}
return str;
}
token lexer::id_gen() {
u32 begin_line = line;
u32 begin_column = column;
std::string str = "";
while(ptr<res.size() && (is_id(res[ptr]) || is_dec(res[ptr]))) {
if (res[ptr]<0) { // utf-8
str += utf8_gen();
} else { // ascii
str += res[ptr++];
++column;
}
}
tok type = get_type(str);
return {
{begin_line, begin_column, line, column, filename},
(type!=tok::null)? type:tok::id, str
};
}
token lexer::num_gen() {
u32 begin_line = line;
u32 begin_column = column;
// generate hex number
if (ptr+1<res.size() && res[ptr]=='0' && res[ptr+1]=='x') {
std::string str = "0x";
ptr += 2;
while(ptr<res.size() && is_hex(res[ptr])) {
str += res[ptr++];
}
column += str.length();
// "0x"
if (str.length()<3) {
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"invalid number `"+str+"`"
);
}
return {{begin_line, begin_column, line, column, filename}, tok::num, str};
} else if (ptr+1<res.size() && res[ptr]=='0' && res[ptr+1]=='o') { // generate oct number
std::string str = "0o";
ptr += 2;
while(ptr<res.size() && is_oct(res[ptr])) {
str += res[ptr++];
}
bool erfmt = false;
while(ptr<res.size() && (is_dec(res[ptr]) || is_hex(res[ptr]))) {
erfmt = true;
str += res[ptr++];
}
column += str.length();
if (str.length()==2 || erfmt) {
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"invalid number `"+str+"`"
);
}
return {{begin_line, begin_column, line, column, filename}, tok::num, str};
}
// generate dec number
// dec number -> [0~9][0~9]*(.[0~9]*)(e|E(+|-)0|[1~9][0~9]*)
std::string str = "";
while(ptr<res.size() && is_dec(res[ptr])) {
str += res[ptr++];
}
if (ptr<res.size() && res[ptr]=='.') {
str += res[ptr++];
while(ptr<res.size() && is_dec(res[ptr])) {
str += res[ptr++];
}
// "xxxx." is not a correct number
if (str.back()=='.') {
column += str.length();
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"invalid number `"+str+"`"
);
return {{begin_line, begin_column, line, column, filename}, tok::num, "0"};
}
}
if (ptr<res.size() && (res[ptr]=='e' || res[ptr]=='E')) {
str += res[ptr++];
if (ptr<res.size() && (res[ptr]=='-' || res[ptr]=='+')) {
str += res[ptr++];
}
while(ptr<res.size() && is_dec(res[ptr])) {
str += res[ptr++];
}
// "xxxe(-|+)" is not a correct number
if (str.back()=='e' || str.back()=='E' || str.back()=='-' || str.back()=='+') {
column += str.length();
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"invalid number `"+str+"`"
);
return {{begin_line, begin_column, line, column, filename}, tok::num, "0"};
}
}
column += str.length();
return {{begin_line, begin_column, line, column, filename}, tok::num, str};
}
token lexer::str_gen() {
u32 begin_line = line;
u32 begin_column = column;
std::string str = "";
const char begin = res[ptr];
++column;
while(++ptr<res.size() && res[ptr]!=begin) {
++column;
if (res[ptr]=='\n') {
column = 0;
++line;
}
if (res[ptr]=='\\' && ptr+1<res.size()) {
++column;
++ptr;
switch(res[ptr]) {
case '0': str += '\0'; break;
case 'a': str += '\a'; break;
case 'b': str += '\b'; break;
case 'e': str += '\033'; break;
case 't': str += '\t'; break;
case 'n': str += '\n'; break;
case 'v': str += '\v'; break;
case 'f': str += '\f'; break;
case 'r': str += '\r'; break;
case '?': str += '\?'; break;
case '\\':str += '\\'; break;
case '\'':str += '\''; break;
case '\"':str += '\"'; break;
default: str += res[ptr];break;
}
if (res[ptr]=='\n') {
column = 0;
++line;
}
continue;
}
str += res[ptr];
}
// check if this string ends with a " or '
if (ptr++>=res.size()) {
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"get EOF when generating string"
);
return {{begin_line, begin_column, line, column, filename}, tok::str, str};
}
++column;
// if is not utf8, 1+utf8_hdchk should be 1
if (begin=='`' && str.length()!=1+utf8_hdchk(str[0])) {
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"\'`\' is used for string including one character"
);
}
return {{begin_line, begin_column, line, column, filename}, tok::str, str};
}
token lexer::single_opr() {
u32 begin_line = line;
u32 begin_column = column;
std::string str(1, res[ptr]);
++column;
tok type = get_type(str);
if (type==tok::null) {
err.err("lexer",
{begin_line, begin_column, line, column, filename},
"invalid operator `"+str+"`"
);
}
++ptr;
return {{begin_line, begin_column, line, column, filename}, type, str};
}
token lexer::dots() {
u32 begin_line = line;
u32 begin_column = column;
std::string str = ".";
if (ptr+2<res.size() && res[ptr+1]=='.' && res[ptr+2]=='.') {
str += "..";
}
ptr += str.length();
column += str.length();
return {{begin_line, begin_column, line, column, filename}, get_type(str), str};
}
token lexer::calc_opr() {
u32 begin_line = line;
u32 begin_column = column;
// get calculation operator
std::string str(1, res[ptr++]);
if (ptr<res.size() && res[ptr]=='=') {
str += res[ptr++];
}
column += str.length();
return {{begin_line, begin_column, line, column, filename}, get_type(str), str};
}
const error& lexer::scan(const std::string& file) {
line = 1;
column = 0;
ptr = 0;
toks = {};
open(file);
while(ptr<res.size()) {
while(ptr<res.size() && skip(res[ptr])) {
// these characters will be ignored, and '\n' will cause ++line
++column;
if (res[ptr++]=='\n') {
++line;
column = 0;
}
}
if (ptr>=res.size()) {
break;
}
if (is_id(res[ptr])) {
toks.push_back(id_gen());
} else if (is_dec(res[ptr])) {
toks.push_back(num_gen());
} else if (is_str(res[ptr])) {
toks.push_back(str_gen());
} else if (is_single_opr(res[ptr])) {
toks.push_back(single_opr());
} else if (res[ptr]=='.') {
toks.push_back(dots());
} else if (is_calc_opr(res[ptr])) {
toks.push_back(calc_opr());
} else if (res[ptr]=='#') {
skip_note();
} else {
err_char();
}
if (invalid_char>10) {
err.err("lexer", "too many invalid characters, stop");
break;
}
}
if (toks.size()) {
// eof token's location is the last token's location
toks.push_back({toks.back().loc, tok::eof, "<eof>"});
} else {
// if token sequence is empty, generate a default location
toks.push_back({{line, column, line, column, filename}, tok::eof, "<eof>"});
}
res = "";
return err;
}
}

View File

@ -15,10 +15,6 @@
#include "nasal.h"
#include "nasal_err.h"
#ifdef _MSC_VER
#define S_ISREG(m) (((m)&0xF000)==0x8000)
#endif
namespace nasal {
enum class tok:u32 {